From 031bd879f79d59d2f4fccd44377adf24fb977b5a Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 6 Sep 2012 18:35:13 +0530 Subject: [PATCH 1/5] ARM: mm: implement LoUIS API for cache maintenance ops ARM v7 architecture introduced the concept of cache levels and related control registers. New processors like A7 and A15 embed an L2 unified cache controller that becomes part of the cache level hierarchy. Some operations in the kernel like cpu_suspend and __cpu_disable do not require a flush of the entire cache hierarchy to DRAM but just the cache levels belonging to the Level of Unification Inner Shareable (LoUIS), which in most of ARM v7 systems correspond to L1. The current cache flushing API used in cpu_suspend and __cpu_disable, flush_cache_all(), ends up flushing the whole cache hierarchy since for v7 it cleans and invalidates all cache levels up to Level of Coherency (LoC) which cripples system performance when used in hot paths like hotplug and cpuidle. Therefore a new kernel cache maintenance API must be added to cope with latest ARM system requirements. This patch adds flush_cache_louis() to the ARM kernel cache maintenance API. This function cleans and invalidates all data cache levels up to the Level of Unification Inner Shareable (LoUIS) and invalidates the instruction cache for processors that support it (> v7). This patch also creates an alias of the cache LoUIS function to flush_kern_all for all processor versions prior to v7, so that the current cache flushing behaviour is unchanged for those processors. v7 cache maintenance code implements a cache LoUIS function that cleans and invalidates the D-cache up to LoUIS and invalidates the I-cache, according to the new API. Reviewed-by: Santosh Shilimkar Reviewed-by: Nicolas Pitre Signed-off-by: Lorenzo Pieralisi Tested-by: Shawn Guo --- arch/arm/include/asm/cacheflush.h | 15 +++++++++++++ arch/arm/include/asm/glue-cache.h | 1 + arch/arm/mm/cache-fa.S | 3 +++ arch/arm/mm/cache-v3.S | 3 +++ arch/arm/mm/cache-v4.S | 3 +++ arch/arm/mm/cache-v4wb.S | 3 +++ arch/arm/mm/cache-v4wt.S | 3 +++ arch/arm/mm/cache-v6.S | 3 +++ arch/arm/mm/cache-v7.S | 36 +++++++++++++++++++++++++++++++ arch/arm/mm/proc-arm1020.S | 3 +++ arch/arm/mm/proc-arm1020e.S | 3 +++ arch/arm/mm/proc-arm1022.S | 3 +++ arch/arm/mm/proc-arm1026.S | 3 +++ arch/arm/mm/proc-arm920.S | 3 +++ arch/arm/mm/proc-arm922.S | 3 +++ arch/arm/mm/proc-arm925.S | 3 +++ arch/arm/mm/proc-arm926.S | 3 +++ arch/arm/mm/proc-arm940.S | 3 +++ arch/arm/mm/proc-arm946.S | 3 +++ arch/arm/mm/proc-feroceon.S | 3 +++ arch/arm/mm/proc-macros.S | 1 + arch/arm/mm/proc-mohawk.S | 3 +++ arch/arm/mm/proc-xsc3.S | 3 +++ arch/arm/mm/proc-xscale.S | 3 +++ 24 files changed, 113 insertions(+) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index e4448e16046d..e1489c54cd12 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -49,6 +49,13 @@ * * Unconditionally clean and invalidate the entire cache. * + * flush_kern_louis() + * + * Flush data cache levels up to the level of unification + * inner shareable and invalidate the I-cache. + * Only needed from v7 onwards, falls back to flush_cache_all() + * for all other processor versions. + * * flush_user_all() * * Clean and invalidate all user space cache entries @@ -97,6 +104,7 @@ struct cpu_cache_fns { void (*flush_icache_all)(void); void (*flush_kern_all)(void); + void (*flush_kern_louis)(void); void (*flush_user_all)(void); void (*flush_user_range)(unsigned long, unsigned long, unsigned int); @@ -119,6 +127,7 @@ extern struct cpu_cache_fns cpu_cache; #define __cpuc_flush_icache_all cpu_cache.flush_icache_all #define __cpuc_flush_kern_all cpu_cache.flush_kern_all +#define __cpuc_flush_kern_louis cpu_cache.flush_kern_louis #define __cpuc_flush_user_all cpu_cache.flush_user_all #define __cpuc_flush_user_range cpu_cache.flush_user_range #define __cpuc_coherent_kern_range cpu_cache.coherent_kern_range @@ -139,6 +148,7 @@ extern struct cpu_cache_fns cpu_cache; extern void __cpuc_flush_icache_all(void); extern void __cpuc_flush_kern_all(void); +extern void __cpuc_flush_kern_louis(void); extern void __cpuc_flush_user_all(void); extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int); extern void __cpuc_coherent_kern_range(unsigned long, unsigned long); @@ -204,6 +214,11 @@ static inline void __flush_icache_all(void) __flush_icache_preferred(); } +/* + * Flush caches up to Level of Unification Inner Shareable + */ +#define flush_cache_louis() __cpuc_flush_kern_louis() + #define flush_cache_all() __cpuc_flush_kern_all() static inline void vivt_flush_cache_mm(struct mm_struct *mm) diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index 7e30874377e6..2d6a7de87a88 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -132,6 +132,7 @@ #ifndef MULTI_CACHE #define __cpuc_flush_icache_all __glue(_CACHE,_flush_icache_all) #define __cpuc_flush_kern_all __glue(_CACHE,_flush_kern_cache_all) +#define __cpuc_flush_kern_louis __glue(_CACHE,_flush_kern_cache_louis) #define __cpuc_flush_user_all __glue(_CACHE,_flush_user_cache_all) #define __cpuc_flush_user_range __glue(_CACHE,_flush_user_cache_range) #define __cpuc_coherent_kern_range __glue(_CACHE,_coherent_kern_range) diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S index 072016371093..e505befe51b5 100644 --- a/arch/arm/mm/cache-fa.S +++ b/arch/arm/mm/cache-fa.S @@ -240,6 +240,9 @@ ENTRY(fa_dma_unmap_area) mov pc, lr ENDPROC(fa_dma_unmap_area) + .globl fa_flush_kern_cache_louis + .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S index 52e35f32eefb..8a3fadece8d3 100644 --- a/arch/arm/mm/cache-v3.S +++ b/arch/arm/mm/cache-v3.S @@ -128,6 +128,9 @@ ENTRY(v3_dma_map_area) ENDPROC(v3_dma_unmap_area) ENDPROC(v3_dma_map_area) + .globl v3_flush_kern_cache_louis + .equ v3_flush_kern_cache_louis, v3_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index 022135d2b7e4..43e5d77be677 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -140,6 +140,9 @@ ENTRY(v4_dma_map_area) ENDPROC(v4_dma_unmap_area) ENDPROC(v4_dma_map_area) + .globl v4_flush_kern_cache_louis + .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index 8f1eeae340c8..cd4945321407 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -251,6 +251,9 @@ ENTRY(v4wb_dma_unmap_area) mov pc, lr ENDPROC(v4wb_dma_unmap_area) + .globl v4wb_flush_kern_cache_louis + .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index b34a5f908a82..11e5e5838bc5 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -196,6 +196,9 @@ ENTRY(v4wt_dma_map_area) ENDPROC(v4wt_dma_unmap_area) ENDPROC(v4wt_dma_map_area) + .globl v4wt_flush_kern_cache_louis + .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 4b10760c56d6..d8fd4d4bd3d4 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -326,6 +326,9 @@ ENTRY(v6_dma_unmap_area) mov pc, lr ENDPROC(v6_dma_unmap_area) + .globl v6_flush_kern_cache_louis + .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all + __INITDATA @ define struct cpu_cache_fns (see and proc-macros.S) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 39e3fb3db801..d1fa2f66d8c0 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -33,6 +33,24 @@ ENTRY(v7_flush_icache_all) mov pc, lr ENDPROC(v7_flush_icache_all) + /* + * v7_flush_dcache_louis() + * + * Flush the D-cache up to the Level of Unification Inner Shareable + * + * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) + */ + +ENTRY(v7_flush_dcache_louis) + dmb @ ensure ordering with previous memory accesses + mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr + ands r3, r0, #0xe00000 @ extract LoUIS from clidr + mov r3, r3, lsr #20 @ r3 = LoUIS * 2 + moveq pc, lr @ return if level == 0 + mov r10, #0 @ r10 (starting level) = 0 + b loop1 @ start flushing cache levels +ENDPROC(v7_flush_dcache_louis) + /* * v7_flush_dcache_all() * @@ -120,6 +138,24 @@ ENTRY(v7_flush_kern_cache_all) mov pc, lr ENDPROC(v7_flush_kern_cache_all) + /* + * v7_flush_kern_cache_louis(void) + * + * Flush the data cache up to Level of Unification Inner Shareable. + * Invalidate the I-cache to the point of unification. + */ +ENTRY(v7_flush_kern_cache_louis) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + bl v7_flush_dcache_louis + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(v7_flush_kern_cache_louis) + /* * v7_flush_cache_all() * diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 0650bb87c1e3..2bb61e703d6c 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -368,6 +368,9 @@ ENTRY(arm1020_dma_unmap_area) mov pc, lr ENDPROC(arm1020_dma_unmap_area) + .globl arm1020_flush_kern_cache_louis + .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm1020 diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 4188478325a6..8f96aa40f510 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -354,6 +354,9 @@ ENTRY(arm1020e_dma_unmap_area) mov pc, lr ENDPROC(arm1020e_dma_unmap_area) + .globl arm1020e_flush_kern_cache_louis + .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm1020e diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 33c68824bff0..8ebe4a469a22 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -343,6 +343,9 @@ ENTRY(arm1022_dma_unmap_area) mov pc, lr ENDPROC(arm1022_dma_unmap_area) + .globl arm1022_flush_kern_cache_louis + .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm1022 diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index fbc1d5fc24dc..093fc7e520c3 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -337,6 +337,9 @@ ENTRY(arm1026_dma_unmap_area) mov pc, lr ENDPROC(arm1026_dma_unmap_area) + .globl arm1026_flush_kern_cache_louis + .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm1026 diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 1a8c138eb897..2c3b9421ab5e 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -319,6 +319,9 @@ ENTRY(arm920_dma_unmap_area) mov pc, lr ENDPROC(arm920_dma_unmap_area) + .globl arm920_flush_kern_cache_louis + .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm920 #endif diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 4c44d7e1c3ca..4464c49d7449 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -321,6 +321,9 @@ ENTRY(arm922_dma_unmap_area) mov pc, lr ENDPROC(arm922_dma_unmap_area) + .globl arm922_flush_kern_cache_louis + .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm922 #endif diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index ec5b1180994f..281eb9b9c1d6 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -376,6 +376,9 @@ ENTRY(arm925_dma_unmap_area) mov pc, lr ENDPROC(arm925_dma_unmap_area) + .globl arm925_flush_kern_cache_louis + .equ arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm925 diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index c31e62c606c0..f1803f7e2972 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -339,6 +339,9 @@ ENTRY(arm926_dma_unmap_area) mov pc, lr ENDPROC(arm926_dma_unmap_area) + .globl arm926_flush_kern_cache_louis + .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm926 diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index a613a7dd7146..8da189d4a402 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -267,6 +267,9 @@ ENTRY(arm940_dma_unmap_area) mov pc, lr ENDPROC(arm940_dma_unmap_area) + .globl arm940_flush_kern_cache_louis + .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm940 diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 9f4f2999fdd0..f666cf34075a 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -310,6 +310,9 @@ ENTRY(arm946_dma_unmap_area) mov pc, lr ENDPROC(arm946_dma_unmap_area) + .globl arm946_flush_kern_cache_louis + .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions arm946 diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 23a8e4c7f2bd..85e5e3baa3d4 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -415,6 +415,9 @@ ENTRY(feroceon_dma_unmap_area) mov pc, lr ENDPROC(feroceon_dma_unmap_area) + .globl feroceon_flush_kern_cache_louis + .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions feroceon diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 2d8ff3ad86d3..b29a2265af01 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -299,6 +299,7 @@ ENTRY(\name\()_processor_functions) ENTRY(\name\()_cache_fns) .long \name\()_flush_icache_all .long \name\()_flush_kern_cache_all + .long \name\()_flush_kern_cache_louis .long \name\()_flush_user_cache_all .long \name\()_flush_user_cache_range .long \name\()_coherent_kern_range diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index fbb2124a547d..82f9cdc751d6 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -303,6 +303,9 @@ ENTRY(mohawk_dma_unmap_area) mov pc, lr ENDPROC(mohawk_dma_unmap_area) + .globl mohawk_flush_kern_cache_louis + .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions mohawk diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index b0d57869da2d..eb93d6487f35 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -337,6 +337,9 @@ ENTRY(xsc3_dma_unmap_area) mov pc, lr ENDPROC(xsc3_dma_unmap_area) + .globl xsc3_flush_kern_cache_louis + .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions xsc3 diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 4ffebaa595ee..b5ea31d6daac 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -410,6 +410,9 @@ ENTRY(xscale_dma_unmap_area) mov pc, lr ENDPROC(xscale_dma_unmap_area) + .globl xscale_flush_kern_cache_louis + .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all + @ define struct cpu_cache_fns (see and proc-macros.S) define_cache_functions xscale From 3287be8c4e2bd91211b3947ba726d95e8a1092b5 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 18 Sep 2012 16:29:44 +0100 Subject: [PATCH 2/5] ARM: mm: rename jump labels in v7_flush_dcache_all function This patch renames jump labels in v7_flush_dcache_all in order to define a specific flush cache levels entry point. Acked-by: Nicolas Pitre Signed-off-by: Lorenzo Pieralisi Tested-by: Shawn Guo --- arch/arm/mm/cache-v7.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index d1fa2f66d8c0..140b294bbd9b 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -48,7 +48,7 @@ ENTRY(v7_flush_dcache_louis) mov r3, r3, lsr #20 @ r3 = LoUIS * 2 moveq pc, lr @ return if level == 0 mov r10, #0 @ r10 (starting level) = 0 - b loop1 @ start flushing cache levels + b flush_levels @ start flushing cache levels ENDPROC(v7_flush_dcache_louis) /* @@ -67,7 +67,7 @@ ENTRY(v7_flush_dcache_all) mov r3, r3, lsr #23 @ left align loc bit field beq finished @ if loc is 0, then no need to clean mov r10, #0 @ start clean at cache level 0 -loop1: +flush_levels: add r2, r10, r10, lsr #1 @ work out 3x current cache level mov r1, r0, lsr r2 @ extract cache type bits from clidr and r1, r1, #7 @ mask of the bits for current cache only @@ -89,9 +89,9 @@ loop1: clz r5, r4 @ find bit position of way size increment ldr r7, =0x7fff ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop2: +loop1: mov r9, r4 @ create working copy of max way size -loop3: +loop2: ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11 THUMB( lsl r6, r9, r5 ) THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 @@ -100,13 +100,13 @@ loop3: THUMB( orr r11, r11, r6 ) @ factor index number into r11 mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way subs r9, r9, #1 @ decrement the way - bge loop3 - subs r7, r7, #1 @ decrement the index bge loop2 + subs r7, r7, #1 @ decrement the index + bge loop1 skip: add r10, r10, #2 @ increment cache number cmp r3, r10 - bgt loop1 + bgt flush_levels finished: mov r10, #0 @ swith back to cache level 0 mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr From dbee0c6fb4c1269b2dfc8b0b7a29907ea7fed560 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 7 Sep 2012 11:06:57 +0530 Subject: [PATCH 3/5] ARM: kernel: update cpu_suspend code to use cache LoUIS operations In processors like A15/A7 L2 cache is unified and integrated within the processor cache hierarchy, so that it is not considered an outer cache anymore. For processors like A15/A7 flush_cache_all() ends up cleaning all cache levels up to Level of Coherency (LoC) that includes the L2 unified cache. When a single CPU is suspended (CPU idle) a complete L2 clean is not required, so generic cpu_suspend code must clean the data cache using the newly introduced cache LoUIS function. The context and stack pointer (context pointer) are cleaned to main memory using cache area functions that operate on MVA and guarantee that the data is written back to main memory (perform cache cleaning up to the Point of Coherency - PoC) so that the processor can fetch the context when the MMU is off in the cpu_resume code path. outer_cache management remains unchanged. Reviewed-by: Santosh Shilimkar Reviewed-by: Nicolas Pitre Signed-off-by: Lorenzo Pieralisi Tested-by: Shawn Guo --- arch/arm/kernel/suspend.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 1794cc3b0f18..358bca3a995e 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -17,6 +17,8 @@ extern void cpu_resume_mmu(void); */ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) { + u32 *ctx = ptr; + *save_ptr = virt_to_phys(ptr); /* This must correspond to the LDM in cpu_resume() assembly */ @@ -26,7 +28,20 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) cpu_do_suspend(ptr); - flush_cache_all(); + flush_cache_louis(); + + /* + * flush_cache_louis does not guarantee that + * save_ptr and ptr are cleaned to main memory, + * just up to the Level of Unification Inner Shareable. + * Since the context pointer and context itself + * are to be retrieved with the MMU off that + * data must be cleaned from all cache levels + * to main memory using "area" cache primitives. + */ + __cpuc_flush_dcache_area(ctx, ptrsz); + __cpuc_flush_dcache_area(save_ptr, sizeof(*save_ptr)); + outer_clean_range(*save_ptr, *save_ptr + ptrsz); outer_clean_range(virt_to_phys(save_ptr), virt_to_phys(save_ptr) + sizeof(*save_ptr)); From e6b866e954a7f0d0144a951c158f3922dac1e6b9 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 7 Sep 2012 11:09:15 +0530 Subject: [PATCH 4/5] ARM: kernel: update __cpu_disable to use cache LoUIS maintenance API When a CPU is hotplugged out caches that reside in its power domain lose their contents and so must be cleaned to the next memory level. Currently, __cpu_disable calls flush_cache_all() that for new generation processor like A15/A7 ends up cleaning and invalidating all cache levels up to Level of Coherency, which includes the unified L2. This ends up being a waste of cycles since the L2 cache contents are not lost on power down. This patch updates __cpu_disable to use the new LoUIS API cache operations. Acked-by: Nicolas Pitre Reviewed-by: Santosh Shilimkar Signed-off-by: Lorenzo Pieralisi Tested-by: Shawn Guo --- arch/arm/kernel/smp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index ebd8ad274d76..199558b9462e 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -134,8 +134,11 @@ int __cpu_disable(void) /* * Flush user cache and TLB mappings, and then remove this CPU * from the vm mask set of all processes. + * + * Caches are flushed to the Level of Unification Inner Shareable + * to write-back dirty lines to unified caches shared by all CPUs. */ - flush_cache_all(); + flush_cache_louis(); local_flush_tlb_all(); clear_tasks_mm_cpumask(cpu); From 6323fa2256baa73d6a960ee57ec086b66aeecd0b Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 10 Sep 2012 15:07:26 +0530 Subject: [PATCH 5/5] ARM: mm: update __v7_setup() to the new LoUIS cache maintenance API The ARMv7 processor setup function __v7_setup() cleans and invalidates the CPU cache before enabling MMU to start the CPU with a clean CPU local cache. But on ARMv7 architectures like Cortex-[A15/A8], this code will end up flushing the L2 caches(up to level of Coherency) which is undesirable and expensive. The setup functions are used in the CPU hotplug scenario too and hence flushing all cache levels should be avoided. This patch replaces the cache flushing call with the newly introduced v7 dcache LoUIS API where only cache levels up to LoUIS are cleaned and invalidated when a processors executes __v7_setup which is the expected behavior. For processors like A9 and A5 where the L2 cache is an outer one the behavior should be unchanged. Reviewed-by: Nicolas Pitre Signed-off-by: Santosh Shilimkar Signed-off-by: Lorenzo Pieralisi Tested-by: Shawn Guo --- arch/arm/mm/proc-v7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index c2e2b66f72b5..846d279f3176 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -172,7 +172,7 @@ __v7_ca15mp_setup: __v7_setup: adr r12, __v7_setup_stack @ the local stack stmia r12, {r0-r5, r7, r9, r11, lr} - bl v7_flush_dcache_all + bl v7_flush_dcache_louis ldmia r12, {r0-r5, r7, r9, r11, lr} mrc p15, 0, r0, c0, c0, 0 @ read main ID register