From 1f44de0f5e309e8699b569b49a8e89ef4e7527c7 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Mon, 18 Apr 2022 07:07:13 +0100 Subject: [PATCH 01/10] ARM: 9193/1: amba: Add amba_read_periphid() helper Add new amba_read_periphid() helper to simplify error handling. Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- drivers/amba/bus.c | 157 +++++++++++++++++++++------------------------ 1 file changed, 74 insertions(+), 83 deletions(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index d3bd14aaabf6..0073d8ba0353 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -395,12 +395,80 @@ static void amba_device_release(struct device *dev) kfree(d); } -static int amba_device_try_add(struct amba_device *dev, struct resource *parent) +static int amba_read_periphid(struct amba_device *dev) { - u32 size; + struct reset_control *rstc; + u32 size, pid, cid; void __iomem *tmp; int i, ret; + ret = dev_pm_domain_attach(&dev->dev, true); + if (ret) + goto err_out; + + ret = amba_get_enable_pclk(dev); + if (ret) + goto err_pm; + + /* + * Find reset control(s) of the amba bus and de-assert them. + */ + rstc = of_reset_control_array_get_optional_shared(dev->dev.of_node); + if (IS_ERR(rstc)) { + ret = PTR_ERR(rstc); + if (ret != -EPROBE_DEFER) + dev_err(&dev->dev, "can't get reset: %d\n", ret); + goto err_clk; + } + reset_control_deassert(rstc); + reset_control_put(rstc); + + size = resource_size(&dev->res); + tmp = ioremap(dev->res.start, size); + if (!tmp) { + ret = -ENOMEM; + goto err_clk; + } + + /* + * Read pid and cid based on size of resource + * they are located at end of region + */ + for (pid = 0, i = 0; i < 4; i++) + pid |= (readl(tmp + size - 0x20 + 4 * i) & 255) << (i * 8); + for (cid = 0, i = 0; i < 4; i++) + cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) << (i * 8); + + if (cid == CORESIGHT_CID) { + /* set the base to the start of the last 4k block */ + void __iomem *csbase = tmp + size - 4096; + + dev->uci.devarch = readl(csbase + UCI_REG_DEVARCH_OFFSET); + dev->uci.devtype = readl(csbase + UCI_REG_DEVTYPE_OFFSET) & 0xff; + } + + if (cid == AMBA_CID || cid == CORESIGHT_CID) { + dev->periphid = pid; + dev->cid = cid; + } + + if (!dev->periphid) + ret = -ENODEV; + + iounmap(tmp); + +err_clk: + amba_put_disable_pclk(dev); +err_pm: + dev_pm_domain_detach(&dev->dev, true); +err_out: + return ret; +} + +static int amba_device_try_add(struct amba_device *dev, struct resource *parent) +{ + int ret; + ret = request_resource(parent, &dev->res); if (ret) goto err_out; @@ -409,93 +477,16 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent) if (dev->periphid != 0) goto skip_probe; - /* - * Dynamically calculate the size of the resource - * and use this for iomap - */ - size = resource_size(&dev->res); - tmp = ioremap(dev->res.start, size); - if (!tmp) { - ret = -ENOMEM; - goto err_release; - } - - ret = dev_pm_domain_attach(&dev->dev, true); - if (ret) { - iounmap(tmp); - goto err_release; - } - - ret = amba_get_enable_pclk(dev); - if (ret == 0) { - u32 pid, cid; - struct reset_control *rstc; - - /* - * Find reset control(s) of the amba bus and de-assert them. - */ - rstc = of_reset_control_array_get_optional_shared(dev->dev.of_node); - if (IS_ERR(rstc)) { - ret = PTR_ERR(rstc); - if (ret != -EPROBE_DEFER) - dev_err(&dev->dev, "can't get reset: %d\n", - ret); - goto err_reset; - } - reset_control_deassert(rstc); - reset_control_put(rstc); - - /* - * Read pid and cid based on size of resource - * they are located at end of region - */ - for (pid = 0, i = 0; i < 4; i++) - pid |= (readl(tmp + size - 0x20 + 4 * i) & 255) << - (i * 8); - for (cid = 0, i = 0; i < 4; i++) - cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) << - (i * 8); - - if (cid == CORESIGHT_CID) { - /* set the base to the start of the last 4k block */ - void __iomem *csbase = tmp + size - 4096; - - dev->uci.devarch = - readl(csbase + UCI_REG_DEVARCH_OFFSET); - dev->uci.devtype = - readl(csbase + UCI_REG_DEVTYPE_OFFSET) & 0xff; - } - - amba_put_disable_pclk(dev); - - if (cid == AMBA_CID || cid == CORESIGHT_CID) { - dev->periphid = pid; - dev->cid = cid; - } - - if (!dev->periphid) - ret = -ENODEV; - } - - iounmap(tmp); - dev_pm_domain_detach(&dev->dev, true); - + ret = amba_read_periphid(dev); if (ret) goto err_release; - - skip_probe: +skip_probe: ret = device_add(&dev->dev); - err_release: +err_release: if (ret) release_resource(&dev->res); - err_out: +err_out: return ret; - - err_reset: - amba_put_disable_pclk(dev); - iounmap(tmp); - dev_pm_domain_detach(&dev->dev, true); - goto err_release; } /* From 7719a68b2fa404fa8af6b0b7119a38c406c74858 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Mon, 18 Apr 2022 07:07:14 +0100 Subject: [PATCH 02/10] ARM: 9192/1: amba: fix memory leak in amba_device_try_add() If amba_device_try_add() return error code (not EPROBE_DEFER), memory leak occurred when amba device fails to read periphid. unreferenced object 0xc1c60800 (size 1024): comm "swapper/0", pid 1, jiffies 4294937333 (age 75.200s) hex dump (first 32 bytes): 40 40 db c1 04 08 c6 c1 04 08 c6 c1 00 00 00 00 @@.............. 00 d9 c1 c1 84 6f 38 c1 00 00 00 00 01 00 00 00 .....o8......... backtrace: [<(ptrval)>] kmem_cache_alloc_trace+0x168/0x2b4 [<(ptrval)>] amba_device_alloc+0x38/0x7c [<(ptrval)>] of_platform_bus_create+0x2f4/0x4e8 [<(ptrval)>] of_platform_bus_create+0x380/0x4e8 [<(ptrval)>] of_platform_bus_create+0x380/0x4e8 [<(ptrval)>] of_platform_bus_create+0x380/0x4e8 [<(ptrval)>] of_platform_populate+0x70/0xc4 [<(ptrval)>] of_platform_default_populate_init+0xb4/0xcc [<(ptrval)>] do_one_initcall+0x58/0x218 [<(ptrval)>] kernel_init_freeable+0x250/0x29c [<(ptrval)>] kernel_init+0x24/0x148 [<(ptrval)>] ret_from_fork+0x14/0x1c [<00000000>] 0x0 unreferenced object 0xc1db4040 (size 64): comm "swapper/0", pid 1, jiffies 4294937333 (age 75.200s) hex dump (first 32 bytes): 31 63 30 66 30 30 30 30 2e 77 64 74 00 00 00 00 1c0f0000.wdt.... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<(ptrval)>] __kmalloc_track_caller+0x19c/0x2f8 [<(ptrval)>] kvasprintf+0x60/0xcc [<(ptrval)>] kvasprintf_const+0x54/0x78 [<(ptrval)>] kobject_set_name_vargs+0x34/0xa8 [<(ptrval)>] dev_set_name+0x40/0x5c [<(ptrval)>] of_device_make_bus_id+0x128/0x1f8 [<(ptrval)>] of_platform_bus_create+0x4dc/0x4e8 [<(ptrval)>] of_platform_bus_create+0x380/0x4e8 [<(ptrval)>] of_platform_bus_create+0x380/0x4e8 [<(ptrval)>] of_platform_bus_create+0x380/0x4e8 [<(ptrval)>] of_platform_populate+0x70/0xc4 [<(ptrval)>] of_platform_default_populate_init+0xb4/0xcc [<(ptrval)>] do_one_initcall+0x58/0x218 [<(ptrval)>] kernel_init_freeable+0x250/0x29c [<(ptrval)>] kernel_init+0x24/0x148 [<(ptrval)>] ret_from_fork+0x14/0x1c Fix them by adding amba_device_put() to release device name and amba device. Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- drivers/amba/bus.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 0073d8ba0353..7e775ba6fdd9 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -478,8 +478,14 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent) goto skip_probe; ret = amba_read_periphid(dev); - if (ret) + if (ret) { + if (ret != -EPROBE_DEFER) { + amba_device_put(dev); + goto err_out; + } goto err_release; + } + skip_probe: ret = device_add(&dev->dev); err_release: From 952f03316352c606bebef56ba8f9642edbb8e348 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:38:27 +0100 Subject: [PATCH 03/10] ARM: 9194/1: assembler: simplify ldr_this_cpu for !SMP builds When CONFIG_SMP is not defined, the CPU offset is always zero, and so we can simplify the sequence to load a per-CPU variable. Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/assembler.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 34fe8d2dd5d1..8e59d748358d 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -692,8 +692,12 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) /* * ldr_va - load a 32-bit word from the virtual address of \sym */ - .macro ldr_va, rd:req, sym:req, cond + .macro ldr_va, rd:req, sym:req, cond, tmp + .ifnb \tmp + __ldst_va ldr, \rd, \tmp, \sym, \cond + .else __ldst_va ldr, \rd, \rd, \sym, \cond + .endif .endm /* @@ -727,9 +731,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) * are permitted to overlap with 'rd' if != sp */ .macro ldr_this_cpu, rd:req, sym:req, t1:req, t2:req -#if __LINUX_ARM_ARCH__ >= 7 || \ - !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \ - (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) +#ifndef CONFIG_SMP + ldr_va \rd, \sym, tmp=\t1 +#elif __LINUX_ARM_ARCH__ >= 7 || \ + !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \ + (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) this_cpu_offset \t1 mov_l \t2, \sym ldr \rd, [\t1, \t2] From 508074607c7b95b24f0adf633fdf606761bb7824 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:41:31 +0100 Subject: [PATCH 04/10] ARM: 9195/1: entry: avoid explicit literal loads ARMv7 has MOVW/MOVT instruction pairs to load symbol addresses into registers without having to rely on literal loads that go via the D-cache. For older cores, we now support a similar arrangement, based on PC-relative group relocations. This means we can elide most literal loads entirely from the entry path, by switching to the ldr_va macro to emit the appropriate sequence depending on the target architecture revision. While at it, switch to the bl_r macro for invoking the right PABT/DABT helpers instead of setting the LR register explicitly, which does not play well with cores that speculate across function returns. Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/assembler.h | 18 ++++++++-------- arch/arm/kernel/entry-armv.S | 37 ++++++-------------------------- arch/arm/kernel/entry-common.S | 10 +-------- arch/arm/kernel/entry-header.S | 3 +-- 4 files changed, 18 insertions(+), 50 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 8e59d748358d..90fbe4a3f9c8 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -666,12 +666,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) __adldst_l str, \src, \sym, \tmp, \cond .endm - .macro __ldst_va, op, reg, tmp, sym, cond + .macro __ldst_va, op, reg, tmp, sym, cond, offset #if __LINUX_ARM_ARCH__ >= 7 || \ !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \ (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) mov_l \tmp, \sym, \cond - \op\cond \reg, [\tmp] #else /* * Avoid a literal load, by emitting a sequence of ADD/LDR instructions @@ -683,20 +682,21 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym .reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym .reloc .L2_\@, R_ARM_LDR_PC_G2, \sym -.L0_\@: sub\cond \tmp, pc, #8 -.L1_\@: sub\cond \tmp, \tmp, #4 -.L2_\@: \op\cond \reg, [\tmp, #0] +.L0_\@: sub\cond \tmp, pc, #8 - \offset +.L1_\@: sub\cond \tmp, \tmp, #4 - \offset +.L2_\@: #endif + \op\cond \reg, [\tmp, #\offset] .endm /* * ldr_va - load a 32-bit word from the virtual address of \sym */ - .macro ldr_va, rd:req, sym:req, cond, tmp + .macro ldr_va, rd:req, sym:req, cond, tmp, offset=0 .ifnb \tmp - __ldst_va ldr, \rd, \tmp, \sym, \cond + __ldst_va ldr, \rd, \tmp, \sym, \cond, \offset .else - __ldst_va ldr, \rd, \rd, \sym, \cond + __ldst_va ldr, \rd, \rd, \sym, \cond, \offset .endif .endm @@ -704,7 +704,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) * str_va - store a 32-bit word to the virtual address of \sym */ .macro str_va, rn:req, sym:req, tmp:req, cond - __ldst_va str, \rn, \tmp, \sym, \cond + __ldst_va str, \rn, \tmp, \sym, \cond, 0 .endm /* diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 7a8682468a84..6e7dfb4786e3 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -61,9 +61,8 @@ .macro pabt_helper @ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5 #ifdef MULTI_PABORT - ldr ip, .LCprocfns - mov lr, pc - ldr pc, [ip, #PROCESSOR_PABT_FUNC] + ldr_va ip, processor, offset=PROCESSOR_PABT_FUNC + bl_r ip #else bl CPU_PABORT_HANDLER #endif @@ -82,9 +81,8 @@ @ the fault status register in r1. r9 must be preserved. @ #ifdef MULTI_DABORT - ldr ip, .LCprocfns - mov lr, pc - ldr pc, [ip, #PROCESSOR_DABT_FUNC] + ldr_va ip, processor, offset=PROCESSOR_DABT_FUNC + bl_r ip #else bl CPU_DABORT_HANDLER #endif @@ -302,16 +300,6 @@ __fiq_svc: UNWIND(.fnend ) ENDPROC(__fiq_svc) - .align 5 -.LCcralign: - .word cr_alignment -#ifdef MULTI_DABORT -.LCprocfns: - .word processor -#endif -.LCfp: - .word fp_enter - /* * Abort mode handlers */ @@ -370,7 +358,7 @@ ENDPROC(__fiq_abt) THUMB( stmia sp, {r0 - r12} ) ATRAP( mrc p15, 0, r7, c1, c0, 0) - ATRAP( ldr r8, .LCcralign) + ATRAP( ldr_va r8, cr_alignment) ldmia r0, {r3 - r5} add r0, sp, #S_PC @ here for interlock avoidance @@ -379,8 +367,6 @@ ENDPROC(__fiq_abt) str r3, [sp] @ save the "real" r0 copied @ from the exception stack - ATRAP( ldr r8, [r8, #0]) - @ @ We are now ready to fill in the remaining blanks on the stack: @ @@ -505,9 +491,7 @@ __und_usr_thumb: */ #if __LINUX_ARM_ARCH__ < 7 /* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */ -#define NEED_CPU_ARCHITECTURE - ldr r5, .LCcpu_architecture - ldr r5, [r5] + ldr_va r5, cpu_architecture cmp r5, #CPU_ARCH_ARMv7 blo __und_usr_fault_16 @ 16bit undefined instruction /* @@ -654,12 +638,6 @@ call_fpe: ret.w lr @ CP#14 (Debug) ret.w lr @ CP#15 (Control) -#ifdef NEED_CPU_ARCHITECTURE - .align 2 -.LCcpu_architecture: - .word __cpu_architecture -#endif - #ifdef CONFIG_NEON .align 6 @@ -685,9 +663,8 @@ call_fpe: #endif do_fpe: - ldr r4, .LCfp add r10, r10, #TI_FPSTATE @ r10 = workspace - ldr pc, [r4] @ Call FP module USR entry point + ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point /* * The FP module is called with these registers set: diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 90d40f4d56cf..ad3210e5cb69 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -198,7 +198,7 @@ ENTRY(vector_swi) #endif reload_current r10, ip zero_fp - alignment_trap r10, ip, __cr_alignment + alignment_trap r10, ip, cr_alignment asm_trace_hardirqs_on save=0 enable_irq_notrace ct_user_exit save=0 @@ -328,14 +328,6 @@ __sys_trace_return: bl syscall_trace_exit b ret_slow_syscall - .align 5 -#ifdef CONFIG_ALIGNMENT_TRAP - .type __cr_alignment, #object -__cr_alignment: - .word cr_alignment -#endif - .ltorg - .macro syscall_table_start, sym .equ __sys_nr, 0 .type \sym, #object diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 9a1dc142f782..5865621bf691 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -48,8 +48,7 @@ .macro alignment_trap, rtmp1, rtmp2, label #ifdef CONFIG_ALIGNMENT_TRAP mrc p15, 0, \rtmp2, c1, c0, 0 - ldr \rtmp1, \label - ldr \rtmp1, [\rtmp1] + ldr_va \rtmp1, \label teq \rtmp1, \rtmp2 mcrne p15, 0, \rtmp1, c1, c0, 0 #endif From c4f486f1e7b34b27ec578494a236061b337d50ae Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:55:35 +0100 Subject: [PATCH 05/10] ARM: 9198/1: spectre-bhb: simplify BPIALL vector macro The BPIALL mitigation for Spectre-BHB adds a single instruction to the handler sequence that doesn't clobber any registers. Given that these sequences are 10 instructions long, they don't fit neatly into a cacheline anyway, so we can simply move that single instruction to the start of the unmitigated one, and rearrange the symbol names accordingly. Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/entry-armv.S | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 6e7dfb4786e3..87cb06316aca 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1078,6 +1078,12 @@ __kuser_helper_end: */ .macro vector_stub, name, mode, correction=0 .align 5 +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +vector_bhb_bpiall_\name: + mcr p15, 0, r0, c7, c5, 6 @ BPIALL + @ isb not needed due to "movs pc, lr" in the vector stub + @ which gives a "context synchronisation". +#endif vector_\name: .if \correction @@ -1129,21 +1135,6 @@ vector_bhb_loop8_\name: isb b 2b ENDPROC(vector_bhb_loop8_\name) - -vector_bhb_bpiall_\name: - .if \correction - sub lr, lr, #\correction - .endif - - @ Save r0, lr_ (parent PC) - stmia sp, {r0, lr} - - @ bhb workaround - mcr p15, 0, r0, c7, c5, 6 @ BPIALL - @ isb not needed due to "movs pc, lr" in the vector stub - @ which gives a "context synchronisation". - b 2b -ENDPROC(vector_bhb_bpiall_\name) .previous #endif From 892c608a7d7380b9a7c8f0d6aab99b763fd6fd3f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:57:45 +0100 Subject: [PATCH 06/10] ARM: 9199/1: spectre-bhb: use local DSB and elide ISB in loop8 sequence The loop8 mitigation for Spectre-BHB only requires a CPU local DSB rather than a systemwide one, which is much more costly. And by the same reasoning as why it is justified to omit the ISB after BPIALL, we can also elide the ISB and rely on the exception return for the context synchronization. Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/entry-armv.S | 5 +++-- arch/arm/kernel/entry-common.S | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 87cb06316aca..43ab77553e84 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1131,8 +1131,9 @@ vector_bhb_loop8_\name: 3: W(b) . + 4 subs r0, r0, #1 bne 3b - dsb - isb + dsb nsh + @ isb not needed due to "movs pc, lr" in the vector stub + @ which gives a "context synchronisation". b 2b ENDPROC(vector_bhb_loop8_\name) .previous diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index ad3210e5cb69..7aa3ded4af92 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -164,7 +164,7 @@ ENTRY(vector_bhb_loop8_swi) 1: b 2f 2: subs r8, r8, #1 bne 1b - dsb + dsb nsh isb b 3f ENDPROC(vector_bhb_loop8_swi) From 1290c70d72b2959ba0a4e029edfcb7afa62a5c73 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 10:02:43 +0100 Subject: [PATCH 07/10] ARM: 9200/1: spectre-bhb: avoid cross-subsection jump using a numbered label In order to minimize potential confusion regarding numbered labels appearing in a different order in the assembler output due to the use of subsections, use a named local label to jump back into the vector handler code from the associated loop8 mitigation sequence. Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/entry-armv.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 43ab77553e84..df02044e1d6c 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1094,7 +1094,8 @@ vector_\name: stmia sp, {r0, lr} @ save r0, lr @ Save spsr_ (parent CPSR) -2: mrs lr, spsr +.Lvec_\name: + mrs lr, spsr str lr, [sp, #8] @ save spsr @ @@ -1134,7 +1135,7 @@ vector_bhb_loop8_\name: dsb nsh @ isb not needed due to "movs pc, lr" in the vector stub @ which gives a "context synchronisation". - b 2b + b .Lvec_\name ENDPROC(vector_bhb_loop8_\name) .previous #endif From ad12c2f1587c6ec9b52ff226f438955bfae6ad89 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 10:06:50 +0100 Subject: [PATCH 08/10] ARM: 9201/1: spectre-bhb: rely on linker to emit cross-section literal loads The assembler does not permit 'LDR PC, ' when the symbol lives in a different section, which is why we have been relying on rather fragile open-coded arithmetic to load the address of the vector_swi routine into the program counter using a single LDR instruction in the SWI slot in the vector table. The literal was moved to a different section to in commit 19accfd373847 ("ARM: move vector stubs") to ensure that the vector stubs page does not need to be mapped readable for user space, which is the case for the vector page itself, as it carries the kuser helpers as well. So the cross-section literal load is open-coded, and this relies on the address of vector_swi to be at the very start of the vector stubs page, and we won't notice if we got it wrong until booting the kernel and see it break. Fortunately, it was guaranteed to break, so this was fragile but not problematic. Now that we have added two other variants of the vector table, we have 3 occurrences of the same trick, and so the size of our ISA/compiler/CPU validation space has tripled, in a way that may cause regressions to only be observed once booting the image in question on a CPU that exercises a particular vector table. So let's switch to true cross section references, and let the linker fix them up like it fixes up all the other cross section references in the vector page. Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/entry-armv.S | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index df02044e1d6c..c39303e5c234 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1146,10 +1146,15 @@ ENDPROC(vector_bhb_loop8_\name) .endm .section .stubs, "ax", %progbits - @ This must be the first word + @ These need to remain at the start of the section so that + @ they are in range of the 'SWI' entries in the vector tables + @ located 4k down. +.L__vector_swi: .word vector_swi #ifdef CONFIG_HARDEN_BRANCH_HISTORY +.L__vector_bhb_loop8_swi: .word vector_bhb_loop8_swi +.L__vector_bhb_bpiall_swi: .word vector_bhb_bpiall_swi #endif @@ -1292,10 +1297,11 @@ vector_addrexcptn: .globl vector_fiq .section .vectors, "ax", %progbits -.L__vectors_start: W(b) vector_rst W(b) vector_und - W(ldr) pc, .L__vectors_start + 0x1000 +ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi ) +THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi ) + W(ldr) pc, . W(b) vector_pabt W(b) vector_dabt W(b) vector_addrexcptn @@ -1304,10 +1310,11 @@ vector_addrexcptn: #ifdef CONFIG_HARDEN_BRANCH_HISTORY .section .vectors.bhb.loop8, "ax", %progbits -.L__vectors_bhb_loop8_start: W(b) vector_rst W(b) vector_bhb_loop8_und - W(ldr) pc, .L__vectors_bhb_loop8_start + 0x1004 +ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi ) +THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi ) + W(ldr) pc, . W(b) vector_bhb_loop8_pabt W(b) vector_bhb_loop8_dabt W(b) vector_addrexcptn @@ -1315,10 +1322,11 @@ vector_addrexcptn: W(b) vector_bhb_loop8_fiq .section .vectors.bhb.bpiall, "ax", %progbits -.L__vectors_bhb_bpiall_start: W(b) vector_rst W(b) vector_bhb_bpiall_und - W(ldr) pc, .L__vectors_bhb_bpiall_start + 0x1008 +ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi ) +THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_bpiall_swi ) + W(ldr) pc, . W(b) vector_bhb_bpiall_pabt W(b) vector_bhb_bpiall_dabt W(b) vector_addrexcptn From 8294fec1cab7ae6153525eb68401ed5905921371 Mon Sep 17 00:00:00 2001 From: Nick Hawkins Date: Wed, 18 May 2022 14:38:37 +0100 Subject: [PATCH 09/10] ARM: 9206/1: A9: Add ARM ERRATA 764319 workaround (Updated) Enable the workaround for the 764319 Cortex A-9 erratum. CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an unexpected Undefined Instruction exception when the DBGSWENABLE external pin is set to 0, even when the CP14 accesses are performed from a privileged mode. The work around catches the exception in a way the kernel does not stop execution with the use of undef_hook. This has been found to effect the HPE GXP SoC. Signed-off-by: Nick Hawkins Reviewed-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/Kconfig | 11 +++++++++++ arch/arm/kernel/hw_breakpoint.c | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2e8091e2d8a8..0dcf88e7f9cf 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -972,6 +972,17 @@ config ARM_ERRATA_764369 relevant cache maintenance functions and sets a specific bit in the diagnostic control register of the SCU. +config ARM_ERRATA_764319 + bool "ARM errata: Read to DBGPRSR and DBGOSLSR may generate Undefined instruction" + depends on CPU_V7 + help + This option enables the workaround for the 764319 Cortex A-9 erratum. + CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an + unexpected Undefined Instruction exception when the DBGSWENABLE + external pin is set to 0, even when the CP14 accesses are performed + from a privileged mode. This work around catches the exception in a + way the kernel does not stop execution. + config ARM_ERRATA_775420 bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock" depends on CPU_V7 diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index b1423fb130ea..054e9199f30d 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -941,6 +941,23 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, return ret; } +#ifdef CONFIG_ARM_ERRATA_764319 +static int oslsr_fault; + +static int debug_oslsr_trap(struct pt_regs *regs, unsigned int instr) +{ + oslsr_fault = 1; + instruction_pointer(regs) += 4; + return 0; +} + +static struct undef_hook debug_oslsr_hook = { + .instr_mask = 0xffffffff, + .instr_val = 0xee115e91, + .fn = debug_oslsr_trap, +}; +#endif + /* * One-time initialisation. */ @@ -974,7 +991,16 @@ static bool core_has_os_save_restore(void) case ARM_DEBUG_ARCH_V7_1: return true; case ARM_DEBUG_ARCH_V7_ECP14: +#ifdef CONFIG_ARM_ERRATA_764319 + oslsr_fault = 0; + register_undef_hook(&debug_oslsr_hook); ARM_DBG_READ(c1, c1, 4, oslsr); + unregister_undef_hook(&debug_oslsr_hook); + if (oslsr_fault) + return false; +#else + ARM_DBG_READ(c1, c1, 4, oslsr); +#endif if (oslsr & ARM_OSLSR_OSLM0) return true; fallthrough; From b6f21d14f1ac1261579b691673a0c823275cbaf8 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Fri, 20 May 2022 08:49:55 +0100 Subject: [PATCH 10/10] ARM: 9204/2: module: Add all unwind tables when load module For EABI stack unwinding, when loading .ko module the EXIDX sections will be added to a unwind_table list. However not all EXIDX sections are added because EXIDX sections are searched by hardcoded section names. For functions in other sections such as .ref.text or .kprobes.text, gcc generates seprated EXIDX sections (such as .ARM.exidx.ref.text or .ARM.exidx.kprobes.text). These extra EXIDX sections are not loaded, so when unwinding functions in these sections, we will failed with: unwind: Index not found xxx To fix that, I refactor the code for searching and adding EXIDX sections: - Check section type to search EXIDX tables (0x70000001) instead of strcmp() the hardcoded names. Then find the corresponding text sections by their section names. - Add a unwind_table list in module->arch to save their own unwind_table instead of the fixed-lenth array. - Save .ARM.exidx.init.text section ptr, because it should be cleaned after module init. Now all EXIDX sections of .ko can be added correctly. Signed-off-by: Chen Zhongjin Acked-by: Linus Walleij Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/module.h | 17 ++------ arch/arm/include/asm/unwind.h | 1 + arch/arm/kernel/module.c | 78 ++++++++++++++++++----------------- 3 files changed, 45 insertions(+), 51 deletions(-) diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index cfffae67c04e..5546c9751478 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -3,20 +3,10 @@ #define _ASM_ARM_MODULE_H #include - -struct unwind_table; +#include #ifdef CONFIG_ARM_UNWIND -enum { - ARM_SEC_INIT, - ARM_SEC_DEVINIT, - ARM_SEC_CORE, - ARM_SEC_EXIT, - ARM_SEC_DEVEXIT, - ARM_SEC_HOT, - ARM_SEC_UNLIKELY, - ARM_SEC_MAX, -}; +#define ELF_SECTION_UNWIND 0x70000001 #endif #define PLT_ENT_STRIDE L1_CACHE_BYTES @@ -36,7 +26,8 @@ struct mod_plt_sec { struct mod_arch_specific { #ifdef CONFIG_ARM_UNWIND - struct unwind_table *unwind[ARM_SEC_MAX]; + struct list_head unwind_list; + struct unwind_table *init_table; #endif #ifdef CONFIG_ARM_MODULE_PLTS struct mod_plt_sec core; diff --git a/arch/arm/include/asm/unwind.h b/arch/arm/include/asm/unwind.h index 0f8a3439902d..b51f85417f58 100644 --- a/arch/arm/include/asm/unwind.h +++ b/arch/arm/include/asm/unwind.h @@ -24,6 +24,7 @@ struct unwind_idx { struct unwind_table { struct list_head list; + struct list_head mod_list; const struct unwind_idx *start; const struct unwind_idx *origin; const struct unwind_idx *stop; diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 549abcedf795..d59c36dc0494 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -459,46 +459,40 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, #ifdef CONFIG_ARM_UNWIND const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum; - struct mod_unwind_map maps[ARM_SEC_MAX]; - int i; + struct list_head *unwind_list = &mod->arch.unwind_list; - memset(maps, 0, sizeof(maps)); + INIT_LIST_HEAD(unwind_list); + mod->arch.init_table = NULL; for (s = sechdrs; s < sechdrs_end; s++) { const char *secname = secstrs + s->sh_name; + const char *txtname; + const Elf_Shdr *txt_sec; - if (!(s->sh_flags & SHF_ALLOC)) + if (!(s->sh_flags & SHF_ALLOC) || + s->sh_type != ELF_SECTION_UNWIND) continue; - if (strcmp(".ARM.exidx.init.text", secname) == 0) - maps[ARM_SEC_INIT].unw_sec = s; - else if (strcmp(".ARM.exidx", secname) == 0) - maps[ARM_SEC_CORE].unw_sec = s; - else if (strcmp(".ARM.exidx.exit.text", secname) == 0) - maps[ARM_SEC_EXIT].unw_sec = s; - else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0) - maps[ARM_SEC_UNLIKELY].unw_sec = s; - else if (strcmp(".ARM.exidx.text.hot", secname) == 0) - maps[ARM_SEC_HOT].unw_sec = s; - else if (strcmp(".init.text", secname) == 0) - maps[ARM_SEC_INIT].txt_sec = s; - else if (strcmp(".text", secname) == 0) - maps[ARM_SEC_CORE].txt_sec = s; - else if (strcmp(".exit.text", secname) == 0) - maps[ARM_SEC_EXIT].txt_sec = s; - else if (strcmp(".text.unlikely", secname) == 0) - maps[ARM_SEC_UNLIKELY].txt_sec = s; - else if (strcmp(".text.hot", secname) == 0) - maps[ARM_SEC_HOT].txt_sec = s; - } + if (!strcmp(".ARM.exidx", secname)) + txtname = ".text"; + else + txtname = secname + strlen(".ARM.exidx"); + txt_sec = find_mod_section(hdr, sechdrs, txtname); - for (i = 0; i < ARM_SEC_MAX; i++) - if (maps[i].unw_sec && maps[i].txt_sec) - mod->arch.unwind[i] = - unwind_table_add(maps[i].unw_sec->sh_addr, - maps[i].unw_sec->sh_size, - maps[i].txt_sec->sh_addr, - maps[i].txt_sec->sh_size); + if (txt_sec) { + struct unwind_table *table = + unwind_table_add(s->sh_addr, + s->sh_size, + txt_sec->sh_addr, + txt_sec->sh_size); + + list_add(&table->mod_list, unwind_list); + + /* save init table for module_arch_freeing_init */ + if (strcmp(".ARM.exidx.init.text", secname) == 0) + mod->arch.init_table = table; + } + } #endif #ifdef CONFIG_ARM_PATCH_PHYS_VIRT s = find_mod_section(hdr, sechdrs, ".pv_table"); @@ -519,19 +513,27 @@ void module_arch_cleanup(struct module *mod) { #ifdef CONFIG_ARM_UNWIND - int i; + struct unwind_table *tmp; + struct unwind_table *n; - for (i = 0; i < ARM_SEC_MAX; i++) { - unwind_table_del(mod->arch.unwind[i]); - mod->arch.unwind[i] = NULL; + list_for_each_entry_safe(tmp, n, + &mod->arch.unwind_list, mod_list) { + list_del(&tmp->mod_list); + unwind_table_del(tmp); } + mod->arch.init_table = NULL; #endif } void __weak module_arch_freeing_init(struct module *mod) { #ifdef CONFIG_ARM_UNWIND - unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]); - mod->arch.unwind[ARM_SEC_INIT] = NULL; + struct unwind_table *init = mod->arch.init_table; + + if (init) { + mod->arch.init_table = NULL; + list_del(&init->mod_list); + unwind_table_del(init); + } #endif }