2019-06-04 16:11:33 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* linux/arch/arm/kernel/entry-armv.S
|
|
|
|
*
|
|
|
|
* Copyright (C) 1996,1997,1998 Russell King.
|
|
|
|
* ARM700 fix by Matthew Godbolt (linux-user@willothewisp.demon.co.uk)
|
2006-01-14 05:05:25 +08:00
|
|
|
* nommu support by Hyok S. Choi (hyok.choi@samsung.com)
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
|
|
|
* Low-level vector interface routines
|
|
|
|
*
|
2007-12-04 21:33:33 +08:00
|
|
|
* Note: there is a StrongARM bug in the STMIA rn, {regs}^ instruction
|
|
|
|
* that causes it to save wrong values... Be aware!
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
|
2015-05-02 08:13:42 +08:00
|
|
|
#include <linux/init.h>
|
|
|
|
|
2012-03-11 00:30:31 +08:00
|
|
|
#include <asm/assembler.h>
|
ARM: mm: Make virt_to_pfn() a static inline
Making virt_to_pfn() a static inline taking a strongly typed
(const void *) makes the contract of a passing a pointer of that
type to the function explicit and exposes any misuse of the
macro virt_to_pfn() acting polymorphic and accepting many types
such as (void *), (unitptr_t) or (unsigned long) as arguments
without warnings.
Doing this is a bit intrusive: virt_to_pfn() requires
PHYS_PFN_OFFSET and PAGE_SHIFT to be defined, and this is defined in
<asm/page.h>, so this must be included *before* <asm/memory.h>.
The use of macros were obscuring the unclear inclusion order here,
as the macros would eventually be resolved, but a static inline
like this cannot be compiled with unresolved macros.
The naive solution to include <asm/page.h> at the top of
<asm/memory.h> does not work, because <asm/memory.h> sometimes
includes <asm/page.h> at the end of itself, which would create a
confusing inclusion loop. So instead, take the approach to always
unconditionally include <asm/page.h> at the end of <asm/memory.h>
arch/arm uses <asm/memory.h> explicitly in a lot of places,
however it turns out that if we just unconditionally include
<asm/memory.h> into <asm/page.h> and switch all inclusions of
<asm/memory.h> to <asm/page.h> instead, we enforce the right
order and <asm/memory.h> will always have access to the
definitions.
Put an inclusion guard in place making it impossible to include
<asm/memory.h> explicitly.
Link: https://lore.kernel.org/linux-mm/20220701160004.2ffff4e5ab59a55499f4c736@linux-foundation.org/
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2022-06-02 16:18:32 +08:00
|
|
|
#include <asm/page.h>
|
2011-02-06 23:32:24 +08:00
|
|
|
#include <asm/glue-df.h>
|
|
|
|
#include <asm/glue-pf.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/vfpmacros.h>
|
2006-06-21 20:31:52 +08:00
|
|
|
#include <asm/thread_notify.h>
|
2009-02-16 18:42:09 +08:00
|
|
|
#include <asm/unwind.h>
|
2009-11-10 07:53:29 +08:00
|
|
|
#include <asm/unistd.h>
|
2010-07-05 21:53:10 +08:00
|
|
|
#include <asm/tls.h>
|
2012-03-29 01:30:01 +08:00
|
|
|
#include <asm/system_info.h>
|
2020-05-03 20:03:54 +08:00
|
|
|
#include <asm/uaccess-asm.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#include "entry-header.S"
|
2015-01-05 19:29:25 +08:00
|
|
|
#include <asm/probes.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-05-22 01:14:44 +08:00
|
|
|
/*
|
2011-06-26 17:34:02 +08:00
|
|
|
* Interrupt handling.
|
2005-05-22 01:14:44 +08:00
|
|
|
*/
|
2021-10-05 15:15:40 +08:00
|
|
|
	@
	@ Interrupt dispatch. Expects the saved exception context at SP.
	@ \from_user tells the macro whether the IRQ was taken from user
	@ space (1) or from the kernel (0); only the kernel case can already
	@ be executing on the per-CPU IRQ stack.
	@
	.macro	irq_handler, from_user:req
	mov	r1, sp
	ldr_this_cpu r2, irq_stack_ptr, r2, r3
	.if	\from_user == 0
	@
	@ If we took the interrupt while running in the kernel, we may already
	@ be using the IRQ stack, so revert to the original value in that case.
	@
	subs	r3, r2, r1		@ SP above bottom of IRQ stack?
	rsbscs	r3, r3, #THREAD_SIZE	@ ... and below the top?
#ifdef CONFIG_VMAP_STACK
	ldr_va	r3, high_memory, cc	@ End of the linear region
	cmpcc	r3, r1			@ Stack pointer was below it?
#endif
	bcc	0f			@ If not, switch to the IRQ stack
	@ Already on the IRQ stack: call the handler directly, no switch.
	mov	r0, r1
	bl	generic_handle_arch_irq
	b	1f
0:
	.endif

	@ Switch to the IRQ stack and invoke the handler from there;
	@ call_with_stack() carries the unwind annotations for the switch.
	mov_l	r0, generic_handle_arch_irq
	bl	call_with_stack
1:
	.endm
|
|
|
|
|
2011-06-26 17:22:08 +08:00
|
|
|
	@
	@ Dispatch to the CPU-specific prefetch-abort handler, either via the
	@ runtime 'processor' vector table (MULTI_PABORT builds) or directly.
	@
	.macro	pabt_helper
	@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
#ifdef MULTI_PABORT
	ldr_va	ip, processor, offset=PROCESSOR_PABT_FUNC
	bl_r	ip
#else
	bl	CPU_PABORT_HANDLER
#endif
	.endm
|
|
|
|
|
|
|
|
	.macro	dabt_helper

	@
	@ Call the processor-specific abort handler:
	@
	@  r2 - pt_regs
	@  r4 - aborted context pc
	@  r5 - aborted context psr
	@
	@ The abort handler must return the aborted address in r0, and
	@ the fault status register in r1.  r9 must be preserved.
	@
#ifdef MULTI_DABORT
	@ Indirect call through the per-CPU 'processor' structure.
	ldr_va	ip, processor, offset=PROCESSOR_DABT_FUNC
	bl_r	ip
#else
	bl	CPU_DABORT_HANDLER
#endif
	.endm
|
|
|
|
|
2017-11-25 07:54:22 +08:00
|
|
|
.section .entry.text,"ax",%progbits
|
2007-12-04 04:27:56 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Invalid mode handlers
|
|
|
|
*/
|
2005-06-01 05:22:32 +08:00
|
|
|
	@
	@ Minimal register save for an exception taken in an invalid mode;
	@ \reason (a BAD_* code) is left in r1 for common_invalid/bad_mode.
	@
	.macro	inv_entry, reason
	sub	sp, sp, #PT_REGS_SIZE
 ARM(	stmib	sp, {r1 - lr}		)
 THUMB(	stmia	sp, {r0 - r12}		)	@ no STMIB in Thumb-2
 THUMB(	str	sp, [sp, #S_SP]		)
 THUMB(	str	lr, [sp, #S_LR]		)
	mov	r1, #\reason
	.endm
|
|
|
|
|
|
|
|
@ Prefetch abort taken in an invalid mode.
__pabt_invalid:
	inv_entry BAD_PREFETCH
	b	common_invalid
ENDPROC(__pabt_invalid)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
@ Data abort taken in an invalid mode.
__dabt_invalid:
	inv_entry BAD_DATA
	b	common_invalid
ENDPROC(__dabt_invalid)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
@ IRQ taken in an invalid mode.
__irq_invalid:
	inv_entry BAD_IRQ
	b	common_invalid
ENDPROC(__irq_invalid)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
@ Undefined instruction taken in an invalid mode.
__und_invalid:
	inv_entry BAD_UNDEFINSTR

	@
	@ XXX fall through to common_invalid
	@

@
@ common_invalid - generic code for failed exception (re-entrant version of handlers)
@
common_invalid:
	zero_fp

	@ r0 points at the saved exception context; fetch the preserved r0,
	@ the exception lr and spsr from it, then complete the pt_regs frame.
	ldmia	r0, {r4 - r6}
	add	r0, sp, #S_PC		@ here for interlock avoidance
	mov	r7, #-1			@  ""   ""    ""        ""
	str	r4, [sp]		@ save preserved r0
	stmia	r0, {r5 - r7}		@ lr_<exception>,
					@ cpsr_<exception>, "old_r0"

	mov	r0, sp
	b	bad_mode
ENDPROC(__und_invalid)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* SVC mode handlers
|
|
|
|
*/
|
2006-01-15 00:18:08 +08:00
|
|
|
|
|
|
|
/*
 * SPFIX() emits its argument only on AEABI v5+ builds, where the SVC
 * stack must be kept 8-byte aligned and svc_entry may have to insert a
 * 4-byte adjustment; elsewhere it expands to nothing.
 */
#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
#define SPFIX(code...) code
#else
#define SPFIX(code...)
#endif
|
|
|
|
|
2021-09-23 15:15:53 +08:00
|
|
|
	@
	@ Common SVC-mode exception entry: builds a struct pt_regs on the SVC
	@ stack. \stack_hole reserves extra space below the frame (kprobes),
	@ \trace controls the hardirq-tracing call, \uaccess the uaccess state
	@ save, and \overflow_check the VMAP_STACK overflow test.
	@ On entry r0 points at the mode-specific exception stub's saved
	@ {r0, lr_<exc>, spsr_<exc>} triple.
	@
	.macro	svc_entry, stack_hole=0, trace=1, uaccess=1, overflow_check=1
 UNWIND(.fnstart		)
	sub	sp, sp, #(SVC_REGS_SIZE + \stack_hole)
 THUMB(	add	sp, r1		)	@ get SP in a GPR without
 THUMB(	sub	r1, sp, r1	)	@ using a temp register

	.if	\overflow_check
 UNWIND(.save	{r0 - pc}	)
	do_overflow_check (SVC_REGS_SIZE + \stack_hole)
	.endif

	@ Keep the frame 8-byte aligned: test SP alignment without using the
	@ stack (Thumb-2 cannot TST on SP directly, hence the r1 dance).
#ifdef CONFIG_THUMB2_KERNEL
	tst	r1, #4			@ test stack pointer alignment
	sub	r1, sp, r1		@ restore original R1
	sub	sp, r1			@ restore original SP
#else
 SPFIX(	tst	sp, #4		)
#endif
 SPFIX(	subne	sp, sp, #4	)	@ NE still from the TST above

 ARM(	stmib	sp, {r1 - r12}	)
 THUMB(	stmia	sp, {r0 - r12}	)	@ No STMIB in Thumb-2

	@ r0 -> saved {r0, lr_<exc>, spsr_<exc>} from the exception stub
	ldmia	r0, {r3 - r5}
	add	r7, sp, #S_SP		@ here for interlock avoidance
	mov	r6, #-1			@  ""  ""      ""       ""
	add	r2, sp, #(SVC_REGS_SIZE + \stack_hole)
 SPFIX(	addne	r2, r2, #4	)	@ undo the alignment adjustment
	str	r3, [sp]		@ save the "real" r0 copied
					@ from the exception stack

	mov	r3, lr

	@
	@ We are now ready to fill in the remaining blanks on the stack:
	@
	@  r2 - sp_svc
	@  r3 - lr_svc
	@  r4 - lr_<exception>, already fixed up for correct return/restart
	@  r5 - spsr_<exception>
	@  r6 - orig_r0 (see pt_regs definition in ptrace.h)
	@
	stmia	r7, {r2 - r6}

	get_thread_info tsk
	uaccess_entry tsk, r0, r1, r2, \uaccess

	.if \trace
#ifdef CONFIG_TRACE_IRQFLAGS
	bl	trace_hardirqs_off
#endif
	.endif
	.endm
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2011-06-26 00:35:19 +08:00
|
|
|
.align	5
@ Data abort taken in SVC mode.
__dabt_svc:
	svc_entry uaccess=0
	mov	r2, sp			@ pt_regs for dabt_helper
	dabt_helper
 THUMB(	ldr	r5, [sp, #S_PSR]	)	@ potentially updated CPSR
	svc_exit r5				@ return from exception
 UNWIND(.fnend		)
ENDPROC(__dabt_svc)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
.align	5
@ IRQ taken in SVC mode; may trigger kernel preemption on the way out.
__irq_svc:
	svc_entry
	irq_handler from_user=0

#ifdef CONFIG_PREEMPTION
	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
	teq	r8, #0				@ if preempt count != 0
	movne	r0, #0				@ force flags to 0
	tst	r0, #_TIF_NEED_RESCHED
	blne	svc_preempt
#endif

	svc_exit r5, irq = 1			@ return from exception
 UNWIND(.fnend		)
ENDPROC(__irq_svc)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
.ltorg
|
|
|
|
|
2019-10-16 03:17:48 +08:00
|
|
|
#ifdef CONFIG_PREEMPTION
@ Loop calling preempt_schedule_irq() until TIF_NEED_RESCHED is clear,
@ then return to the saved lr (r8).
svc_preempt:
	mov	r8, lr
1:	bl	preempt_schedule_irq		@ irq en/disable is done inside
	ldr	r0, [tsk, #TI_FLAGS]		@ get new tasks TI_FLAGS
	tst	r0, #_TIF_NEED_RESCHED
	reteq	r8				@ go again
	b	1b
#endif
|
|
|
|
|
2012-07-31 02:42:10 +08:00
|
|
|
@ Shared tail for undefined-instruction handling: r0 = pt_regs,
@ r1 = PC correction (4 for ARM, 2 for Thumb).
__und_fault:
	@ Correct the PC such that it is pointing at the instruction
	@ which caused the fault.  If the faulting instruction was ARM
	@ the PC will be pointing at the next instruction, and have to
	@ subtract 4.  Otherwise, it is Thumb, and the PC will be
	@ pointing at the second half of the Thumb instruction.  We
	@ have to subtract 2.
	ldr	r2, [r0, #S_PC]
	sub	r2, r2, r1
	str	r2, [r0, #S_PC]
	b	do_undefinstr
ENDPROC(__und_fault)
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
.align	5
@ Undefined instruction taken in SVC mode.
__und_svc:
#ifdef CONFIG_KPROBES
	@ If a kprobe is about to simulate a "stmdb sp..." instruction,
	@ it obviously needs free stack space which then will belong to
	@ the saved context.
	svc_entry MAX_STACK_SIZE
#else
	svc_entry
#endif

	mov	r1, #4				@ PC correction to apply
 THUMB(	tst	r5, #PSR_T_BIT		)	@ exception taken in Thumb mode?
 THUMB(	movne	r1, #2			)	@ if so, fix up PC correction
	mov	r0, sp				@ struct pt_regs *regs
	bl	__und_fault

__und_svc_finish:
	get_thread_info tsk
	ldr	r5, [sp, #S_PSR]		@ Get SVC cpsr
	svc_exit r5				@ return from exception
 UNWIND(.fnend		)
ENDPROC(__und_svc)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
.align 5
|
|
|
|
__pabt_svc:
@ Prefetch abort taken while in SVC mode: build the pt_regs
@ frame, invoke the processor-specific abort helper with
@ r2 = regs, then restore the saved context.
|
2005-06-01 05:22:32 +08:00
|
|
|
svc_entry
|
2009-09-25 20:39:47 +08:00
|
|
|
mov r2, sp @ regs
|
2011-06-26 19:37:35 +08:00
|
|
|
pabt_helper @ dispatch to the CPU's pabort handler
|
2011-06-25 22:44:20 +08:00
|
|
|
svc_exit r5 @ return from exception
|
2009-02-16 18:42:09 +08:00
|
|
|
UNWIND(.fnend )
|
2008-08-28 18:22:32 +08:00
|
|
|
ENDPROC(__pabt_svc)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2014-09-18 00:12:06 +08:00
|
|
|
.align 5
|
|
|
|
__fiq_svc:
@ FIQ taken while in SVC mode — handled as an NMI.
@ trace=0: no irq-tracing/context-tracking calls are made on
@ this path (NOTE(review): presumably because they are not
@ NMI-safe — confirm against handle_fiq_as_nmi's requirements).
|
|
|
|
svc_entry trace=0
|
|
|
|
mov r0, sp @ struct pt_regs *regs
|
|
|
|
bl handle_fiq_as_nmi
|
|
|
|
svc_exit_via_fiq @ FIQ-specific restore sequence
|
|
|
|
UNWIND(.fnend )
|
|
|
|
ENDPROC(__fiq_svc)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Abort mode handlers
|
|
|
|
*/
|
|
|
|
|
|
|
|
@
|
|
|
|
@ Taking a FIQ in abort mode is similar to taking a FIQ in SVC mode
|
|
|
|
@ and reuses the same macros. However in abort mode we must also
|
|
|
|
@ save/restore lr_abt and spsr_abt to make nested aborts safe.
|
|
|
|
@
|
|
|
|
.align 5
|
|
|
|
__fiq_abt:
@ FIQ taken while in ABT (abort) mode.  Same shape as __fiq_svc,
@ but the banked lr_abt/spsr_abt must be preserved across the
@ handler so that a nested abort remains safe (see the block
@ comment above this routine).  Mode switches below keep both
@ IRQs and FIQs masked (PSR_I_BIT | PSR_F_BIT).
|
|
|
|
svc_entry trace=0
|
|
|
|
|
|
|
|
@ Switch to ABT mode to read the banked lr_abt and spsr_abt.
ARM( msr cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
|
|
|
|
THUMB( mov r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
|
|
|
|
THUMB( msr cpsr_c, r0 )
|
|
|
|
mov r1, lr @ Save lr_abt
|
|
|
|
mrs r2, spsr @ Save spsr_abt, abort is now safe
|
|
|
|
ARM( msr cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
|
|
|
|
THUMB( mov r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
|
|
|
|
THUMB( msr cpsr_c, r0 )
|
|
|
|
stmfd sp!, {r1 - r2} @ stash lr_abt/spsr_abt below the frame
|
|
|
|
|
|
|
|
add r0, sp, #8 @ struct pt_regs *regs
|
|
|
|
bl handle_fiq_as_nmi
|
|
|
|
|
|
|
|
ldmfd sp!, {r1 - r2} @ pop saved lr_abt/spsr_abt
|
|
|
|
@ Back to ABT mode to rewrite the banked registers.
ARM( msr cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
|
|
|
|
THUMB( mov r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
|
|
|
|
THUMB( msr cpsr_c, r0 )
|
|
|
|
mov lr, r1 @ Restore lr_abt, abort is unsafe
|
|
|
|
msr spsr_cxsf, r2 @ Restore spsr_abt
|
|
|
|
ARM( msr cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
|
|
|
|
THUMB( mov r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
|
|
|
|
THUMB( msr cpsr_c, r0 )
|
|
|
|
|
|
|
|
svc_exit_via_fiq
|
|
|
|
UNWIND(.fnend )
|
|
|
|
ENDPROC(__fiq_abt)
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* User mode handlers
|
2006-01-15 00:18:08 +08:00
|
|
|
*
|
2016-05-10 23:34:27 +08:00
|
|
|
* EABI note: sp_svc is always 64-bit aligned here, so should PT_REGS_SIZE
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2006-01-15 00:18:08 +08:00
|
|
|
|
2016-05-10 23:34:27 +08:00
|
|
|
#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) && (PT_REGS_SIZE & 7)
|
2006-01-15 00:18:08 +08:00
|
|
|
#error "sizeof(struct pt_regs) must be a multiple of 8"
|
|
|
|
#endif
|
|
|
|
|
2015-08-20 17:32:02 +08:00
|
|
|
.macro usr_entry, trace=1, uaccess=1
@ Build a struct pt_regs on the SVC stack for an exception taken
@ from user mode.  On entry r0 points at the three words the
@ vector stub saved on the per-mode exception stack (the real r0,
@ lr_<exception>, spsr_<exception>) — see the ldmia below.
@ trace=0 skips the irq-tracing/context-tracking calls;
@ uaccess=0 skips uaccess_disable (callers on that path, e.g.
@ __und_usr, disable it themselves once user memory has been read).
|
2009-02-16 18:42:09 +08:00
|
|
|
UNWIND(.fnstart )
|
|
|
|
UNWIND(.cantunwind ) @ don't unwind the user space
|
2016-05-10 23:34:27 +08:00
|
|
|
sub sp, sp, #PT_REGS_SIZE
|
2009-07-24 19:32:54 +08:00
|
|
|
ARM( stmib sp, {r1 - r12} )
|
|
|
|
THUMB( stmia sp, {r0 - r12} )
|
2005-06-01 05:22:32 +08:00
|
|
|
|
2014-08-28 20:08:14 +08:00
|
|
|
ATRAP( mrc p15, 0, r7, c1, c0, 0) @ r7 = live SCTLR
|
2022-04-20 16:41:31 +08:00
|
|
|
ATRAP( ldr_va r8, cr_alignment) @ r8 = kernel's desired SCTLR
|
2014-08-28 20:08:14 +08:00
|
|
|
|
2011-06-25 22:44:20 +08:00
|
|
|
ldmia r0, {r3 - r5} @ r3-r5 = saved r0, lr_<exc>, spsr_<exc>
|
2005-06-01 05:22:32 +08:00
|
|
|
add r0, sp, #S_PC @ here for interlock avoidance
|
2011-06-25 22:44:20 +08:00
|
|
|
mov r6, #-1 @ "" "" "" ""
|
2005-06-01 05:22:32 +08:00
|
|
|
|
2011-06-25 22:44:20 +08:00
|
|
|
str r3, [sp] @ save the "real" r0 copied
|
2005-06-01 05:22:32 +08:00
|
|
|
@ from the exception stack
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
@
|
|
|
|
@ We are now ready to fill in the remaining blanks on the stack:
|
|
|
|
@
|
2011-06-25 22:44:20 +08:00
|
|
|
@ r4 - lr_<exception>, already fixed up for correct return/restart
|
|
|
|
@ r5 - spsr_<exception>
|
|
|
|
@ r6 - orig_r0 (see pt_regs definition in ptrace.h)
|
2005-04-17 06:20:36 +08:00
|
|
|
@
|
|
|
|
@ Also, separately save sp_usr and lr_usr
|
|
|
|
@
|
2011-06-25 22:44:20 +08:00
|
|
|
stmia r0, {r4 - r6}
|
2009-07-24 19:32:54 +08:00
|
|
|
ARM( stmdb r0, {sp, lr}^ ) @ ^ = store the USER banked sp/lr
|
|
|
|
THUMB( store_user_sp_lr r0, r1, S_SP - S_PC )
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2015-08-20 17:32:02 +08:00
|
|
|
.if \uaccess
|
|
|
|
uaccess_disable ip
|
|
|
|
.endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
@ Enable the alignment trap while in kernel mode
|
2014-08-28 20:08:14 +08:00
|
|
|
ATRAP( teq r8, r7) @ only write SCTLR if it differs
|
|
|
|
ATRAP( mcrne p15, 0, r8, c1, c0, 0)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
ARM: smp: Store current pointer in TPIDRURO register if available
Now that the user space TLS register is assigned on every return to user
space, we can use it to keep the 'current' pointer while running in the
kernel. This removes the need to access it via thread_info, which is
located at the base of the stack, but will be moved out of there in a
subsequent patch.
Use the __builtin_thread_pointer() helper when available - this will
help GCC understand that reloading the value within the same function is
not necessary, even when using the per-task stack protector (which also
generates accesses via the TLS register). For example, the generated
code below loads TPIDRURO only once, and uses it to access both the
stack canary and the preempt_count fields.
<do_one_initcall>:
e92d 41f0 stmdb sp!, {r4, r5, r6, r7, r8, lr}
ee1d 4f70 mrc 15, 0, r4, cr13, cr0, {3}
4606 mov r6, r0
b094 sub sp, #80 ; 0x50
f8d4 34e8 ldr.w r3, [r4, #1256] ; 0x4e8 <- stack canary
9313 str r3, [sp, #76] ; 0x4c
f8d4 8004 ldr.w r8, [r4, #4] <- preempt count
Co-developed-by: Keith Packard <keithpac@amazon.com>
Signed-off-by: Keith Packard <keithpac@amazon.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
2021-09-18 16:44:37 +08:00
|
|
|
reload_current r7, r8
|
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
@
|
|
|
|
@ Clear FP to mark the first stack frame
|
|
|
|
@
|
|
|
|
zero_fp
|
2011-06-26 00:35:19 +08:00
|
|
|
|
2014-09-18 00:12:06 +08:00
|
|
|
.if \trace
|
2015-07-03 19:42:36 +08:00
|
|
|
#ifdef CONFIG_TRACE_IRQFLAGS
|
2011-06-26 00:35:19 +08:00
|
|
|
bl trace_hardirqs_off
|
|
|
|
#endif
|
2013-03-29 05:54:40 +08:00
|
|
|
ct_user_exit save = 0
|
2014-09-18 00:12:06 +08:00
|
|
|
.endif
|
2005-04-17 06:20:36 +08:00
|
|
|
.endm
|
|
|
|
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
.macro kuser_cmpxchg_check
@ If the exception interrupted the 64-bit kuser cmpxchg helper in
@ its critical region, branch to the out-of-line fixup so the
@ operation is restarted rather than spuriously failing.  Only
@ compiled in when the kuser helpers are enabled on pre-v6K CPUs
@ (no ldrex/strex).  r4 holds lr_<exception> — the interrupted
@ user PC saved by usr_entry; a PC >= TASK_SIZE means it was
@ inside the kuser helper page.
|
2015-09-22 02:34:28 +08:00
|
|
|
#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS)
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
#ifndef CONFIG_MMU
|
|
|
|
|
#warning "NPTL on non MMU needs fixing"
|
|
|
|
|
#else
|
|
|
|
|
@ Make sure our user space atomic helper is restarted
|
|
|
|
|
@ if it was interrupted in a critical region. Here we
|
|
|
|
|
@ perform a quick test inline since it should be false
|
|
|
|
|
@ 99.9999% of the time. The rest is done out of line.
|
ARM: 9015/2: Define the virtual space of KASan's shadow region
Define KASAN_SHADOW_OFFSET,KASAN_SHADOW_START and KASAN_SHADOW_END for
the Arm kernel address sanitizer. We are "stealing" lowmem (the 4GB
addressable by a 32bit architecture) out of the virtual address
space to use as shadow memory for KASan as follows:
+----+ 0xffffffff
| |
| | |-> Static kernel image (vmlinux) BSS and page table
| |/
+----+ PAGE_OFFSET
| |
| | |-> Loadable kernel modules virtual address space area
| |/
+----+ MODULES_VADDR = KASAN_SHADOW_END
| |
| | |-> The shadow area of kernel virtual address.
| |/
+----+-> TASK_SIZE (start of kernel space) = KASAN_SHADOW_START the
| | shadow address of MODULES_VADDR
| | |
| | |
| | |-> The user space area in lowmem. The kernel address
| | | sanitizer do not use this space, nor does it map it.
| | |
| | |
| | |
| | |
| |/
------ 0
0 .. TASK_SIZE is the memory that can be used by shared
userspace/kernelspace. It us used for userspace processes and for
passing parameters and memory buffers in system calls etc. We do not
need to shadow this area.
KASAN_SHADOW_START:
This value begins with the MODULE_VADDR's shadow address. It is the
start of kernel virtual space. Since we have modules to load, we need
to cover also that area with shadow memory so we can find memory
bugs in modules.
KASAN_SHADOW_END
This value is the 0x100000000's shadow address: the mapping that would
be after the end of the kernel memory at 0xffffffff. It is the end of
kernel address sanitizer shadow area. It is also the start of the
module area.
KASAN_SHADOW_OFFSET:
This value is used to map an address to the corresponding shadow
address by the following formula:
shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
As you would expect, >> 3 is equal to dividing by 8, meaning each
byte in the shadow memory covers 8 bytes of kernel memory, so one
bit shadow memory per byte of kernel memory is used.
The KASAN_SHADOW_OFFSET is provided in a Kconfig option depending
on the VMSPLIT layout of the system: the kernel and userspace can
split up lowmem in different ways according to needs, so we calculate
the shadow offset depending on this.
When kasan is enabled, the definition of TASK_SIZE is not an 8-bit
rotated constant, so we need to modify the TASK_SIZE access code in the
*.s file.
The kernel and modules may use different amounts of memory,
according to the VMSPLIT configuration, which in turn
determines the PAGE_OFFSET.
We use the following KASAN_SHADOW_OFFSETs depending on how the
virtual memory is split up:
- 0x1f000000 if we have 1G userspace / 3G kernelspace split:
- The kernel address space is 3G (0xc0000000)
- PAGE_OFFSET is then set to 0x40000000 so the kernel static
image (vmlinux) uses addresses 0x40000000 .. 0xffffffff
- On top of that we have the MODULES_VADDR which under
the worst case (using ARM instructions) is
PAGE_OFFSET - 16M (0x01000000) = 0x3f000000
so the modules use addresses 0x3f000000 .. 0x3fffffff
- So the addresses 0x3f000000 .. 0xffffffff need to be
covered with shadow memory. That is 0xc1000000 bytes
of memory.
- 1/8 of that is needed for its shadow memory, so
0x18200000 bytes of shadow memory is needed. We
"steal" that from the remaining lowmem.
- The KASAN_SHADOW_START becomes 0x26e00000, to
KASAN_SHADOW_END at 0x3effffff.
- Now we can calculate the KASAN_SHADOW_OFFSET for any
kernel address as 0x3f000000 needs to map to the first
byte of shadow memory and 0xffffffff needs to map to
the last byte of shadow memory. Since:
SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET
0x26e00000 = (0x3f000000 >> 3) + KASAN_SHADOW_OFFSET
KASAN_SHADOW_OFFSET = 0x26e00000 - (0x3f000000 >> 3)
KASAN_SHADOW_OFFSET = 0x26e00000 - 0x07e00000
KASAN_SHADOW_OFFSET = 0x1f000000
- 0x5f000000 if we have 2G userspace / 2G kernelspace split:
- The kernel space is 2G (0x80000000)
- PAGE_OFFSET is set to 0x80000000 so the kernel static
image uses 0x80000000 .. 0xffffffff.
- On top of that we have the MODULES_VADDR which under
the worst case (using ARM instructions) is
PAGE_OFFSET - 16M (0x01000000) = 0x7f000000
so the modules use addresses 0x7f000000 .. 0x7fffffff
- So the addresses 0x7f000000 .. 0xffffffff need to be
covered with shadow memory. That is 0x81000000 bytes
of memory.
- 1/8 of that is needed for its shadow memory, so
0x10200000 bytes of shadow memory is needed. We
"steal" that from the remaining lowmem.
- The KASAN_SHADOW_START becomes 0x6ee00000, to
KASAN_SHADOW_END at 0x7effffff.
- Now we can calculate the KASAN_SHADOW_OFFSET for any
kernel address as 0x7f000000 needs to map to the first
byte of shadow memory and 0xffffffff needs to map to
the last byte of shadow memory. Since:
SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET
0x6ee00000 = (0x7f000000 >> 3) + KASAN_SHADOW_OFFSET
KASAN_SHADOW_OFFSET = 0x6ee00000 - (0x7f000000 >> 3)
KASAN_SHADOW_OFFSET = 0x6ee00000 - 0x0fe00000
KASAN_SHADOW_OFFSET = 0x5f000000
- 0x9f000000 if we have 3G userspace / 1G kernelspace split,
and this is the default split for ARM:
- The kernel address space is 1GB (0x40000000)
- PAGE_OFFSET is set to 0xc0000000 so the kernel static
image uses 0xc0000000 .. 0xffffffff.
- On top of that we have the MODULES_VADDR which under
the worst case (using ARM instructions) is
PAGE_OFFSET - 16M (0x01000000) = 0xbf000000
so the modules use addresses 0xbf000000 .. 0xbfffffff
- So the addresses 0xbf000000 .. 0xffffffff need to be
covered with shadow memory. That is 0x41000000 bytes
of memory.
- 1/8 of that is needed for its shadow memory, so
0x08200000 bytes of shadow memory is needed. We
"steal" that from the remaining lowmem.
- The KASAN_SHADOW_START becomes 0xb6e00000, to
KASAN_SHADOW_END at 0xbfffffff.
- Now we can calculate the KASAN_SHADOW_OFFSET for any
kernel address as 0xbf000000 needs to map to the first
byte of shadow memory and 0xffffffff needs to map to
the last byte of shadow memory. Since:
SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET
0xb6e00000 = (0xbf000000 >> 3) + KASAN_SHADOW_OFFSET
KASAN_SHADOW_OFFSET = 0xb6e00000 - (0xbf000000 >> 3)
KASAN_SHADOW_OFFSET = 0xb6e00000 - 0x17e00000
KASAN_SHADOW_OFFSET = 0x9f000000
- 0x8f000000 if we have 3G userspace / 1G kernelspace with
full 1 GB low memory (VMSPLIT_3G_OPT):
- The kernel address space is 1GB (0x40000000)
- PAGE_OFFSET is set to 0xb0000000 so the kernel static
image uses 0xb0000000 .. 0xffffffff.
- On top of that we have the MODULES_VADDR which under
the worst case (using ARM instructions) is
PAGE_OFFSET - 16M (0x01000000) = 0xaf000000
so the modules use addresses 0xaf000000 .. 0xaffffff
- So the addresses 0xaf000000 .. 0xffffffff need to be
covered with shadow memory. That is 0x51000000 bytes
of memory.
- 1/8 of that is needed for its shadow memory, so
0x0a200000 bytes of shadow memory is needed. We
"steal" that from the remaining lowmem.
- The KASAN_SHADOW_START becomes 0xa4e00000, to
KASAN_SHADOW_END at 0xaeffffff.
- Now we can calculate the KASAN_SHADOW_OFFSET for any
kernel address as 0xaf000000 needs to map to the first
byte of shadow memory and 0xffffffff needs to map to
the last byte of shadow memory. Since:
SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET
0xa4e00000 = (0xaf000000 >> 3) + KASAN_SHADOW_OFFSET
KASAN_SHADOW_OFFSET = 0xa4e00000 - (0xaf000000 >> 3)
KASAN_SHADOW_OFFSET = 0xa4e00000 - 0x15e00000
KASAN_SHADOW_OFFSET = 0x8f000000
- The default value of 0xffffffff for KASAN_SHADOW_OFFSET
is an error value. We should always match one of the
above shadow offsets.
When we do this, TASK_SIZE will sometimes get a bit odd values
that will not fit into immediate mov assembly instructions.
To account for this, we need to rewrite some assembly using
TASK_SIZE like this:
- mov r1, #TASK_SIZE
+ ldr r1, =TASK_SIZE
or
- cmp r4, #TASK_SIZE
+ ldr r0, =TASK_SIZE
+ cmp r4, r0
this is done to avoid the immediate #TASK_SIZE that need to
fit into a limited number of bits.
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: kasan-dev@googlegroups.com
Cc: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Ard Biesheuvel <ardb@kernel.org> # QEMU/KVM/mach-virt/LPAE/8G
Tested-by: Florian Fainelli <f.fainelli@gmail.com> # Brahma SoCs
Tested-by: Ahmad Fatoum <a.fatoum@pengutronix.de> # i.MX6Q
Reported-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Abbott Liu <liuwenliang@huawei.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
2020-10-26 06:53:46 +08:00
|
|
|
ldr r0, =TASK_SIZE @ literal: may not fit an immediate
|
|
|
|
cmp r4, r0 @ was the faulting PC in kernel space?
|
2011-06-20 11:36:03 +08:00
|
|
|
blhs kuser_cmpxchg64_fixup @ yes: restart the helper if needed
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
#endif
|
|
|
|
|
#endif
|
|
|
|
|
.endm
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
.align 5
|
|
|
|
__dabt_usr:
@ Data abort taken from user mode.  uaccess=0: user access is
@ left enabled on entry (NOTE(review): presumably so the abort
@ helper can inspect user memory — confirm against dabt_helper's
@ per-CPU implementations).
|
2015-08-20 17:32:02 +08:00
|
|
|
usr_entry uaccess=0
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
kuser_cmpxchg_check
|
2005-04-17 06:20:36 +08:00
|
|
|
mov r2, sp @ r2 = struct pt_regs *regs
|
2011-06-26 23:01:26 +08:00
|
|
|
dabt_helper @ dispatch to the CPU's dabort handler
|
|
|
|
b ret_from_exception
|
2009-02-16 18:42:09 +08:00
|
|
|
UNWIND(.fnend )
|
2008-08-28 18:22:32 +08:00
|
|
|
ENDPROC(__dabt_usr)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
.align 5
|
|
|
|
__irq_usr:
@ IRQ taken from user mode: build the frame, run the IRQ handler,
@ then return to user space via the work-pending checks.
|
2005-06-01 05:22:32 +08:00
|
|
|
usr_entry
|
2011-06-26 01:28:19 +08:00
|
|
|
kuser_cmpxchg_check
|
2021-10-05 15:15:40 +08:00
|
|
|
irq_handler from_user=1
|
2011-06-25 17:57:57 +08:00
|
|
|
get_thread_info tsk
|
2005-04-17 06:20:36 +08:00
|
|
|
mov why, #0 @ not a syscall ('why' is a register alias)
|
2011-06-05 09:24:58 +08:00
|
|
|
b ret_to_user_from_irq
|
2009-02-16 18:42:09 +08:00
|
|
|
UNWIND(.fnend )
|
2008-08-28 18:22:32 +08:00
|
|
|
ENDPROC(__irq_usr)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
.ltorg
|
|
|
|
|
|
|
|
.align 5
|
|
|
|
__und_usr:
@ Undefined instruction taken from user mode.  uaccess=0 because
@ the instruction may need to be read from user space (FPE path)
@ before user access is disabled.  r5 holds the saved spsr from
@ usr_entry; r1 carries the insn size handed to __und_fault.
|
2015-08-20 17:32:02 +08:00
|
|
|
usr_entry uaccess=0
|
2011-06-26 01:28:19 +08:00
|
|
|
|
2014-04-22 23:14:29 +08:00
|
|
|
@ IRQs must be enabled before attempting to read the instruction from
|
|
|
|
@ user space since that could cause a page/translation fault if the
|
|
|
|
@ page table was modified by another CPU.
|
|
|
|
enable_irq
|
|
|
|
|
|
2023-03-19 22:18:25 +08:00
|
|
|
tst r5, #PSR_T_BIT @ Thumb mode?
|
|
|
|
mov r1, #2 @ set insn size to 2 for Thumb
|
|
|
|
bne 0f @ handle as Thumb undef exception
|
2023-03-20 07:25:18 +08:00
|
|
|
#ifdef CONFIG_FPE_NWFPE
|
2023-03-19 22:18:25 +08:00
|
|
|
adr r9, ret_from_exception
|
|
|
|
bl call_fpe @ returns via R9 on success
|
2011-08-20 01:00:08 +08:00
|
|
|
#endif
|
2023-03-19 22:18:25 +08:00
|
|
|
mov r1, #4 @ set insn size to 4 for ARM
|
|
|
|
0: mov r0, sp @ r0 = struct pt_regs *regs
|
2015-08-20 17:32:02 +08:00
|
|
|
uaccess_disable ip
|
2023-03-19 22:18:25 +08:00
|
|
|
bl __und_fault @ rewind PC by r1, call do_undefinstr
|
|
|
|
b ret_from_exception
|
2012-07-31 02:42:10 +08:00
|
|
|
UNWIND(.fnend)
|
2008-08-28 18:22:32 +08:00
|
|
|
ENDPROC(__und_usr)
|
2008-04-19 05:43:08 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
.align 5
|
|
|
|
__pabt_usr:
@ Prefetch abort taken from user mode; falls through into
@ ret_from_exception below.
|
2005-06-01 05:22:32 +08:00
|
|
|
usr_entry
|
2009-09-25 20:39:47 +08:00
|
|
|
mov r2, sp @ regs
|
2011-06-26 19:37:35 +08:00
|
|
|
pabt_helper @ dispatch to the CPU's pabort handler
|
2009-02-16 18:42:09 +08:00
|
|
|
UNWIND(.fnend )
|
2005-04-17 06:20:36 +08:00
|
|
|
/* fall through */
|
|
|
|
/*
|
|
|
|
* This is the return code to user mode for abort handlers
|
|
|
|
*/
|
|
|
|
ENTRY(ret_from_exception)
|
2009-02-16 18:42:09 +08:00
|
|
|
UNWIND(.fnstart )
|
|
|
|
UNWIND(.cantunwind )
|
2005-04-17 06:20:36 +08:00
|
|
|
get_thread_info tsk @ reload thread_info pointer
|
|
|
|
mov why, #0 @ not a syscall ('why' is a register alias)
|
|
|
|
b ret_to_user @ run work-pending checks, restore user regs
|
2009-02-16 18:42:09 +08:00
|
|
|
UNWIND(.fnend )
|
2008-08-28 18:22:32 +08:00
|
|
|
ENDPROC(__pabt_usr)
|
|
|
|
ENDPROC(ret_from_exception)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2014-09-18 00:12:06 +08:00
|
|
|
.align 5
|
|
|
|
__fiq_usr:
@ FIQ taken from user mode — handled as an NMI (trace=0, and the
@ normal ret_to_user work checks are bypassed: registers are
@ restored directly with restore_user_regs).
|
|
|
|
usr_entry trace=0
|
|
|
|
kuser_cmpxchg_check
|
|
|
|
mov r0, sp @ struct pt_regs *regs
|
|
|
|
bl handle_fiq_as_nmi
|
|
|
|
get_thread_info tsk
|
|
|
|
restore_user_regs fast = 0, offset = 0
|
|
|
|
UNWIND(.fnend )
|
|
|
|
ENDPROC(__fiq_usr)
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Register switch for ARMv3 and ARMv4 processors
|
|
|
|
* r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
|
|
|
|
* previous and next are guaranteed not to be the same.
|
|
|
|
*/
|
|
|
|
ENTRY(__switch_to)
|
2009-02-16 18:42:09 +08:00
|
|
|
UNWIND(.fnstart )
|
|
|
|
UNWIND(.cantunwind )
|
2005-04-17 06:20:36 +08:00
|
|
|
add ip, r1, #TI_CPU_SAVE
|
2009-07-24 19:32:54 +08:00
|
|
|
ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack
|
|
|
|
THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack
|
|
|
|
THUMB( str sp, [ip], #4 )
|
|
|
|
THUMB( str lr, [ip], #4 )
|
2013-06-19 06:23:26 +08:00
|
|
|
ldr r4, [r2, #TI_TP_VALUE]
|
|
|
|
ldr r5, [r2, #TI_TP_VALUE + 4]
|
2010-09-13 23:03:21 +08:00
|
|
|
#ifdef CONFIG_CPU_USE_DOMAINS
|
2015-08-20 04:23:48 +08:00
|
|
|
mrc p15, 0, r6, c3, c0, 0 @ Get domain register
|
|
|
|
str r6, [r1, #TI_CPU_DOMAIN] @ Save old domain register
|
2006-06-21 20:31:52 +08:00
|
|
|
ldr r6, [r2, #TI_CPU_DOMAIN]
|
2006-01-14 05:05:25 +08:00
|
|
|
#endif
|
2013-06-19 06:23:26 +08:00
|
|
|
switch_tls r1, r4, r5, r3, r7
|
2021-11-24 19:09:48 +08:00
|
|
|
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) && \
|
|
|
|
!defined(CONFIG_STACKPROTECTOR_PER_TASK)
|
2010-06-08 09:50:33 +08:00
|
|
|
ldr r8, =__stack_chk_guard
|
2017-07-01 00:03:59 +08:00
|
|
|
.if (TSK_STACK_CANARY > IMM12_MASK)
|
ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems
On UP systems, only a single task can be 'current' at the same time,
which means we can use a global variable to track it. This means we can
also enable THREAD_INFO_IN_TASK for those systems, as in that case,
thread_info is accessed via current rather than the other way around,
removing the need to store thread_info at the base of the task stack.
This, in turn, permits us to enable IRQ stacks and vmap'ed stacks on UP
systems as well.
To partially mitigate the performance overhead of this arrangement, use
a ADD/ADD/LDR sequence with the appropriate PC-relative group
relocations to load the value of current when needed. This means that
accessing current will still only require a single load as before,
avoiding the need for a literal to carry the address of the global
variable in each function. However, accessing thread_info will now
require this load as well.
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-11-24 21:08:11 +08:00
|
|
|
add r9, r2, #TSK_STACK_CANARY & ~IMM12_MASK
|
2021-11-24 19:09:48 +08:00
|
|
|
ldr r9, [r9, #TSK_STACK_CANARY & IMM12_MASK]
|
ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems
On UP systems, only a single task can be 'current' at the same time,
which means we can use a global variable to track it. This means we can
also enable THREAD_INFO_IN_TASK for those systems, as in that case,
thread_info is accessed via current rather than the other way around,
removing the need to store thread_info at the base of the task stack.
This, in turn, permits us to enable IRQ stacks and vmap'ed stacks on UP
systems as well.
To partially mitigate the performance overhead of this arrangement, use
a ADD/ADD/LDR sequence with the appropriate PC-relative group
relocations to load the value of current when needed. This means that
accessing current will still only require a single load as before,
avoiding the need for a literal to carry the address of the global
variable in each function. However, accessing thread_info will now
require this load as well.
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-11-24 21:08:11 +08:00
|
|
|
.else
|
|
|
|
ldr r9, [r2, #TSK_STACK_CANARY & IMM12_MASK]
|
2017-07-01 00:03:59 +08:00
|
|
|
.endif
|
2010-06-08 09:50:33 +08:00
|
|
|
#endif
|
2021-11-24 19:09:48 +08:00
|
|
|
mov r7, r2 @ Preserve 'next'
|
2010-09-13 23:03:21 +08:00
|
|
|
#ifdef CONFIG_CPU_USE_DOMAINS
|
2005-04-17 06:20:36 +08:00
|
|
|
mcr p15, 0, r6, c3, c0, 0 @ Set domain register
|
|
|
|
#endif
|
2006-06-21 20:31:52 +08:00
|
|
|
mov r5, r0
|
|
|
|
add r4, r2, #TI_CPU_SAVE
|
|
|
|
ldr r0, =thread_notify_head
|
|
|
|
mov r1, #THREAD_NOTIFY_SWITCH
|
|
|
|
bl atomic_notifier_call_chain
|
2021-11-24 19:09:48 +08:00
|
|
|
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) && \
|
|
|
|
!defined(CONFIG_STACKPROTECTOR_PER_TASK)
|
|
|
|
str r9, [r8]
|
2010-06-08 09:50:33 +08:00
|
|
|
#endif
|
2006-06-21 20:31:52 +08:00
|
|
|
mov r0, r5
|
2021-09-23 15:15:53 +08:00
|
|
|
#if !defined(CONFIG_THUMB2_KERNEL) && !defined(CONFIG_VMAP_STACK)
|
ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems
On UP systems, only a single task can be 'current' at the same time,
which means we can use a global variable to track it. This means we can
also enable THREAD_INFO_IN_TASK for those systems, as in that case,
thread_info is accessed via current rather than the other way around,
removing the need to store thread_info at the base of the task stack.
This, in turn, permits us to enable IRQ stacks and vmap'ed stacks on UP
systems as well.
To partially mitigate the performance overhead of this arrangement, use
a ADD/ADD/LDR sequence with the appropriate PC-relative group
relocations to load the value of current when needed. This means that
accessing current will still only require a single load as before,
avoiding the need for a literal to carry the address of the global
variable in each function. However, accessing thread_info will now
require this load as well.
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-11-24 21:08:11 +08:00
|
|
|
set_current r7, r8
|
ARM: switch_to: clean up Thumb2 code path
The load-multiple instruction that essentially performs the switch_to
operation in ARM mode, by loading all callee save registers as well the
stack pointer and the program counter, is split into 3 separate loads
for Thumb-2, with the IP register used as a temporary to capture the
value of R4 before it gets overwritten.
We can clean this up a bit, by sticking with a single LDMIA instruction,
but one that pops SP and PC into IP and LR, respectively, and by using
ordinary move register and branch instructions to get those values into
SP and PC. This also allows us to move the set_current call closer to
the assignment of SP, reducing the window where those are mutually out
of sync. This is especially relevant for CONFIG_VMAP_STACK, which is
being introduced in a subsequent patch, where we need to issue a load
that might fault from the new stack while running from the old one, to
ensure that stale PMD entries in the VMALLOC space are synced up.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Keith Packard <keithpac@amazon.com>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-10-17 00:00:01 +08:00
|
|
|
ldmia r4, {r4 - sl, fp, sp, pc} @ Load all regs saved previously
|
|
|
|
#else
|
|
|
|
mov r1, r7
|
|
|
|
ldmia r4, {r4 - sl, fp, ip, lr} @ Load all regs saved previously
|
2021-09-23 15:15:53 +08:00
|
|
|
#ifdef CONFIG_VMAP_STACK
|
|
|
|
@
|
|
|
|
@ Do a dummy read from the new stack while running from the old one so
|
|
|
|
@ that we can rely on do_translation_fault() to fix up any stale PMD
|
|
|
|
@ entries covering the vmalloc region.
|
|
|
|
@
|
|
|
|
ldr r2, [ip]
|
|
|
|
#endif
|
ARM: switch_to: clean up Thumb2 code path
The load-multiple instruction that essentially performs the switch_to
operation in ARM mode, by loading all callee save registers as well the
stack pointer and the program counter, is split into 3 separate loads
for Thumb-2, with the IP register used as a temporary to capture the
value of R4 before it gets overwritten.
We can clean this up a bit, by sticking with a single LDMIA instruction,
but one that pops SP and PC into IP and LR, respectively, and by using
ordinary move register and branch instructions to get those values into
SP and PC. This also allows us to move the set_current call closer to
the assignment of SP, reducing the window where those are mutually out
of sync. This is especially relevant for CONFIG_VMAP_STACK, which is
being introduced in a subsequent patch, where we need to issue a load
that might fault from the new stack while running from the old one, to
ensure that stale PMD entries in the VMALLOC space are synced up.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Keith Packard <keithpac@amazon.com>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-10-17 00:00:01 +08:00
|
|
|
|
|
|
|
@ When CONFIG_THREAD_INFO_IN_TASK=n, the update of SP itself is what
|
|
|
|
@ effectuates the task switch, as that is what causes the observable
|
|
|
|
@ values of current and current_thread_info to change. When
|
|
|
|
@ CONFIG_THREAD_INFO_IN_TASK=y, setting current (and therefore
|
|
|
|
@ current_thread_info) is done explicitly, and the update of SP just
|
|
|
|
@ switches us to another stack, with few other side effects. In order
|
|
|
|
@ to prevent this distinction from causing any inconsistencies, let's
|
|
|
|
@ keep the 'set_current' call as close as we can to the update of SP.
|
ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems
On UP systems, only a single task can be 'current' at the same time,
which means we can use a global variable to track it. This means we can
also enable THREAD_INFO_IN_TASK for those systems, as in that case,
thread_info is accessed via current rather than the other way around,
removing the need to store thread_info at the base of the task stack.
This, in turn, permits us to enable IRQ stacks and vmap'ed stacks on UP
systems as well.
To partially mitigate the performance overhead of this arrangement, use
a ADD/ADD/LDR sequence with the appropriate PC-relative group
relocations to load the value of current when needed. This means that
accessing current will still only require a single load as before,
avoiding the need for a literal to carry the address of the global
variable in each function. However, accessing thread_info will now
require this load as well.
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-11-24 21:08:11 +08:00
|
|
|
set_current r1, r2
|
ARM: switch_to: clean up Thumb2 code path
The load-multiple instruction that essentially performs the switch_to
operation in ARM mode, by loading all callee save registers as well the
stack pointer and the program counter, is split into 3 separate loads
for Thumb-2, with the IP register used as a temporary to capture the
value of R4 before it gets overwritten.
We can clean this up a bit, by sticking with a single LDMIA instruction,
but one that pops SP and PC into IP and LR, respectively, and by using
ordinary move register and branch instructions to get those values into
SP and PC. This also allows us to move the set_current call closer to
the assignment of SP, reducing the window where those are mutually out
of sync. This is especially relevant for CONFIG_VMAP_STACK, which is
being introduced in a subsequent patch, where we need to issue a load
that might fault from the new stack while running from the old one, to
ensure that stale PMD entries in the VMALLOC space are synced up.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Keith Packard <keithpac@amazon.com>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-10-17 00:00:01 +08:00
|
|
|
mov sp, ip
|
|
|
|
ret lr
|
|
|
|
#endif
|
2009-02-16 18:42:09 +08:00
|
|
|
UNWIND(.fnend )
|
2008-08-28 18:22:32 +08:00
|
|
|
ENDPROC(__switch_to)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2021-09-23 15:15:53 +08:00
|
|
|
@ __bad_stack: CONFIG_VMAP_STACK stack-overflow slow path.
@ Entered with IP clobbered and its original value stashed on the
@ exception mode stack (see comment below).  Switches SP to this CPU's
@ overflow stack, builds a frame record so the unwinder can walk past
@ the stack switch, captures the interrupted register state via
@ svc_entry, and calls handle_bad_stack() ("Time to die" — no return
@ is expected).
#ifdef CONFIG_VMAP_STACK
|
|
|
|
.text
|
|
|
|
.align 2
|
|
|
|
__bad_stack:
|
|
|
|
@
|
|
|
|
@ We've just detected an overflow. We need to load the address of this
|
|
|
|
@ CPU's overflow stack into the stack pointer register. We have only one
|
|
|
|
@ scratch register so let's use a sequence of ADDs including one
|
|
|
|
@ involving the PC, and decorate them with PC-relative group
|
|
|
|
@ relocations. As these are ARM only, switch to ARM mode first.
|
|
|
|
@
|
|
|
|
@ We enter here with IP clobbered and its value stashed on the mode
|
|
|
|
@ stack.
|
|
|
|
@
|
|
|
|
THUMB(	bx	pc		)
|
|
|
|
THUMB(	nop			)
|
|
|
|
THUMB(	.arm			)
|
2021-11-25 17:26:44 +08:00
|
|
|
ldr_this_cpu_armv6 ip, overflow_stack_ptr
|
2021-09-23 15:15:53 +08:00
|
|
|
|
|
|
|
str	sp, [ip, #-4]!	@ Preserve original SP value
|
|
|
|
mov	sp, ip		@ Switch to overflow stack
|
|
|
|
pop	{ip}		@ Original SP in IP
|
|
|
|
|
|
|
|
#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC)
|
|
|
|
mov	ip, ip		@ mov expected by unwinder
|
|
|
|
push	{fp, ip, lr, pc}	@ GCC flavor frame record
|
|
|
|
#else
|
|
|
|
str	ip, [sp, #-8]!	@ store original SP
|
|
|
|
push	{fpreg, lr}	@ Clang flavor frame record
|
|
|
|
#endif
|
|
|
|
UNWIND( ldr	ip, [r0, #4]	)	@ load exception LR
|
|
|
|
UNWIND( str	ip, [sp, #12]	)	@ store in the frame record
|
|
|
|
ldr	ip, [r0, #12]	@ reload IP
|
|
|
|
|
|
|
|
@ Store the original GPRs to the new stack.
|
|
|
|
svc_entry uaccess=0, overflow_check=0
|
|
|
|
|
|
|
|
UNWIND( .save   {sp, pc}	)
|
|
|
|
UNWIND( .save   {fpreg, lr}	)
|
|
|
|
UNWIND( .setfp  fpreg, sp	)
|
|
|
|
|
|
|
|
ldr	fpreg, [sp, #S_SP]	@ Add our frame record
|
|
|
|
@ to the linked list
|
|
|
|
#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC)
|
|
|
|
ldr	r1, [fp, #4]	@ reload SP at entry
|
|
|
|
add	fp, fp, #12
|
|
|
|
#else
|
|
|
|
ldr	r1, [fpreg, #8]
|
|
|
|
#endif
|
|
|
|
str	r1, [sp, #S_SP]	@ store in pt_regs
|
|
|
|
|
|
|
|
@ Stash the regs for handle_bad_stack
|
|
|
|
mov	r0, sp
|
|
|
|
|
|
|
|
@ Time to die
|
|
|
|
bl	handle_bad_stack
|
|
|
|
nop
|
|
|
|
UNWIND( .fnend			)
|
|
|
|
ENDPROC(__bad_stack)
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
__INIT
|
2005-04-30 05:08:33 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* User helpers.
|
|
|
|
*
|
|
|
|
* Each segment is 32-byte aligned and will be moved to the top of the high
|
|
|
|
* vector page. New segments (if ever needed) must be added in front of
|
|
|
|
* existing ones. This mechanism should be used only for things that are
|
|
|
|
* really small and justified, and not be abused freely.
|
|
|
|
*
|
2023-05-04 06:50:54 +08:00
|
|
|
* See Documentation/arch/arm/kernel_user_helpers.rst for formal definitions.
|
2005-04-30 05:08:33 +08:00
|
|
|
*/
|
2009-07-24 19:32:54 +08:00
|
|
|
THUMB( .arm )
|
2005-04-30 05:08:33 +08:00
|
|
|
|
2006-08-19 00:20:15 +08:00
|
|
|
@ usr_ret \reg: return to the user-space caller of a kuser helper
@ through \reg.  With CONFIG_ARM_THUMB the caller may be executing in
@ Thumb state, so BX is used to switch instruction set according to
@ the interworking bit (bit 0) of \reg; otherwise a plain register
@ return is sufficient.
.macro	usr_ret, reg
|
|
|
|
#ifdef CONFIG_ARM_THUMB
|
|
|
|
bx	\reg
|
|
|
|
#else
|
2014-06-30 23:29:12 +08:00
|
|
|
ret	\reg
|
2006-08-19 00:20:15 +08:00
|
|
|
#endif
|
|
|
|
.endm
|
|
|
|
|
2013-07-04 18:32:04 +08:00
|
|
|
@ kuser_pad \sym, \size: pad the kuser helper that starts at \sym out to
@ exactly \size bytes.  Zero bytes first bring the location counter to a
@ 4-byte boundary, then the remainder is filled with 0xe7fddef1 — a
@ permanently undefined ARM instruction encoding — so a stray jump into
@ the padding traps rather than executing arbitrary bytes.
.macro	kuser_pad, sym, size
|
|
|
|
.if	(. - \sym) & 3
|
|
|
|
.rept	4 - (. - \sym) & 3
|
|
|
|
.byte	0
|
|
|
|
.endr
|
|
|
|
.endif
|
|
|
|
.rept	(\size - (. - \sym)) / 4
|
|
|
|
.word	0xe7fddef1
|
|
|
|
.endr
|
|
|
|
.endm
|
|
|
|
|
2013-07-24 01:37:00 +08:00
|
|
|
#ifdef CONFIG_KUSER_HELPERS
|
2005-04-30 05:08:33 +08:00
|
|
|
.align 5
|
|
|
|
.globl __kuser_helper_start
|
|
|
|
__kuser_helper_start:
|
|
|
|
|
2005-12-20 06:20:51 +08:00
|
|
|
/*
|
2011-06-20 11:36:03 +08:00
|
|
|
* Due to the length of some sequences, __kuser_cmpxchg64 spans 2 regular
|
|
|
|
* kuser "slots", therefore 0xffff0f80 is not used as a valid entry point.
|
2005-12-20 06:20:51 +08:00
|
|
|
*/
|
|
|
|
|
2011-06-20 11:36:03 +08:00
|
|
|
@ __kuser_cmpxchg64 (vectors page entry 0xffff0f60): 64-bit atomic
@ compare-and-swap helper for user space.
@   r0 = pointer to the expected (old) 64-bit value
@   r1 = pointer to the new 64-bit value
@   r2 = pointer to the 64-bit target
@ Returns r0 == 0 on success, non-zero on failure, with the C flag set
@ accordingly by the final RSBS (see kernel_user_helpers.rst for the
@ formal ABI).  On >= v6K this uses LDREXD/STREXD; on UP pre-v6K the
@ non-atomic sequence is made effectively atomic by the exception
@ fixup below, which rewinds the user PC to 1b if it was interrupted
@ inside the 1b..2b critical section.
__kuser_cmpxchg64:				@ 0xffff0f60
|
|
|
|
|
2015-09-22 02:34:28 +08:00
|
|
|
#if defined(CONFIG_CPU_32v6K)
|
2011-06-20 11:36:03 +08:00
|
|
|
|
|
|
|
|
stmfd	sp!, {r4, r5, r6, r7}
|
|
|
|
ldrd	r4, r5, [r0]			@ load old val
|
|
|
|
ldrd	r6, r7, [r1]			@ load new val
|
|
|
|
smp_dmb	arm
|
|
|
|
1:	ldrexd	r0, r1, [r2]			@ load current val
|
|
|
|
eors	r3, r0, r4			@ compare with oldval (1)
|
2019-02-18 07:57:38 +08:00
|
|
|
eorseq	r3, r1, r5			@ compare with oldval (2)
|
2011-06-20 11:36:03 +08:00
|
|
|
strexdeq r3, r6, r7, [r2]		@ store newval if eq
|
|
|
|
teqeq	r3, #1				@ success?
|
|
|
|
beq	1b				@ if no then retry
|
ARM: 6516/1: Allow SMP_ON_UP to work with Thumb-2 kernels.
* __fixup_smp_on_up has been modified with support for the
THUMB2_KERNEL case. For THUMB2_KERNEL only, fixups are split
into halfwords in case of misalignment, since we can't rely on
unaligned accesses working before turning the MMU on.
No attempt is made to optimise the aligned case, since the
number of fixups is typically small, and it seems best to keep
the code as simple as possible.
* Add a rotate in the fixup_smp code in order to support
CPU_BIG_ENDIAN, as suggested by Nicolas Pitre.
* Add an assembly-time sanity-check to ALT_UP() to ensure that
the content really is the right size (4 bytes).
(No check is done for ALT_SMP(). Possibly, this could be fixed
by splitting the two uses ot ALT_SMP() (ALT_SMP...SMP_UP versus
ALT_SMP...SMP_UP_B) into two macros. In the first case,
ALT_SMP needs to expand to >= 4 bytes, not == 4.)
* smp_mpidr.h (which implements ALT_SMP()/ALT_UP() manually due
to macro limitations) has not been modified: the affected
instruction (mov) has no 16-bit encoding, so the correct
instruction size is satisfied in this case.
* A "mode" parameter has been added to smp_dmb:
smp_dmb arm @ assumes 4-byte instructions (for ARM code, e.g. kuser)
smp_dmb @ uses W() to ensure 4-byte instructions for ALT_SMP()
This avoids assembly failures due to use of W() inside smp_dmb,
when assembling pure-ARM code in the vectors page.
There might be a better way to achieve this.
* Kconfig: make SMP_ON_UP depend on
(!THUMB2_KERNEL || !BIG_ENDIAN) i.e., THUMB2_KERNEL is now
supported, but only if !BIG_ENDIAN (The fixup code for Thumb-2
currently assumes little-endian order.)
Tested using a single generic realview kernel on:
ARM RealView PB-A8 (CONFIG_THUMB2_KERNEL={n,y})
ARM RealView PBX-A9 (SMP)
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2010-12-01 22:39:23 +08:00
|
|
|
smp_dmb	arm
|
2011-06-20 11:36:03 +08:00
|
|
|
rsbs	r0, r3, #0			@ set returned val and C flag
|
|
|
|
ldmfd	sp!, {r4, r5, r6, r7}
|
2012-02-03 18:08:05 +08:00
|
|
|
usr_ret	lr
|
2011-06-20 11:36:03 +08:00
|
|
|
|
|
|
|
|
#elif !defined(CONFIG_SMP)
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The only thing that can break atomicity in this cmpxchg64
|
|
|
|
* implementation is either an IRQ or a data abort exception
|
|
|
|
* causing another process/thread to be scheduled in the middle of
|
|
|
|
* the critical sequence.  The same strategy as for cmpxchg is used.
|
|
|
|
*/
|
|
|
|
stmfd	sp!, {r4, r5, r6, lr}
|
|
|
|
ldmia	r0, {r4, r5}			@ load old val
|
|
|
|
ldmia	r1, {r6, lr}			@ load new val
|
|
|
|
1:	ldmia	r2, {r0, r1}			@ load current val
|
|
|
|
eors	r3, r0, r4			@ compare with oldval (1)
|
2019-02-18 07:57:38 +08:00
|
|
|
eorseq	r3, r1, r5			@ compare with oldval (2)
|
|
|
|
2:	stmiaeq	r2, {r6, lr}			@ store newval if eq
|
2011-06-20 11:36:03 +08:00
|
|
|
rsbs	r0, r3, #0			@ set return val and C flag
|
|
|
|
ldmfd	sp!, {r4, r5, r6, pc}
|
|
|
|
|
|
|
|
|
.text
|
|
|
|
kuser_cmpxchg64_fixup:
|
|
|
|
@ Called from kuser_cmpxchg_fixup.
|
2011-07-23 06:09:07 +08:00
|
|
|
@ r4 = address of interrupted insn (must be preserved).
|
2011-06-20 11:36:03 +08:00
|
|
|
@ sp = saved regs. r7 and r8 are clobbered.
|
|
|
|
@ 1b = first critical insn, 2b = last critical insn.
|
2011-07-23 06:09:07 +08:00
|
|
|
@ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b.
|
2011-06-20 11:36:03 +08:00
|
|
|
mov	r7, #0xffff0fff
|
|
|
|
sub	r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64)))
|
2011-07-23 06:09:07 +08:00
|
|
|
subs	r8, r4, r7
|
2019-02-18 07:57:38 +08:00
|
|
|
rsbscs	r8, r8, #(2b - 1b)
|
2011-06-20 11:36:03 +08:00
|
|
|
strcs	r7, [sp, #S_PC]
|
|
|
|
#if __LINUX_ARM_ARCH__ < 6
|
|
|
|
bcc	kuser_cmpxchg32_fixup
|
|
|
|
#endif
|
2014-06-30 23:29:12 +08:00
|
|
|
ret	lr
|
2011-06-20 11:36:03 +08:00
|
|
|
.previous
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
#warning "NPTL on non MMU needs fixing"
|
|
|
|
mov	r0, #-1
|
|
|
|
adds	r0, r0, #0
|
2006-08-19 00:20:15 +08:00
|
|
|
usr_ret	lr
|
2011-06-20 11:36:03 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
#error "incoherent kernel configuration"
|
|
|
|
#endif
|
|
|
|
|
2013-07-04 18:32:04 +08:00
|
|
|
kuser_pad __kuser_cmpxchg64, 64
|
2005-12-20 06:20:51 +08:00
|
|
|
|
|
|
|
@ __kuser_memory_barrier (vectors page entry 0xffff0fa0): issue a data
@ memory barrier on behalf of user space, then return to the caller.
@ "smp_dmb arm" selects the 4-byte ARM encoding, as required for code
@ assembled into the ARM-only vectors page (see the quoted commit log
@ below); presumably patched to a no-op on UP builds via the
@ ALT_SMP/ALT_UP machinery.
__kuser_memory_barrier:				@ 0xffff0fa0
|
ARM: 6516/1: Allow SMP_ON_UP to work with Thumb-2 kernels.
* __fixup_smp_on_up has been modified with support for the
THUMB2_KERNEL case. For THUMB2_KERNEL only, fixups are split
into halfwords in case of misalignment, since we can't rely on
unaligned accesses working before turning the MMU on.
No attempt is made to optimise the aligned case, since the
number of fixups is typically small, and it seems best to keep
the code as simple as possible.
* Add a rotate in the fixup_smp code in order to support
CPU_BIG_ENDIAN, as suggested by Nicolas Pitre.
* Add an assembly-time sanity-check to ALT_UP() to ensure that
the content really is the right size (4 bytes).
(No check is done for ALT_SMP(). Possibly, this could be fixed
by splitting the two uses ot ALT_SMP() (ALT_SMP...SMP_UP versus
ALT_SMP...SMP_UP_B) into two macros. In the first case,
ALT_SMP needs to expand to >= 4 bytes, not == 4.)
* smp_mpidr.h (which implements ALT_SMP()/ALT_UP() manually due
to macro limitations) has not been modified: the affected
instruction (mov) has no 16-bit encoding, so the correct
instruction size is satisfied in this case.
* A "mode" parameter has been added to smp_dmb:
smp_dmb arm @ assumes 4-byte instructions (for ARM code, e.g. kuser)
smp_dmb @ uses W() to ensure 4-byte instructions for ALT_SMP()
This avoids assembly failures due to use of W() inside smp_dmb,
when assembling pure-ARM code in the vectors page.
There might be a better way to achieve this.
* Kconfig: make SMP_ON_UP depend on
(!THUMB2_KERNEL || !BIG_ENDIAN) i.e., THUMB2_KERNEL is now
supported, but only if !BIG_ENDIAN (The fixup code for Thumb-2
currently assumes little-endian order.)
Tested using a single generic realview kernel on:
ARM RealView PB-A8 (CONFIG_THUMB2_KERNEL={n,y})
ARM RealView PBX-A9 (SMP)
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2010-12-01 22:39:23 +08:00
|
|
|
smp_dmb	arm
|
2006-08-19 00:20:15 +08:00
|
|
|
usr_ret	lr
|
2005-12-20 06:20:51 +08:00
|
|
|
|
|
2013-07-04 18:32:04 +08:00
|
|
|
kuser_pad __kuser_memory_barrier, 32
|
2005-04-30 05:08:33 +08:00
|
|
|
|
|
|
|
__kuser_cmpxchg: @ 0xffff0fc0
|
|
|
|
|
2015-09-22 02:34:28 +08:00
|
|
|
#if __LINUX_ARM_ARCH__ < 6
|
2005-04-30 05:08:33 +08:00
|
|
|
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so as to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
|
2005-04-30 05:08:33 +08:00
|
|
|
/*
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
* The only thing that can break atomicity in this cmpxchg
|
|
|
|
* implementation is either an IRQ or a data abort exception
|
|
|
|
* causing another process/thread to be scheduled in the middle
|
|
|
|
* of the critical sequence. To prevent this, code is added to
|
|
|
|
* the IRQ and data abort exception handlers to set the pc back
|
|
|
|
* to the beginning of the critical section if it is found to be
|
|
|
|
* within that critical section (see kuser_cmpxchg_fixup).
|
2005-04-30 05:08:33 +08:00
|
|
|
*/
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so as to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
1: ldr r3, [r2] @ load current val
|
|
|
|
subs r3, r3, r0 @ compare with oldval
|
|
|
|
2: streq r1, [r2] @ store newval if eq
|
|
|
|
rsbs r0, r3, #0 @ set return val and C flag
|
|
|
|
usr_ret lr
|
|
|
|
|
|
|
|
.text
|
2011-06-20 11:36:03 +08:00
|
|
|
kuser_cmpxchg32_fixup:
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
@ Called from kuser_cmpxchg_check macro.
|
2011-06-25 22:44:20 +08:00
|
|
|
@ r4 = address of interrupted insn (must be preserved).
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
@ sp = saved regs. r7 and r8 are clobbered.
|
|
|
|
@ 1b = first critical insn, 2b = last critical insn.
|
2011-06-25 22:44:20 +08:00
|
|
|
@ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b.
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
mov r7, #0xffff0fff
|
|
|
|
sub r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg)))
|
2011-06-25 22:44:20 +08:00
|
|
|
subs r8, r4, r7
|
2019-02-18 07:57:38 +08:00
|
|
|
rsbscs r8, r8, #(2b - 1b)
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
strcs r7, [sp, #S_PC]
|
2014-06-30 23:29:12 +08:00
|
|
|
ret lr
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
.previous
|
|
|
|
|
2006-02-09 05:19:37 +08:00
|
|
|
#else
|
|
|
|
#warning "NPTL on non MMU needs fixing"
|
|
|
|
mov r0, #-1
|
|
|
|
adds r0, r0, #0
|
2006-08-19 00:20:15 +08:00
|
|
|
usr_ret lr
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so as to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
#endif
|
2005-04-30 05:08:33 +08:00
|
|
|
|
|
|
|
#else
|
|
|
|
|
ARM: 6516/1: Allow SMP_ON_UP to work with Thumb-2 kernels.
* __fixup_smp_on_up has been modified with support for the
THUMB2_KERNEL case. For THUMB2_KERNEL only, fixups are split
into halfwords in case of misalignment, since we can't rely on
unaligned accesses working before turning the MMU on.
No attempt is made to optimise the aligned case, since the
number of fixups is typically small, and it seems best to keep
the code as simple as possible.
* Add a rotate in the fixup_smp code in order to support
CPU_BIG_ENDIAN, as suggested by Nicolas Pitre.
* Add an assembly-time sanity-check to ALT_UP() to ensure that
the content really is the right size (4 bytes).
(No check is done for ALT_SMP(). Possibly, this could be fixed
by splitting the two uses of ALT_SMP() (ALT_SMP...SMP_UP versus
ALT_SMP...SMP_UP_B) into two macros. In the first case,
ALT_SMP needs to expand to >= 4 bytes, not == 4.)
* smp_mpidr.h (which implements ALT_SMP()/ALT_UP() manually due
to macro limitations) has not been modified: the affected
instruction (mov) has no 16-bit encoding, so the correct
instruction size is satisfied in this case.
* A "mode" parameter has been added to smp_dmb:
smp_dmb arm @ assumes 4-byte instructions (for ARM code, e.g. kuser)
smp_dmb @ uses W() to ensure 4-byte instructions for ALT_SMP()
This avoids assembly failures due to use of W() inside smp_dmb,
when assembling pure-ARM code in the vectors page.
There might be a better way to achieve this.
* Kconfig: make SMP_ON_UP depend on
(!THUMB2_KERNEL || !BIG_ENDIAN) i.e., THUMB2_KERNEL is now
supported, but only if !BIG_ENDIAN (The fixup code for Thumb-2
currently assumes little-endian order.)
Tested using a single generic realview kernel on:
ARM RealView PB-A8 (CONFIG_THUMB2_KERNEL={n,y})
ARM RealView PBX-A9 (SMP)
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2010-12-01 22:39:23 +08:00
|
|
|
smp_dmb arm
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
1: ldrex r3, [r2]
|
2005-04-30 05:08:33 +08:00
|
|
|
subs r3, r3, r0
|
|
|
|
strexeq r3, r1, [r2]
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
teqeq r3, #1
|
|
|
|
beq 1b
|
2005-04-30 05:08:33 +08:00
|
|
|
rsbs r0, r3, #0
|
[ARM] 4659/1: remove possibilities for spurious false negative with __kuser_cmpxchg
The ARM __kuser_cmpxchg routine is meant to implement an atomic cmpxchg
in user space. It however can produce spurious false negative if a
processor exception occurs in the middle of the operation. Normally
this is not a problem since cmpxchg is typically called in a loop until
it succeeds to implement an atomic increment for example.
Some use cases which don't involve a loop require that the operation be
100% reliable though. This patch changes the implementation so to
reattempt the operation after an exception has occurred in the critical
section rather than abort it.
Here's a simple program to test the fix (don't use CONFIG_NO_HZ in your
kernel as this depends on a sufficiently high interrupt rate):
#include <stdio.h>
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
int main()
{
int i, x = 0;
for (i = 0; i < 100000000; i++) {
int v = x;
if (__kernel_cmpxchg(v, v+1, &x))
printf("failed at %d: %d vs %d\n", i, v, x);
}
printf("done with %d vs %d\n", i, x);
return 0;
}
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2007-11-21 00:20:29 +08:00
|
|
|
/* beware -- each __kuser slot must be 8 instructions max */
|
2010-09-04 17:47:48 +08:00
|
|
|
ALT_SMP(b __kuser_memory_barrier)
|
|
|
|
ALT_UP(usr_ret lr)
|
2005-04-30 05:08:33 +08:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2013-07-04 18:32:04 +08:00
|
|
|
kuser_pad __kuser_cmpxchg, 32
|
2005-04-30 05:08:33 +08:00
|
|
|
|
|
|
|
__kuser_get_tls: @ 0xffff0fe0
|
2010-07-05 21:53:10 +08:00
|
|
|
ldr r0, [pc, #(16 - 8)] @ read TLS, set in kuser_get_tls_init
|
2006-08-19 00:20:15 +08:00
|
|
|
usr_ret lr
|
2010-07-05 21:53:10 +08:00
|
|
|
mrc p15, 0, r0, c13, c0, 3 @ 0xffff0fe8 hardware TLS code
|
2013-07-04 18:32:04 +08:00
|
|
|
kuser_pad __kuser_get_tls, 16
|
|
|
|
.rep 3
|
2010-07-05 21:53:10 +08:00
|
|
|
.word 0 @ 0xffff0ff0 software TLS value, then
|
|
|
|
.endr @ pad up to __kuser_helper_version
|
2005-04-30 05:08:33 +08:00
|
|
|
|
|
|
|
__kuser_helper_version: @ 0xffff0ffc
|
|
|
|
.word ((__kuser_helper_end - __kuser_helper_start) >> 5)
|
|
|
|
|
|
|
|
.globl __kuser_helper_end
|
|
|
|
__kuser_helper_end:
|
|
|
|
|
2013-07-24 01:37:00 +08:00
|
|
|
#endif
|
|
|
|
|
2009-07-24 19:32:54 +08:00
|
|
|
THUMB( .thumb )
|
2005-04-30 05:08:33 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Vector stubs.
|
|
|
|
*
|
2013-07-04 18:40:32 +08:00
|
|
|
* This code is copied to 0xffff1000 so we can use branches in the
|
|
|
|
* vectors, rather than ldr's. Note that this code must not exceed
|
|
|
|
* a page size.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
|
|
|
* Common stub entry macro:
|
|
|
|
* Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
|
2005-06-01 05:22:32 +08:00
|
|
|
*
|
|
|
|
* SP points to a minimal amount of processor-private memory, the address
|
|
|
|
* of which is copied into r0 for the mode specific abort handler.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2005-11-06 22:42:37 +08:00
|
|
|
.macro vector_stub, name, mode, correction=0
|
2005-04-17 06:20:36 +08:00
|
|
|
.align 5
|
2022-04-20 16:55:35 +08:00
|
|
|
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
|
|
|
|
vector_bhb_bpiall_\name:
|
|
|
|
mcr p15, 0, r0, c7, c5, 6 @ BPIALL
|
|
|
|
@ isb not needed due to "movs pc, lr" in the vector stub
|
|
|
|
@ which gives a "context synchronisation".
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
vector_\name:
|
|
|
|
.if \correction
|
|
|
|
sub lr, lr, #\correction
|
|
|
|
.endif
|
2005-06-01 05:22:32 +08:00
|
|
|
|
2022-02-11 00:05:45 +08:00
|
|
|
@ Save r0, lr_<exception> (parent PC)
|
2005-06-01 05:22:32 +08:00
|
|
|
stmia sp, {r0, lr} @ save r0, lr
|
2022-02-11 00:05:45 +08:00
|
|
|
|
|
|
|
@ Save spsr_<exception> (parent CPSR)
|
2022-04-20 17:02:43 +08:00
|
|
|
.Lvec_\name:
|
|
|
|
mrs lr, spsr
|
2005-06-01 05:22:32 +08:00
|
|
|
str lr, [sp, #8] @ save spsr
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
@
|
2005-06-01 05:22:32 +08:00
|
|
|
@ Prepare for SVC32 mode. IRQs remain disabled.
|
2005-04-17 06:20:36 +08:00
|
|
|
@
|
2005-06-01 05:22:32 +08:00
|
|
|
mrs r0, cpsr
|
2009-07-24 19:32:54 +08:00
|
|
|
eor r0, r0, #(\mode ^ SVC_MODE | PSR_ISETSTATE)
|
2005-06-01 05:22:32 +08:00
|
|
|
msr spsr_cxsf, r0
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-06-01 05:22:32 +08:00
|
|
|
@
|
|
|
|
@ the branch table must immediately follow this code
|
|
|
|
@
|
|
|
|
and lr, lr, #0x0f
|
2009-07-24 19:32:54 +08:00
|
|
|
THUMB( adr r0, 1f )
|
|
|
|
THUMB( ldr lr, [r0, lr, lsl #2] )
|
2005-11-06 22:42:37 +08:00
|
|
|
mov r0, sp
|
2009-07-24 19:32:54 +08:00
|
|
|
ARM( ldr lr, [pc, lr, lsl #2] )
|
2005-06-01 05:22:32 +08:00
|
|
|
movs pc, lr @ branch to handler in SVC mode
|
2008-08-28 18:22:32 +08:00
|
|
|
ENDPROC(vector_\name)
|
2009-07-24 19:32:52 +08:00
|
|
|
|
2022-02-11 00:05:45 +08:00
|
|
|
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
|
|
|
|
.subsection 1
|
|
|
|
.align 5
|
|
|
|
vector_bhb_loop8_\name:
|
|
|
|
.if \correction
|
|
|
|
sub lr, lr, #\correction
|
|
|
|
.endif
|
|
|
|
|
|
|
|
@ Save r0, lr_<exception> (parent PC)
|
|
|
|
stmia sp, {r0, lr}
|
|
|
|
|
|
|
|
@ bhb workaround
|
|
|
|
mov r0, #8
|
2022-04-20 16:46:17 +08:00
|
|
|
3: W(b) . + 4
|
2022-02-11 00:05:45 +08:00
|
|
|
subs r0, r0, #1
|
2022-03-12 01:13:17 +08:00
|
|
|
bne 3b
|
2022-04-20 16:57:45 +08:00
|
|
|
dsb nsh
|
|
|
|
@ isb not needed due to "movs pc, lr" in the vector stub
|
|
|
|
@ which gives a "context synchronisation".
|
2022-04-20 17:02:43 +08:00
|
|
|
b .Lvec_\name
|
2022-02-11 00:05:45 +08:00
|
|
|
ENDPROC(vector_bhb_loop8_\name)
|
|
|
|
.previous
|
|
|
|
#endif
|
|
|
|
|
2009-07-24 19:32:52 +08:00
|
|
|
.align 2
|
|
|
|
@ handler addresses follow this label
|
|
|
|
1:
|
2005-04-17 06:20:36 +08:00
|
|
|
.endm
|
|
|
|
|
2013-07-04 19:03:31 +08:00
|
|
|
.section .stubs, "ax", %progbits
|
ARM: 9201/1: spectre-bhb: rely on linker to emit cross-section literal loads
The assembler does not permit 'LDR PC, <sym>' when the symbol lives in a
different section, which is why we have been relying on rather fragile
open-coded arithmetic to load the address of the vector_swi routine into
the program counter using a single LDR instruction in the SWI slot in
the vector table. The literal was moved to a different section in
commit 19accfd373847 ("ARM: move vector stubs") to ensure that the
vector stubs page does not need to be mapped readable for user space,
which is the case for the vector page itself, as it carries the kuser
helpers as well.
So the cross-section literal load is open-coded, and this relies on the
address of vector_swi to be at the very start of the vector stubs page,
and we won't notice if we got it wrong until booting the kernel and see
it break. Fortunately, it was guaranteed to break, so this was fragile
but not problematic.
Now that we have added two other variants of the vector table, we have 3
occurrences of the same trick, and so the size of our ISA/compiler/CPU
validation space has tripled, in a way that may cause regressions to only
be observed once booting the image in question on a CPU that exercises a
particular vector table.
So let's switch to true cross section references, and let the linker fix
them up like it fixes up all the other cross section references in the
vector page.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
2022-04-20 17:06:50 +08:00
|
|
|
@ These need to remain at the start of the section so that
|
|
|
|
@ they are in range of the 'SWI' entries in the vector tables
|
|
|
|
@ located 4k down.
|
|
|
|
.L__vector_swi:
|
2013-07-04 18:40:32 +08:00
|
|
|
.word vector_swi
|
2022-02-11 00:05:45 +08:00
|
|
|
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
|
ARM: 9201/1: spectre-bhb: rely on linker to emit cross-section literal loads
The assembler does not permit 'LDR PC, <sym>' when the symbol lives in a
different section, which is why we have been relying on rather fragile
open-coded arithmetic to load the address of the vector_swi routine into
the program counter using a single LDR instruction in the SWI slot in
the vector table. The literal was moved to a different section in
commit 19accfd373847 ("ARM: move vector stubs") to ensure that the
vector stubs page does not need to be mapped readable for user space,
which is the case for the vector page itself, as it carries the kuser
helpers as well.
So the cross-section literal load is open-coded, and this relies on the
address of vector_swi to be at the very start of the vector stubs page,
and we won't notice if we got it wrong until booting the kernel and see
it break. Fortunately, it was guaranteed to break, so this was fragile
but not problematic.
Now that we have added two other variants of the vector table, we have 3
occurrences of the same trick, and so the size of our ISA/compiler/CPU
validation space has tripled, in a way that may cause regressions to only
be observed once booting the image in question on a CPU that exercises a
particular vector table.
So let's switch to true cross section references, and let the linker fix
them up like it fixes up all the other cross section references in the
vector page.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
2022-04-20 17:06:50 +08:00
|
|
|
.L__vector_bhb_loop8_swi:
|
2022-02-11 00:05:45 +08:00
|
|
|
.word vector_bhb_loop8_swi
|
ARM: 9201/1: spectre-bhb: rely on linker to emit cross-section literal loads
The assembler does not permit 'LDR PC, <sym>' when the symbol lives in a
different section, which is why we have been relying on rather fragile
open-coded arithmetic to load the address of the vector_swi routine into
the program counter using a single LDR instruction in the SWI slot in
the vector table. The literal was moved to a different section in
commit 19accfd373847 ("ARM: move vector stubs") to ensure that the
vector stubs page does not need to be mapped readable for user space,
which is the case for the vector page itself, as it carries the kuser
helpers as well.
So the cross-section literal load is open-coded, and this relies on the
address of vector_swi to be at the very start of the vector stubs page,
and we won't notice if we got it wrong until booting the kernel and see
it break. Fortunately, it was guaranteed to break, so this was fragile
but not problematic.
Now that we have added two other variants of the vector table, we have 3
occurrences of the same trick, and so the size of our ISA/compiler/CPU
validation space has tripled, in a way that may cause regressions to only
be observed once booting the image in question on a CPU that exercises a
particular vector table.
So let's switch to true cross section references, and let the linker fix
them up like it fixes up all the other cross section references in the
vector page.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
2022-04-20 17:06:50 +08:00
|
|
|
.L__vector_bhb_bpiall_swi:
|
2022-02-11 00:05:45 +08:00
|
|
|
.word vector_bhb_bpiall_swi
|
|
|
|
#endif
|
2013-07-04 18:40:32 +08:00
|
|
|
|
|
|
|
vector_rst:
|
|
|
|
ARM( swi SYS_ERROR0 )
|
|
|
|
THUMB( svc #0 )
|
|
|
|
THUMB( nop )
|
|
|
|
b vector_und
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Interrupt dispatcher
|
|
|
|
*/
|
2005-11-06 22:42:37 +08:00
|
|
|
vector_stub irq, IRQ_MODE, 4
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
.long __irq_usr @ 0 (USR_26 / USR_32)
|
|
|
|
.long __irq_invalid @ 1 (FIQ_26 / FIQ_32)
|
|
|
|
.long __irq_invalid @ 2 (IRQ_26 / IRQ_32)
|
|
|
|
.long __irq_svc @ 3 (SVC_26 / SVC_32)
|
|
|
|
.long __irq_invalid @ 4
|
|
|
|
.long __irq_invalid @ 5
|
|
|
|
.long __irq_invalid @ 6
|
|
|
|
.long __irq_invalid @ 7
|
|
|
|
.long __irq_invalid @ 8
|
|
|
|
.long __irq_invalid @ 9
|
|
|
|
.long __irq_invalid @ a
|
|
|
|
.long __irq_invalid @ b
|
|
|
|
.long __irq_invalid @ c
|
|
|
|
.long __irq_invalid @ d
|
|
|
|
.long __irq_invalid @ e
|
|
|
|
.long __irq_invalid @ f
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Data abort dispatcher
|
|
|
|
* Enter in ABT mode, spsr = USR CPSR, lr = USR PC
|
|
|
|
*/
|
2005-11-06 22:42:37 +08:00
|
|
|
vector_stub dabt, ABT_MODE, 8
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
.long __dabt_usr @ 0 (USR_26 / USR_32)
|
|
|
|
.long __dabt_invalid @ 1 (FIQ_26 / FIQ_32)
|
|
|
|
.long __dabt_invalid @ 2 (IRQ_26 / IRQ_32)
|
|
|
|
.long __dabt_svc @ 3 (SVC_26 / SVC_32)
|
|
|
|
.long __dabt_invalid @ 4
|
|
|
|
.long __dabt_invalid @ 5
|
|
|
|
.long __dabt_invalid @ 6
|
|
|
|
.long __dabt_invalid @ 7
|
|
|
|
.long __dabt_invalid @ 8
|
|
|
|
.long __dabt_invalid @ 9
|
|
|
|
.long __dabt_invalid @ a
|
|
|
|
.long __dabt_invalid @ b
|
|
|
|
.long __dabt_invalid @ c
|
|
|
|
.long __dabt_invalid @ d
|
|
|
|
.long __dabt_invalid @ e
|
|
|
|
.long __dabt_invalid @ f
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prefetch abort dispatcher
|
|
|
|
* Enter in ABT mode, spsr = USR CPSR, lr = USR PC
|
|
|
|
*/
|
2005-11-06 22:42:37 +08:00
|
|
|
vector_stub pabt, ABT_MODE, 4
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
.long __pabt_usr @ 0 (USR_26 / USR_32)
|
|
|
|
.long __pabt_invalid @ 1 (FIQ_26 / FIQ_32)
|
|
|
|
.long __pabt_invalid @ 2 (IRQ_26 / IRQ_32)
|
|
|
|
.long __pabt_svc @ 3 (SVC_26 / SVC_32)
|
|
|
|
.long __pabt_invalid @ 4
|
|
|
|
.long __pabt_invalid @ 5
|
|
|
|
.long __pabt_invalid @ 6
|
|
|
|
.long __pabt_invalid @ 7
|
|
|
|
.long __pabt_invalid @ 8
|
|
|
|
.long __pabt_invalid @ 9
|
|
|
|
.long __pabt_invalid @ a
|
|
|
|
.long __pabt_invalid @ b
|
|
|
|
.long __pabt_invalid @ c
|
|
|
|
.long __pabt_invalid @ d
|
|
|
|
.long __pabt_invalid @ e
|
|
|
|
.long __pabt_invalid @ f
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Undef instr entry dispatcher
|
|
|
|
* Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
|
|
|
|
*/
|
2005-11-06 22:42:37 +08:00
|
|
|
vector_stub und, UND_MODE
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
.long __und_usr @ 0 (USR_26 / USR_32)
|
|
|
|
.long __und_invalid @ 1 (FIQ_26 / FIQ_32)
|
|
|
|
.long __und_invalid @ 2 (IRQ_26 / IRQ_32)
|
|
|
|
.long __und_svc @ 3 (SVC_26 / SVC_32)
|
|
|
|
.long __und_invalid @ 4
|
|
|
|
.long __und_invalid @ 5
|
|
|
|
.long __und_invalid @ 6
|
|
|
|
.long __und_invalid @ 7
|
|
|
|
.long __und_invalid @ 8
|
|
|
|
.long __und_invalid @ 9
|
|
|
|
.long __und_invalid @ a
|
|
|
|
.long __und_invalid @ b
|
|
|
|
.long __und_invalid @ c
|
|
|
|
.long __und_invalid @ d
|
|
|
|
.long __und_invalid @ e
|
|
|
|
.long __und_invalid @ f
|
|
|
|
|
|
|
|
.align 5
|
|
|
|
|
2013-07-04 18:40:32 +08:00
|
|
|
/*=============================================================================
|
|
|
|
* Address exception handler
|
|
|
|
*-----------------------------------------------------------------------------
|
|
|
|
* These aren't too critical.
|
|
|
|
* (they're not supposed to happen, and won't happen in 32-bit data mode).
|
|
|
|
*/
|
|
|
|
|
|
|
|
vector_addrexcptn:
|
|
|
|
b vector_addrexcptn
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*=============================================================================
|
2014-09-18 00:12:06 +08:00
|
|
|
* FIQ "NMI" handler
|
2005-04-17 06:20:36 +08:00
|
|
|
*-----------------------------------------------------------------------------
|
2014-09-18 00:12:06 +08:00
|
|
|
 * Handle a FIQ using the SVC stack allowing FIQ to act like NMI on x86
|
2022-02-11 00:05:45 +08:00
|
|
|
 * systems. This must be the last vector stub, so let's place it in its own
|
|
|
|
* subsection.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2022-02-11 00:05:45 +08:00
|
|
|
.subsection 2
|
2014-09-18 00:12:06 +08:00
|
|
|
vector_stub fiq, FIQ_MODE, 4
|
|
|
|
|
|
|
|
.long __fiq_usr @ 0 (USR_26 / USR_32)
|
|
|
|
.long __fiq_svc @ 1 (FIQ_26 / FIQ_32)
|
|
|
|
.long __fiq_svc @ 2 (IRQ_26 / IRQ_32)
|
|
|
|
.long __fiq_svc @ 3 (SVC_26 / SVC_32)
|
|
|
|
.long __fiq_svc @ 4
|
|
|
|
.long __fiq_svc @ 5
|
|
|
|
.long __fiq_svc @ 6
|
|
|
|
.long __fiq_abt @ 7
|
|
|
|
.long __fiq_svc @ 8
|
|
|
|
.long __fiq_svc @ 9
|
|
|
|
.long __fiq_svc @ a
|
|
|
|
.long __fiq_svc @ b
|
|
|
|
.long __fiq_svc @ c
|
|
|
|
.long __fiq_svc @ d
|
|
|
|
.long __fiq_svc @ e
|
|
|
|
.long __fiq_svc @ f
|
2005-04-17 06:20:36 +08:00
|
|
|
|
ARM: 8515/2: move .vectors and .stubs sections back into the kernel VMA
Commit b9b32bf70f2f ("ARM: use linker magic for vectors and vector stubs")
updated the linker script to emit the .vectors and .stubs sections into a
VMA range that is zero based and disjoint from the normal static kernel
region. The reason for that was that this way, the sections can be placed
exactly 4 KB apart, while the payload of the .vectors section is only 32
bytes.
Since the symbols that are part of the .stubs section are emitted into the
kallsyms table, they appear with zero based addresses as well, e.g.,
00001004 t vector_rst
00001020 t vector_irq
000010a0 t vector_dabt
00001120 t vector_pabt
000011a0 t vector_und
00001220 t vector_addrexcptn
00001240 t vector_fiq
00001240 T vector_fiq_offset
As this confuses perf when it accesses the kallsyms tables, commit
7122c3e9154b ("scripts/link-vmlinux.sh: only filter kernel symbols for
arm") implemented a somewhat ugly special case for ARM, where the value
of CONFIG_PAGE_OFFSET is passed to scripts/kallsyms, and symbols whose
addresses are below it are filtered out. Note that this special case only
applies to CONFIG_XIP_KERNEL=n, not because the issue the patch addresses
exists only in that case, but because finding a limit below which to apply
the filtering is not entirely straightforward.
Since the .vectors and .stubs sections contain position independent code
that is never executed in place, we can emit it at its most likely runtime
VMA (for more recent CPUs), which is 0xffff0000 for the vector table and
0xffff1000 for the stubs. Not only does this fix the perf issue with
kallsyms, allowing us to drop the special case in scripts/kallsyms
entirely, it also gives debuggers a more realistic view of the address
space, and setting breakpoints or single stepping through code in the
vector table or the stubs is more likely to work as expected on CPUs that
use a high vector address. E.g.,
00001240 A vector_fiq_offset
...
c0c35000 T __init_begin
c0c35000 T __vectors_start
c0c35020 T __stubs_start
c0c35020 T __vectors_end
c0c352e0 T _sinittext
c0c352e0 T __stubs_end
...
ffff1004 t vector_rst
ffff1020 t vector_irq
ffff10a0 t vector_dabt
ffff1120 t vector_pabt
ffff11a0 t vector_und
ffff1220 t vector_addrexcptn
ffff1240 T vector_fiq
(Note that vector_fiq_offset is now an absolute symbol, which kallsyms
already ignores by default)
The LMA footprint is identical with or without this change, only the VMAs
are different:
Before:
Idx Name Size VMA LMA File off Algn
...
14 .notes 00000024 c0c34020 c0c34020 00a34020 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
15 .vectors 00000020 00000000 c0c35000 00a40000 2**1
CONTENTS, ALLOC, LOAD, READONLY, CODE
16 .stubs 000002c0 00001000 c0c35020 00a41000 2**5
CONTENTS, ALLOC, LOAD, READONLY, CODE
17 .init.text 0006b1b8 c0c352e0 c0c352e0 00a452e0 2**5
CONTENTS, ALLOC, LOAD, READONLY, CODE
...
After:
Idx Name Size VMA LMA File off Algn
...
14 .notes 00000024 c0c34020 c0c34020 00a34020 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
15 .vectors 00000020 ffff0000 c0c35000 00a40000 2**1
CONTENTS, ALLOC, LOAD, READONLY, CODE
16 .stubs 000002c0 ffff1000 c0c35020 00a41000 2**5
CONTENTS, ALLOC, LOAD, READONLY, CODE
17 .init.text 0006b1b8 c0c352e0 c0c352e0 00a452e0 2**5
CONTENTS, ALLOC, LOAD, READONLY, CODE
...
Acked-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Chris Brandt <chris.brandt@renesas.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-02-10 18:41:08 +08:00
|
|
|
.globl vector_fiq
|
2013-07-09 08:03:17 +08:00
|
|
|
|
2013-07-04 19:03:31 +08:00
|
|
|
.section .vectors, "ax", %progbits
|
|
|
|
W(b) vector_rst
|
|
|
|
W(b) vector_und
|
ARM: 9201/1: spectre-bhb: rely on linker to emit cross-section literal loads
The assembler does not permit 'LDR PC, <sym>' when the symbol lives in a
different section, which is why we have been relying on rather fragile
open-coded arithmetic to load the address of the vector_swi routine into
the program counter using a single LDR instruction in the SWI slot in
the vector table. The literal was moved to a different section in
commit 19accfd373847 ("ARM: move vector stubs") to ensure that the
vector stubs page does not need to be mapped readable for user space,
which is the case for the vector page itself, as it carries the kuser
helpers as well.
So the cross-section literal load is open-coded, and this relies on the
address of vector_swi to be at the very start of the vector stubs page,
and we won't notice if we got it wrong until booting the kernel and see
it break. Fortunately, it was guaranteed to break, so this was fragile
but not problematic.
Now that we have added two other variants of the vector table, we have 3
occurrences of the same trick, and so the size of our ISA/compiler/CPU
validation space has tripled, in a way that may cause regressions to only
be observed once booting the image in question on a CPU that exercises a
particular vector table.
So let's switch to true cross section references, and let the linker fix
them up like it fixes up all the other cross section references in the
vector page.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
2022-04-20 17:06:50 +08:00
|
|
|
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi )
|
|
|
|
THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi )
|
|
|
|
W(ldr) pc, .
|
2013-07-04 19:03:31 +08:00
|
|
|
W(b) vector_pabt
|
|
|
|
W(b) vector_dabt
|
|
|
|
W(b) vector_addrexcptn
|
|
|
|
W(b) vector_irq
|
|
|
|
W(b) vector_fiq
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2022-02-11 00:05:45 +08:00
|
|
|
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
|
|
|
|
.section .vectors.bhb.loop8, "ax", %progbits
|
|
|
|
W(b) vector_rst
|
|
|
|
W(b) vector_bhb_loop8_und
|
ARM: 9201/1: spectre-bhb: rely on linker to emit cross-section literal loads
The assembler does not permit 'LDR PC, <sym>' when the symbol lives in a
different section, which is why we have been relying on rather fragile
open-coded arithmetic to load the address of the vector_swi routine into
the program counter using a single LDR instruction in the SWI slot in
the vector table. The literal was moved to a different section in
commit 19accfd373847 ("ARM: move vector stubs") to ensure that the
vector stubs page does not need to be mapped readable for user space,
which is the case for the vector page itself, as it carries the kuser
helpers as well.
So the cross-section literal load is open-coded, and this relies on the
address of vector_swi to be at the very start of the vector stubs page,
and we won't notice if we got it wrong until booting the kernel and see
it break. Fortunately, it was guaranteed to break, so this was fragile
but not problematic.
Now that we have added two other variants of the vector table, we have 3
occurrences of the same trick, and so the size of our ISA/compiler/CPU
validation space has tripled, in a way that may cause regressions to only
be observed once booting the image in question on a CPU that exercises a
particular vector table.
So let's switch to true cross section references, and let the linker fix
them up like it fixes up all the other cross section references in the
vector page.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
2022-04-20 17:06:50 +08:00
|
|
|
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi )
|
|
|
|
THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi )
|
|
|
|
W(ldr) pc, .
|
2022-02-11 00:05:45 +08:00
|
|
|
W(b) vector_bhb_loop8_pabt
|
|
|
|
W(b) vector_bhb_loop8_dabt
|
|
|
|
W(b) vector_addrexcptn
|
|
|
|
W(b) vector_bhb_loop8_irq
|
|
|
|
W(b) vector_bhb_loop8_fiq
|
|
|
|
|
|
|
|
.section .vectors.bhb.bpiall, "ax", %progbits
|
|
|
|
W(b) vector_rst
|
|
|
|
W(b) vector_bhb_bpiall_und
|
ARM: 9201/1: spectre-bhb: rely on linker to emit cross-section literal loads
The assembler does not permit 'LDR PC, <sym>' when the symbol lives in a
different section, which is why we have been relying on rather fragile
open-coded arithmetic to load the address of the vector_swi routine into
the program counter using a single LDR instruction in the SWI slot in
the vector table. The literal was moved to a different section in
commit 19accfd373847 ("ARM: move vector stubs") to ensure that the
vector stubs page does not need to be mapped readable for user space,
which is the case for the vector page itself, as it carries the kuser
helpers as well.
So the cross-section literal load is open-coded, and this relies on the
address of vector_swi to be at the very start of the vector stubs page,
and we won't notice if we got it wrong until booting the kernel and see
it break. Fortunately, it was guaranteed to break, so this was fragile
but not problematic.
Now that we have added two other variants of the vector table, we have 3
occurrences of the same trick, and so the size of our ISA/compiler/CPU
validation space has tripled, in a way that may cause regressions to only
be observed once booting the image in question on a CPU that exercises a
particular vector table.
So let's switch to true cross section references, and let the linker fix
them up like it fixes up all the other cross section references in the
vector page.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
2022-04-20 17:06:50 +08:00
|
|
|
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi )
|
|
|
|
THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_bpiall_swi )
|
|
|
|
W(ldr) pc, .
|
2022-02-11 00:05:45 +08:00
|
|
|
W(b) vector_bhb_bpiall_pabt
|
|
|
|
W(b) vector_bhb_bpiall_dabt
|
|
|
|
W(b) vector_addrexcptn
|
|
|
|
W(b) vector_bhb_bpiall_irq
|
|
|
|
W(b) vector_bhb_bpiall_fiq
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
.data
|
2017-07-26 19:49:31 +08:00
|
|
|
.align 2
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
.globl cr_alignment
|
|
|
|
cr_alignment:
|
|
|
|
.space 4
|