Merge remote-tracking branch 'origin/kvm-arm64/psci-relay' into kvmarm-master/next

Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Marc Zyngier 2020-12-09 10:00:24 +00:00
commit 3a514592b6
65 changed files with 1722 additions and 834 deletions

View File

@ -2259,6 +2259,16 @@
for all guests.
Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
kvm-arm.mode=
[KVM,ARM] Select one of KVM/arm64's modes of operation.
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
Not valid if the kernel is running in EL2.
Defaults to VHE/nVHE based on hardware support and
the value of CONFIG_ARM64_VHE.
kvm-arm.vgic_v3_group0_trap=
[KVM,ARM] Trap guest accesses to GICv3 group-0
system registers

View File

@ -195,7 +195,6 @@ config ARM64
select PCI_SYSCALL if PCI
select POWER_RESET
select POWER_SUPPLY
select SET_FS
select SPARSE_IRQ
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
@ -1388,6 +1387,9 @@ config ARM64_PAN
The feature is detected at runtime, and will remain as a 'nop'
instruction if the cpu does not implement the feature.
config AS_HAS_LDAPR
def_bool $(as-instr,.arch_extension rcpc)
config ARM64_LSE_ATOMICS
bool
default ARM64_USE_LSE_ATOMICS
@ -1425,27 +1427,6 @@ endmenu
menu "ARMv8.2 architectural features"
config ARM64_UAO
bool "Enable support for User Access Override (UAO)"
default y
help
User Access Override (UAO; part of the ARMv8.2 Extensions)
causes the 'unprivileged' variant of the load/store instructions to
be overridden to be privileged.
This option changes get_user() and friends to use the 'unprivileged'
variant of the load/store instructions. This ensures that user-space
really did have access to the supplied memory. When addr_limit is
set to kernel memory the UAO bit will be set, allowing privileged
access to kernel memory.
Choosing this option will cause copy_to_user() et al to use user-space
memory permissions.
The feature is detected at runtime, the kernel will use the
regular load/store instructions if the cpu does not implement the
feature.
config ARM64_PMEM
bool "Enable support for persistent memory"
select ARCH_HAS_PMEM_API

View File

@ -0,0 +1,217 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_ALTERNATIVE_MACROS_H
#define __ASM_ALTERNATIVE_MACROS_H
#include <asm/cpucaps.h>
#define ARM64_CB_PATCH ARM64_NCAPS
/* A64 instructions are always 32 bits. */
#define AARCH64_INSN_SIZE 4
#ifndef __ASSEMBLY__
#include <linux/stringify.h>
#define ALTINSTR_ENTRY(feature) \
" .word 661b - .\n" /* label */ \
" .word 663f - .\n" /* new instruction */ \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
#define ALTINSTR_ENTRY_CB(feature, cb) \
" .word 661b - .\n" /* label */ \
" .word " __stringify(cb) "- .\n" /* callback */ \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
/*
* alternative assembly primitive:
*
* If any of these .org directive fail, it means that insn1 and insn2
* don't have the same length. This used to be written as
*
* .if ((664b-663b) != (662b-661b))
* .error "Alternatives instruction length mismatch"
* .endif
*
* but most assemblers die if insn1 or insn2 have a .inst. This should
* be fixed in a binutils release posterior to 2.25.51.0.2 (anything
* containing commit 4e4d08cf7399b606 or c1baaddf8861).
*
* Alternatives with callbacks do not generate replacement instructions.
*/
#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feature) \
".popsection\n" \
".subsection 1\n" \
"663:\n\t" \
newinstr "\n" \
"664:\n\t" \
".org . - (664b-663b) + (662b-661b)\n\t" \
".org . - (662b-661b) + (664b-663b)\n\t" \
".previous\n" \
".endif\n"
#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY_CB(feature, cb) \
".popsection\n" \
"663:\n\t" \
"664:\n\t" \
".endif\n"
#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
#define ALTERNATIVE_CB(oldinstr, cb) \
__ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
#else
#include <asm/assembler.h>
.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
.word \orig_offset - .
.word \alt_offset - .
.hword \feature
.byte \orig_len
.byte \alt_len
.endm
.macro alternative_insn insn1, insn2, cap, enable = 1
.if \enable
661: \insn1
662: .pushsection .altinstructions, "a"
altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
.popsection
.subsection 1
663: \insn2
664: .previous
.org . - (664b-663b) + (662b-661b)
.org . - (662b-661b) + (664b-663b)
.endif
.endm
/*
* Alternative sequences
*
* The code for the case where the capability is not present will be
* assembled and linked as normal. There are no restrictions on this
* code.
*
* The code for the case where the capability is present will be
* assembled into a special section to be used for dynamic patching.
* Code for that case must:
*
* 1. Be exactly the same length (in bytes) as the default code
* sequence.
*
* 2. Not contain a branch target that is used outside of the
* alternative sequence it is defined in (branches into an
* alternative sequence are not fixed up).
*/
/*
* Begin an alternative code sequence.
*/
.macro alternative_if_not cap
.set .Lasm_alt_mode, 0
.pushsection .altinstructions, "a"
altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
.popsection
661:
.endm
.macro alternative_if cap
.set .Lasm_alt_mode, 1
.pushsection .altinstructions, "a"
altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
.popsection
.subsection 1
.align 2 /* So GAS knows label 661 is suitably aligned */
661:
.endm
.macro alternative_cb cb
.set .Lasm_alt_mode, 0
.pushsection .altinstructions, "a"
altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
.popsection
661:
.endm
/*
* Provide the other half of the alternative code sequence.
*/
.macro alternative_else
662:
.if .Lasm_alt_mode==0
.subsection 1
.else
.previous
.endif
663:
.endm
/*
* Complete an alternative code sequence.
*/
.macro alternative_endif
664:
.if .Lasm_alt_mode==0
.previous
.endif
.org . - (664b-663b) + (662b-661b)
.org . - (662b-661b) + (664b-663b)
.endm
/*
* Callback-based alternative epilogue
*/
.macro alternative_cb_end
662:
.endm
/*
* Provides a trivial alternative or default sequence consisting solely
* of NOPs. The number of NOPs is chosen automatically to match the
* previous case.
*/
.macro alternative_else_nop_endif
alternative_else
nops (662b-661b) / AARCH64_INSN_SIZE
alternative_endif
.endm
#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
.macro user_alt, label, oldinstr, newinstr, cond
9999: alternative_insn "\oldinstr", "\newinstr", \cond
_asm_extable 9999b, \label
.endm
#endif /* __ASSEMBLY__ */
/*
* Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
*
* Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
* N.B. If CONFIG_FOO is specified, but not selected, the whole block
* will be omitted, including oldinstr.
*/
#define ALTERNATIVE(oldinstr, newinstr, ...) \
_ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
#endif /* __ASM_ALTERNATIVE_MACROS_H */

View File

@ -2,17 +2,13 @@
#ifndef __ASM_ALTERNATIVE_H
#define __ASM_ALTERNATIVE_H
#include <asm/cpucaps.h>
#include <asm/insn.h>
#define ARM64_CB_PATCH ARM64_NCAPS
#include <asm/alternative-macros.h>
#ifndef __ASSEMBLY__
#include <linux/init.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
struct alt_instr {
s32 orig_offset; /* offset to original instruction */
@ -35,264 +31,5 @@ void apply_alternatives_module(void *start, size_t length);
static inline void apply_alternatives_module(void *start, size_t length) { }
#endif
#define ALTINSTR_ENTRY(feature) \
" .word 661b - .\n" /* label */ \
" .word 663f - .\n" /* new instruction */ \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
#define ALTINSTR_ENTRY_CB(feature, cb) \
" .word 661b - .\n" /* label */ \
" .word " __stringify(cb) "- .\n" /* callback */ \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
/*
* alternative assembly primitive:
*
* If any of these .org directive fail, it means that insn1 and insn2
* don't have the same length. This used to be written as
*
* .if ((664b-663b) != (662b-661b))
* .error "Alternatives instruction length mismatch"
* .endif
*
* but most assemblers die if insn1 or insn2 have a .inst. This should
* be fixed in a binutils release posterior to 2.25.51.0.2 (anything
* containing commit 4e4d08cf7399b606 or c1baaddf8861).
*
* Alternatives with callbacks do not generate replacement instructions.
*/
#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feature) \
".popsection\n" \
".subsection 1\n" \
"663:\n\t" \
newinstr "\n" \
"664:\n\t" \
".org . - (664b-663b) + (662b-661b)\n\t" \
".org . - (662b-661b) + (664b-663b)\n\t" \
".previous\n" \
".endif\n"
#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY_CB(feature, cb) \
".popsection\n" \
"663:\n\t" \
"664:\n\t" \
".endif\n"
#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
#define ALTERNATIVE_CB(oldinstr, cb) \
__ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
#else
#include <asm/assembler.h>
.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
.word \orig_offset - .
.word \alt_offset - .
.hword \feature
.byte \orig_len
.byte \alt_len
.endm
.macro alternative_insn insn1, insn2, cap, enable = 1
.if \enable
661: \insn1
662: .pushsection .altinstructions, "a"
altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
.popsection
.subsection 1
663: \insn2
664: .previous
.org . - (664b-663b) + (662b-661b)
.org . - (662b-661b) + (664b-663b)
.endif
.endm
/*
* Alternative sequences
*
* The code for the case where the capability is not present will be
* assembled and linked as normal. There are no restrictions on this
* code.
*
* The code for the case where the capability is present will be
* assembled into a special section to be used for dynamic patching.
* Code for that case must:
*
* 1. Be exactly the same length (in bytes) as the default code
* sequence.
*
* 2. Not contain a branch target that is used outside of the
* alternative sequence it is defined in (branches into an
* alternative sequence are not fixed up).
*/
/*
* Begin an alternative code sequence.
*/
.macro alternative_if_not cap
.set .Lasm_alt_mode, 0
.pushsection .altinstructions, "a"
altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
.popsection
661:
.endm
.macro alternative_if cap
.set .Lasm_alt_mode, 1
.pushsection .altinstructions, "a"
altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
.popsection
.subsection 1
.align 2 /* So GAS knows label 661 is suitably aligned */
661:
.endm
.macro alternative_cb cb
.set .Lasm_alt_mode, 0
.pushsection .altinstructions, "a"
altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
.popsection
661:
.endm
/*
* Provide the other half of the alternative code sequence.
*/
.macro alternative_else
662:
.if .Lasm_alt_mode==0
.subsection 1
.else
.previous
.endif
663:
.endm
/*
* Complete an alternative code sequence.
*/
.macro alternative_endif
664:
.if .Lasm_alt_mode==0
.previous
.endif
.org . - (664b-663b) + (662b-661b)
.org . - (662b-661b) + (664b-663b)
.endm
/*
* Callback-based alternative epilogue
*/
.macro alternative_cb_end
662:
.endm
/*
* Provides a trivial alternative or default sequence consisting solely
* of NOPs. The number of NOPs is chosen automatically to match the
* previous case.
*/
.macro alternative_else_nop_endif
alternative_else
nops (662b-661b) / AARCH64_INSN_SIZE
alternative_endif
.endm
#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
.macro user_alt, label, oldinstr, newinstr, cond
9999: alternative_insn "\oldinstr", "\newinstr", \cond
_asm_extable 9999b, \label
.endm
/*
* Generate the assembly for UAO alternatives with exception table entries.
* This is complicated as there is no post-increment or pair versions of the
* unprivileged instructions, and USER() only works for single instructions.
*/
#ifdef CONFIG_ARM64_UAO
.macro uao_ldp l, reg1, reg2, addr, post_inc
alternative_if_not ARM64_HAS_UAO
8888: ldp \reg1, \reg2, [\addr], \post_inc;
8889: nop;
nop;
alternative_else
ldtr \reg1, [\addr];
ldtr \reg2, [\addr, #8];
add \addr, \addr, \post_inc;
alternative_endif
_asm_extable 8888b,\l;
_asm_extable 8889b,\l;
.endm
.macro uao_stp l, reg1, reg2, addr, post_inc
alternative_if_not ARM64_HAS_UAO
8888: stp \reg1, \reg2, [\addr], \post_inc;
8889: nop;
nop;
alternative_else
sttr \reg1, [\addr];
sttr \reg2, [\addr, #8];
add \addr, \addr, \post_inc;
alternative_endif
_asm_extable 8888b,\l;
_asm_extable 8889b,\l;
.endm
.macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
alternative_if_not ARM64_HAS_UAO
8888: \inst \reg, [\addr], \post_inc;
nop;
alternative_else
\alt_inst \reg, [\addr];
add \addr, \addr, \post_inc;
alternative_endif
_asm_extable 8888b,\l;
.endm
#else
.macro uao_ldp l, reg1, reg2, addr, post_inc
USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
.endm
.macro uao_stp l, reg1, reg2, addr, post_inc
USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
.endm
.macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
USER(\l, \inst \reg, [\addr], \post_inc)
.endm
#endif
#endif /* __ASSEMBLY__ */
/*
* Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
*
* Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
* N.B. If CONFIG_FOO is specified, but not selected, the whole block
* will be omitted, including oldinstr.
*/
#define ALTERNATIVE(oldinstr, newinstr, ...) \
_ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
#endif /* __ASSEMBLY__ */
#endif /* __ASM_ALTERNATIVE_H */

View File

@ -2,7 +2,7 @@
#ifndef __ASM_ASM_UACCESS_H
#define __ASM_ASM_UACCESS_H
#include <asm/alternative.h>
#include <asm/alternative-macros.h>
#include <asm/kernel-pgtable.h>
#include <asm/mmu.h>
#include <asm/sysreg.h>
@ -58,4 +58,33 @@ alternative_else_nop_endif
.endm
#endif
/*
* Generate the assembly for LDTR/STTR with exception table entries.
* This is complicated as there is no post-increment or pair versions of the
* unprivileged instructions, and USER() only works for single instructions.
*/
.macro user_ldp l, reg1, reg2, addr, post_inc
8888: ldtr \reg1, [\addr];
8889: ldtr \reg2, [\addr, #8];
add \addr, \addr, \post_inc;
_asm_extable 8888b,\l;
_asm_extable 8889b,\l;
.endm
.macro user_stp l, reg1, reg2, addr, post_inc
8888: sttr \reg1, [\addr];
8889: sttr \reg2, [\addr, #8];
add \addr, \addr, \post_inc;
_asm_extable 8888b,\l;
_asm_extable 8889b,\l;
.endm
.macro user_ldst l, inst, reg, addr, post_inc
8888: \inst \reg, [\addr];
add \addr, \addr, \post_inc;
_asm_extable 8888b,\l;
.endm
#endif

View File

@ -16,8 +16,6 @@
#define ARM64_WORKAROUND_CAVIUM_23154 6
#define ARM64_WORKAROUND_834220 7
#define ARM64_HAS_NO_HW_PREFETCH 8
#define ARM64_HAS_UAO 9
#define ARM64_ALT_PAN_NOT_UAO 10
#define ARM64_HAS_VIRT_HOST_EXTN 11
#define ARM64_WORKAROUND_CAVIUM_27456 12
#define ARM64_HAS_32BIT_EL0 13
@ -66,7 +64,9 @@
#define ARM64_HAS_TLB_RANGE 56
#define ARM64_MTE 57
#define ARM64_WORKAROUND_1508412 58
#define ARM64_HAS_LDAPR 59
#define ARM64_KVM_PROTECTED_MODE 60
#define ARM64_NCAPS 59
#define ARM64_NCAPS 61
#endif /* __ASM_CPUCAPS_H */

View File

@ -669,10 +669,16 @@ static __always_inline bool system_supports_fpsimd(void)
return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
}
static inline bool system_uses_hw_pan(void)
{
return IS_ENABLED(CONFIG_ARM64_PAN) &&
cpus_have_const_cap(ARM64_HAS_PAN);
}
static inline bool system_uses_ttbr0_pan(void)
{
return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
!cpus_have_const_cap(ARM64_HAS_PAN);
!system_uses_hw_pan();
}
static __always_inline bool system_supports_sve(void)
@ -769,6 +775,13 @@ static inline bool cpu_has_hw_af(void)
ID_AA64MMFR1_HADBS_SHIFT);
}
static inline bool cpu_has_pan(void)
{
u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
return cpuid_feature_extract_unsigned_field(mmfr1,
ID_AA64MMFR1_PAN_SHIFT);
}
#ifdef CONFIG_ARM64_AMU_EXTN
/* Check whether the cpu supports the Activity Monitors Unit (AMU) */
extern bool cpu_has_amu_feat(int cpu);

View File

@ -0,0 +1,181 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012,2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
#ifndef __ARM_KVM_INIT_H__
#define __ARM_KVM_INIT_H__
#ifndef __ASSEMBLY__
#error Assembly-only header
#endif
#include <asm/kvm_arm.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>
#include <linux/irqchip/arm-gic-v3.h>
.macro __init_el2_sctlr
mov_q x0, INIT_SCTLR_EL2_MMU_OFF
msr sctlr_el2, x0
isb
.endm
/*
* Allow Non-secure EL1 and EL0 to access physical timer and counter.
* This is not necessary for VHE, since the host kernel runs in EL2,
* and EL0 accesses are configured in the later stage of boot process.
* Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
* as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
* to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
* to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
* EL2.
*/
.macro __init_el2_timers mode
.ifeqs "\mode", "nvhe"
mrs x0, cnthctl_el2
orr x0, x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
.endif
msr cntvoff_el2, xzr // Clear virtual offset
.endm
.macro __init_el2_debug mode
mrs x1, id_aa64dfr0_el1
sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
cmp x0, #1
b.lt 1f // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
1:
csel x2, xzr, x0, lt // all PMU counters from EL1
/* Statistical profiling */
ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
cbz x0, 3f // Skip if SPE not present
.ifeqs "\mode", "nvhe"
mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2,
and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
cbnz x0, 2f // then permit sampling of physical
mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
1 << SYS_PMSCR_EL2_PA_SHIFT)
msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter
2:
mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
orr x2, x2, x0 // If we don't have VHE, then
// use EL1&0 translation.
.else
orr x2, x2, #MDCR_EL2_TPMS // For VHE, use EL2 translation
// and disable access from EL1
.endif
3:
msr mdcr_el2, x2 // Configure debug traps
.endm
/* LORegions */
.macro __init_el2_lor
mrs x1, id_aa64mmfr1_el1
ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
cbz x0, 1f
msr_s SYS_LORC_EL1, xzr
1:
.endm
/* Stage-2 translation */
.macro __init_el2_stage2
msr vttbr_el2, xzr
.endm
/* GICv3 system register access */
.macro __init_el2_gicv3
mrs x0, id_aa64pfr0_el1
ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
cbz x0, 1f
mrs_s x0, SYS_ICC_SRE_EL2
orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
msr_s SYS_ICC_SRE_EL2, x0
isb // Make sure SRE is now set
mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
tbz x0, #0, 1f // and check that it sticks
msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
1:
.endm
.macro __init_el2_hstr
msr hstr_el2, xzr // Disable CP15 traps to EL2
.endm
/* Virtual CPU ID registers */
.macro __init_el2_nvhe_idregs
mrs x0, midr_el1
mrs x1, mpidr_el1
msr vpidr_el2, x0
msr vmpidr_el2, x1
.endm
/* Coprocessor traps */
.macro __init_el2_nvhe_cptr
mov x0, #0x33ff
msr cptr_el2, x0 // Disable copro. traps to EL2
.endm
/* SVE register access */
.macro __init_el2_nvhe_sve
mrs x1, id_aa64pfr0_el1
ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
cbz x1, 1f
bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
msr cptr_el2, x0 // Disable copro. traps to EL2
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
1:
.endm
.macro __init_el2_nvhe_prepare_eret
mov x0, #INIT_PSTATE_EL1
msr spsr_el2, x0
.endm
/**
* Initialize EL2 registers to sane values. This should be called early on all
* cores that were booted in EL2.
*
* Regs: x0, x1 and x2 are clobbered.
*/
.macro init_el2_state mode
.ifnes "\mode", "vhe"
.ifnes "\mode", "nvhe"
.error "Invalid 'mode' argument"
.endif
.endif
__init_el2_sctlr
__init_el2_timers \mode
__init_el2_debug \mode
__init_el2_lor
__init_el2_stage2
__init_el2_gicv3
__init_el2_hstr
/*
* When VHE is not in use, early init of EL2 needs to be done here.
* When VHE _is_ in use, EL1 will not be used in the host and
* requires no configuration, and all non-hyp-specific EL2 setup
* will be done via the _EL1 system register aliases in __cpu_setup.
*/
.ifeqs "\mode", "nvhe"
__init_el2_nvhe_idregs
__init_el2_nvhe_cptr
__init_el2_nvhe_sve
__init_el2_nvhe_prepare_eret
.endif
.endm
#endif /* __ARM_KVM_INIT_H__ */

View File

@ -10,6 +10,5 @@
#include <linux/sched.h>
extern unsigned long arch_align_stack(unsigned long sp);
void uao_thread_switch(struct task_struct *next);
#endif /* __ASM_EXEC_H */

View File

@ -16,7 +16,7 @@
do { \
unsigned int loops = FUTEX_MAX_LOOPS; \
\
uaccess_enable(); \
uaccess_enable_privileged(); \
asm volatile( \
" prfm pstl1strm, %2\n" \
"1: ldxr %w1, %2\n" \
@ -39,7 +39,7 @@ do { \
"+r" (loops) \
: "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \
: "memory"); \
uaccess_disable(); \
uaccess_disable_privileged(); \
} while (0)
static inline int
@ -95,7 +95,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
return -EFAULT;
uaddr = __uaccess_mask_ptr(_uaddr);
uaccess_enable();
uaccess_enable_privileged();
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
" prfm pstl1strm, %2\n"
"1: ldxr %w1, %2\n"
@ -118,7 +118,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
: "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN)
: "memory");
uaccess_disable();
uaccess_disable_privileged();
if (!ret)
*uval = val;

View File

@ -10,8 +10,7 @@
#include <linux/build_bug.h>
#include <linux/types.h>
/* A64 instructions are always 32 bits. */
#define AARCH64_INSN_SIZE 4
#include <asm/alternative.h>
#ifndef __ASSEMBLY__
/*

View File

@ -80,6 +80,7 @@
HCR_FMO | HCR_IMO | HCR_PTW )
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* TCR_EL2 Registers bits */

View File

@ -148,6 +148,14 @@ extern void *__vhe_undefined_symbol;
#endif
struct kvm_nvhe_init_params {
unsigned long mair_el2;
unsigned long tcr_el2;
unsigned long tpidr_el2;
unsigned long stack_hyp_va;
phys_addr_t pgd_pa;
};
/* Translate a kernel address @ptr into its equivalent linear mapping */
#define kvm_ksym_ref(ptr) \
({ \
@ -163,10 +171,8 @@ struct kvm_vcpu;
struct kvm_s2_mmu;
DECLARE_KVM_NVHE_SYM(__kvm_hyp_init);
DECLARE_KVM_NVHE_SYM(__kvm_hyp_host_vector);
DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init)
#define __kvm_hyp_host_vector CHOOSE_NVHE_SYM(__kvm_hyp_host_vector)
#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector)
extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];

View File

@ -50,6 +50,16 @@
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
/*
* Mode of operation configurable with kvm-arm.mode early param.
* See Documentation/admin-guide/kernel-parameters.txt for more information.
*/
enum kvm_mode {
KVM_MODE_DEFAULT,
KVM_MODE_PROTECTED,
};
enum kvm_mode kvm_get_mode(void);
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int kvm_sve_max_vl;

View File

@ -14,6 +14,7 @@
DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
#define read_sysreg_elx(r,nvh,vh) \
({ \
@ -92,10 +93,11 @@ void deactivate_traps_vhe_put(void);
u64 __guest_enter(struct kvm_vcpu *vcpu);
bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
void __noreturn hyp_panic(void);
#ifdef __KVM_NVHE_HYPERVISOR__
void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
#endif
#endif /* __ARM64_KVM_HYP_H__ */

View File

@ -94,6 +94,30 @@ alternative_cb_end
sub \reg, \reg, \tmp
.endm
/*
* Convert a kernel image address to a hyp VA
* reg: kernel address to be converted in place
* tmp: temporary register
*
* The actual code generation takes place in kvm_get_kimage_voffset, and
* the instructions below are only there to reserve the space and
* perform the register allocation (kvm_update_kimg_phys_offset uses the
* specific registers encoded in the instructions).
*/
.macro kimg_hyp_va reg, tmp
alternative_cb kvm_update_kimg_phys_offset
movz \tmp, #0
movk \tmp, #0, lsl #16
movk \tmp, #0, lsl #32
movk \tmp, #0, lsl #48
alternative_cb_end
sub \reg, \reg, \tmp
mov_q \tmp, PAGE_OFFSET
orr \reg, \reg, \tmp
kern_hyp_va \reg
.endm
#else
#include <linux/pgtable.h>

View File

@ -239,6 +239,12 @@ PERCPU_RET_OP(add, add, ldadd)
#define this_cpu_cmpxchg_8(pcp, o, n) \
_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
#ifdef __KVM_NVHE_HYPERVISOR__
extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
#define __per_cpu_offset
#define per_cpu_offset(cpu) __hyp_per_cpu_offset((cpu))
#endif
#include <asm-generic/percpu.h>
/* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */

View File

@ -8,9 +8,6 @@
#ifndef __ASM_PROCESSOR_H
#define __ASM_PROCESSOR_H
#define KERNEL_DS UL(-1)
#define USER_DS ((UL(1) << VA_BITS) - 1)
/*
* On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
* no point in shifting all network buffers by 2 bytes just to make some IP
@ -48,6 +45,7 @@
#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS_MIN)
#define TASK_SIZE_64 (UL(1) << vabits_actual)
#define TASK_SIZE_MAX (UL(1) << VA_BITS)
#ifdef CONFIG_COMPAT
#if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS)

View File

@ -16,6 +16,11 @@
#define CurrentEL_EL1 (1 << 2)
#define CurrentEL_EL2 (2 << 2)
#define INIT_PSTATE_EL1 \
(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h)
#define INIT_PSTATE_EL2 \
(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h)
/*
* PMR values used to mask/unmask interrupts.
*
@ -188,8 +193,7 @@ struct pt_regs {
s32 syscallno;
u32 unused2;
#endif
u64 orig_addr_limit;
u64 sdei_ttbr1;
/* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */
u64 pmr_save;
u64 stackframe[2];

View File

@ -0,0 +1,73 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2020 Google LLC.
*/
#ifndef __ASM_RWONCE_H
#define __ASM_RWONCE_H
#ifdef CONFIG_LTO
#include <linux/compiler_types.h>
#include <asm/alternative-macros.h>
#ifndef BUILD_VDSO
#ifdef CONFIG_AS_HAS_LDAPR
#define __LOAD_RCPC(sfx, regs...) \
ALTERNATIVE( \
"ldar" #sfx "\t" #regs, \
".arch_extension rcpc\n" \
"ldapr" #sfx "\t" #regs, \
ARM64_HAS_LDAPR)
#else
#define __LOAD_RCPC(sfx, regs...) "ldar" #sfx "\t" #regs
#endif /* CONFIG_AS_HAS_LDAPR */
/*
* When building with LTO, there is an increased risk of the compiler
* converting an address dependency headed by a READ_ONCE() invocation
* into a control dependency and consequently allowing for harmful
* reordering by the CPU.
*
* Ensure that such transformations are harmless by overriding the generic
* READ_ONCE() definition with one that provides RCpc acquire semantics
* when building with LTO.
*/
#define __READ_ONCE(x) \
({ \
typeof(&(x)) __x = &(x); \
int atomic = 1; \
union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u; \
switch (sizeof(x)) { \
case 1: \
asm volatile(__LOAD_RCPC(b, %w0, %1) \
: "=r" (*(__u8 *)__u.__c) \
: "Q" (*__x) : "memory"); \
break; \
case 2: \
asm volatile(__LOAD_RCPC(h, %w0, %1) \
: "=r" (*(__u16 *)__u.__c) \
: "Q" (*__x) : "memory"); \
break; \
case 4: \
asm volatile(__LOAD_RCPC(, %w0, %1) \
: "=r" (*(__u32 *)__u.__c) \
: "Q" (*__x) : "memory"); \
break; \
case 8: \
asm volatile(__LOAD_RCPC(, %0, %1) \
: "=r" (*(__u64 *)__u.__c) \
: "Q" (*__x) : "memory"); \
break; \
default: \
atomic = 0; \
} \
atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\
})
#endif /* !BUILD_VDSO */
#endif /* CONFIG_LTO */
#include <asm-generic/rwonce.h>
#endif /* __ASM_RWONCE_H */

View File

@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[];
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
extern char __idmap_text_start[], __idmap_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __inittext_begin[], __inittext_end[];

View File

@ -46,9 +46,9 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
* Logical CPU mapping.
*/
extern u64 __cpu_logical_map[NR_CPUS];
extern u64 cpu_logical_map(int cpu);
extern u64 cpu_logical_map(unsigned int cpu);
static inline void set_cpu_logical_map(int cpu, u64 hwid)
static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid)
{
__cpu_logical_map[cpu] = hwid;
}

View File

@ -98,6 +98,10 @@
#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift))
#define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift))
#define set_pstate_pan(x) asm volatile(SET_PSTATE_PAN(x))
#define set_pstate_uao(x) asm volatile(SET_PSTATE_UAO(x))
#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x))
#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
@ -583,6 +587,9 @@
#define ENDIAN_SET_EL2 0
#endif
#define INIT_SCTLR_EL2_MMU_OFF \
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
/* SCTLR_EL1 specific flags. */
#define SCTLR_EL1_ATA0 (BIT(42))
@ -616,12 +623,15 @@
#define ENDIAN_SET_EL1 0
#endif
#define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\
SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\
SCTLR_EL1_DZE | SCTLR_EL1_UCT |\
SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
SCTLR_ELx_ITFSB| SCTLR_ELx_ATA | SCTLR_EL1_ATA0 |\
ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1)
#define INIT_SCTLR_EL1_MMU_OFF \
(ENDIAN_SET_EL1 | SCTLR_EL1_RES1)
#define INIT_SCTLR_EL1_MMU_ON \
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_EL1_SA0 | \
SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \
SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \
SCTLR_EL1_RES1)
/* MAIR_ELx memory attributes (used by Linux) */
#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00)

View File

@ -18,14 +18,11 @@ struct task_struct;
#include <asm/stack_pointer.h>
#include <asm/types.h>
typedef unsigned long mm_segment_t;
/*
* low level task data that entry.S needs immediate access to.
*/
struct thread_info {
unsigned long flags; /* low level flags */
mm_segment_t addr_limit; /* address limit */
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
@ -66,8 +63,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
#define TIF_FSCHECK 5 /* Check FS is USER_DS on return */
#define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */
#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */
#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
@ -93,7 +89,6 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_FSCHECK (1 << TIF_FSCHECK)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_SVE (1 << TIF_SVE)
@ -101,7 +96,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
_TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT)
_TIF_UPROBE | _TIF_MTE_ASYNC_FAULT)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
@ -119,7 +114,6 @@ void arch_release_task_struct(struct task_struct *tsk);
{ \
.flags = _TIF_FOREIGN_FPSTATE, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
INIT_SCS \
}

View File

@ -24,44 +24,18 @@
#include <asm/memory.h>
#include <asm/extable.h>
#define get_fs() (current_thread_info()->addr_limit)
static inline void set_fs(mm_segment_t fs)
{
current_thread_info()->addr_limit = fs;
/*
* Prevent a mispredicted conditional call to set_fs from forwarding
* the wrong address limit to access_ok under speculation.
*/
spec_bar();
/* On user-mode return, check fs is correct */
set_thread_flag(TIF_FSCHECK);
/*
* Enable/disable UAO so that copy_to_user() etc can access
* kernel memory with the unprivileged instructions.
*/
if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS)
asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
else
asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
CONFIG_ARM64_UAO));
}
#define uaccess_kernel() (get_fs() == KERNEL_DS)
#define HAVE_GET_KERNEL_NOFAULT
/*
* Test whether a block of memory is a valid user space address.
* Returns 1 if the range is valid, 0 otherwise.
*
* This is equivalent to the following test:
* (u65)addr + (u65)size <= (u65)current->addr_limit + 1
* (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX
*/
static inline unsigned long __range_ok(const void __user *addr, unsigned long size)
{
unsigned long ret, limit = current_thread_info()->addr_limit;
unsigned long ret, limit = TASK_SIZE_MAX - 1;
/*
* Asynchronous I/O running in a kernel thread does not have the
@ -94,7 +68,6 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si
}
#define access_ok(addr, size) __range_ok(addr, size)
#define user_addr_max get_fs
#define _ASM_EXTABLE(from, to) \
" .pushsection __ex_table, \"a\"\n" \
@ -186,47 +159,26 @@ static inline void __uaccess_enable_hw_pan(void)
CONFIG_ARM64_PAN));
}
#define __uaccess_disable(alt) \
do { \
if (!uaccess_ttbr0_disable()) \
asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \
CONFIG_ARM64_PAN)); \
} while (0)
#define __uaccess_enable(alt) \
do { \
if (!uaccess_ttbr0_enable()) \
asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \
CONFIG_ARM64_PAN)); \
} while (0)
static inline void uaccess_disable(void)
static inline void uaccess_disable_privileged(void)
{
__uaccess_disable(ARM64_HAS_PAN);
if (uaccess_ttbr0_disable())
return;
__uaccess_enable_hw_pan();
}
static inline void uaccess_enable(void)
static inline void uaccess_enable_privileged(void)
{
__uaccess_enable(ARM64_HAS_PAN);
if (uaccess_ttbr0_enable())
return;
__uaccess_disable_hw_pan();
}
/*
* These functions are no-ops when UAO is present.
*/
static inline void uaccess_disable_not_uao(void)
{
__uaccess_disable(ARM64_ALT_PAN_NOT_UAO);
}
static inline void uaccess_enable_not_uao(void)
{
__uaccess_enable(ARM64_ALT_PAN_NOT_UAO);
}
/*
* Sanitise a uaccess pointer such that it becomes NULL if above the
* current addr_limit. In case the pointer is tagged (has the top byte set),
* untag the pointer before checking.
* Sanitise a uaccess pointer such that it becomes NULL if above the maximum
* user address. In case the pointer is tagged (has the top byte set), untag
* the pointer before checking.
*/
#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
@ -237,7 +189,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
" bics xzr, %3, %2\n"
" csel %0, %1, xzr, eq\n"
: "=&r" (safe_ptr)
: "r" (ptr), "r" (current_thread_info()->addr_limit),
: "r" (ptr), "r" (TASK_SIZE_MAX - 1),
"r" (untagged_addr(ptr))
: "cc");
@ -253,10 +205,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
* The "__xxx_error" versions set the third argument to -EFAULT if an error
* occurs, and leave it unchanged on success.
*/
#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
#define __get_mem_asm(load, reg, x, addr, err) \
asm volatile( \
"1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
alt_instr " " reg "1, [%2]\n", feature) \
"1: " load " " reg "1, [%2]\n" \
"2:\n" \
" .section .fixup, \"ax\"\n" \
" .align 2\n" \
@ -268,35 +219,36 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
: "+r" (err), "=&r" (x) \
: "r" (addr), "i" (-EFAULT))
#define __raw_get_user(x, ptr, err) \
#define __raw_get_mem(ldr, x, ptr, err) \
do { \
unsigned long __gu_val; \
__chk_user_ptr(ptr); \
uaccess_enable_not_uao(); \
switch (sizeof(*(ptr))) { \
case 1: \
__get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \
(err), ARM64_HAS_UAO); \
__get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err)); \
break; \
case 2: \
__get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr), \
(err), ARM64_HAS_UAO); \
__get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err)); \
break; \
case 4: \
__get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr), \
(err), ARM64_HAS_UAO); \
__get_mem_asm(ldr, "%w", __gu_val, (ptr), (err)); \
break; \
case 8: \
__get_user_asm("ldr", "ldtr", "%x", __gu_val, (ptr), \
(err), ARM64_HAS_UAO); \
__get_mem_asm(ldr, "%x", __gu_val, (ptr), (err)); \
break; \
default: \
BUILD_BUG(); \
} \
uaccess_disable_not_uao(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
} while (0)
#define __raw_get_user(x, ptr, err) \
do { \
__chk_user_ptr(ptr); \
uaccess_ttbr0_enable(); \
__raw_get_mem("ldtr", x, ptr, err); \
uaccess_ttbr0_disable(); \
} while (0)
#define __get_user_error(x, ptr, err) \
do { \
__typeof__(*(ptr)) __user *__p = (ptr); \
@ -318,10 +270,19 @@ do { \
#define get_user __get_user
#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
int __gkn_err = 0; \
\
__raw_get_mem("ldr", *((type *)(dst)), \
(__force type *)(src), __gkn_err); \
if (unlikely(__gkn_err)) \
goto err_label; \
} while (0)
#define __put_mem_asm(store, reg, x, addr, err) \
asm volatile( \
"1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
alt_instr " " reg "1, [%2]\n", feature) \
"1: " store " " reg "1, [%2]\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .align 2\n" \
@ -332,32 +293,33 @@ do { \
: "+r" (err) \
: "r" (x), "r" (addr), "i" (-EFAULT))
#define __raw_put_user(x, ptr, err) \
#define __raw_put_mem(str, x, ptr, err) \
do { \
__typeof__(*(ptr)) __pu_val = (x); \
__chk_user_ptr(ptr); \
uaccess_enable_not_uao(); \
switch (sizeof(*(ptr))) { \
case 1: \
__put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \
(err), ARM64_HAS_UAO); \
__put_mem_asm(str "b", "%w", __pu_val, (ptr), (err)); \
break; \
case 2: \
__put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr), \
(err), ARM64_HAS_UAO); \
__put_mem_asm(str "h", "%w", __pu_val, (ptr), (err)); \
break; \
case 4: \
__put_user_asm("str", "sttr", "%w", __pu_val, (ptr), \
(err), ARM64_HAS_UAO); \
__put_mem_asm(str, "%w", __pu_val, (ptr), (err)); \
break; \
case 8: \
__put_user_asm("str", "sttr", "%x", __pu_val, (ptr), \
(err), ARM64_HAS_UAO); \
__put_mem_asm(str, "%x", __pu_val, (ptr), (err)); \
break; \
default: \
BUILD_BUG(); \
} \
uaccess_disable_not_uao(); \
} while (0)
#define __raw_put_user(x, ptr, err) \
do { \
__chk_user_ptr(ptr); \
uaccess_ttbr0_enable(); \
__raw_put_mem("sttr", x, ptr, err); \
uaccess_ttbr0_disable(); \
} while (0)
#define __put_user_error(x, ptr, err) \
@ -381,14 +343,24 @@ do { \
#define put_user __put_user
#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
int __pkn_err = 0; \
\
__raw_put_mem("str", *((type *)(src)), \
(__force type *)(dst), __pkn_err); \
if (unlikely(__pkn_err)) \
goto err_label; \
} while(0)
extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
#define raw_copy_from_user(to, from, n) \
({ \
unsigned long __acfu_ret; \
uaccess_enable_not_uao(); \
uaccess_ttbr0_enable(); \
__acfu_ret = __arch_copy_from_user((to), \
__uaccess_mask_ptr(from), (n)); \
uaccess_disable_not_uao(); \
uaccess_ttbr0_disable(); \
__acfu_ret; \
})
@ -396,10 +368,10 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi
#define raw_copy_to_user(to, from, n) \
({ \
unsigned long __actu_ret; \
uaccess_enable_not_uao(); \
uaccess_ttbr0_enable(); \
__actu_ret = __arch_copy_to_user(__uaccess_mask_ptr(to), \
(from), (n)); \
uaccess_disable_not_uao(); \
uaccess_ttbr0_disable(); \
__actu_ret; \
})
@ -407,10 +379,10 @@ extern unsigned long __must_check __arch_copy_in_user(void __user *to, const voi
#define raw_copy_in_user(to, from, n) \
({ \
unsigned long __aciu_ret; \
uaccess_enable_not_uao(); \
uaccess_ttbr0_enable(); \
__aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to), \
__uaccess_mask_ptr(from), (n)); \
uaccess_disable_not_uao(); \
uaccess_ttbr0_disable(); \
__aciu_ret; \
})
@ -421,9 +393,9 @@ extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned lo
static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n)
{
if (access_ok(to, n)) {
uaccess_enable_not_uao();
uaccess_ttbr0_enable();
n = __arch_clear_user(__uaccess_mask_ptr(to), n);
uaccess_disable_not_uao();
uaccess_ttbr0_disable();
}
return n;
}

View File

@ -65,9 +65,19 @@ extern u32 __boot_cpu_mode[2];
void __hyp_set_vectors(phys_addr_t phys_vector_base);
void __hyp_reset_vectors(void);
DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
/* Reports the availability of HYP mode */
static inline bool is_hyp_mode_available(void)
{
/*
* If KVM protected mode is initialized, all CPUs must have been booted
* in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
*/
if (IS_ENABLED(CONFIG_KVM) &&
static_branch_likely(&kvm_protected_mode_initialized))
return true;
return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
__boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
}
@ -75,6 +85,14 @@ static inline bool is_hyp_mode_available(void)
/* Check if the bootloader has booted CPUs in different modes */
static inline bool is_hyp_mode_mismatched(void)
{
/*
* If KVM protected mode is initialized, all CPUs must have been booted
* in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
*/
if (IS_ENABLED(CONFIG_KVM) &&
static_branch_likely(&kvm_protected_mode_initialized))
return false;
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
@ -97,6 +115,14 @@ static __always_inline bool has_vhe(void)
return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN);
}
static __always_inline bool is_protected_kvm_enabled(void)
{
if (is_vhe_hyp_code())
return false;
else
return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
}
#endif /* __ASSEMBLY__ */
#endif /* ! __ASM__VIRT_H */

View File

@ -21,7 +21,8 @@
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
static int all_alternatives_applied;
/* Volatile, as we may be patching the guts of READ_ONCE() */
static volatile int all_alternatives_applied;
static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);
@ -205,7 +206,7 @@ static int __apply_alternatives_multi_stop(void *unused)
/* We always have a CPU 0 at this point (__init) */
if (smp_processor_id()) {
while (!READ_ONCE(all_alternatives_applied))
while (!all_alternatives_applied)
cpu_relax();
isb();
} else {
@ -217,7 +218,7 @@ static int __apply_alternatives_multi_stop(void *unused)
BUG_ON(all_alternatives_applied);
__apply_alternatives(&region, false, remaining_capabilities);
/* Barriers provided by the cache flushing */
WRITE_ONCE(all_alternatives_applied, 1);
all_alternatives_applied = 1;
}
return 0;

View File

@ -277,7 +277,7 @@ static void __init register_insn_emulation_sysctl(void)
#define __user_swpX_asm(data, addr, res, temp, temp2, B) \
do { \
uaccess_enable(); \
uaccess_enable_privileged(); \
__asm__ __volatile__( \
" mov %w3, %w7\n" \
"0: ldxr"B" %w2, [%4]\n" \
@ -302,7 +302,7 @@ do { \
"i" (-EFAULT), \
"i" (__SWP_LL_SC_LOOPS) \
: "memory"); \
uaccess_disable(); \
uaccess_disable_privileged(); \
} while (0)
#define __user_swp_asm(data, addr, res, temp, temp2) \

View File

@ -30,7 +30,6 @@ int main(void)
BLANK();
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
@ -70,7 +69,7 @@ int main(void)
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_PC, offsetof(struct pt_regs, pc));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
@ -110,6 +109,11 @@ int main(void)
DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt));
DEFINE(NVHE_INIT_MAIR_EL2, offsetof(struct kvm_nvhe_init_params, mair_el2));
DEFINE(NVHE_INIT_TCR_EL2, offsetof(struct kvm_nvhe_init_params, tcr_el2));
DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2));
DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va));
DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));

View File

@ -74,6 +74,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/fpsimd.h>
#include <asm/kvm_host.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/processor.h>
@ -153,10 +154,6 @@ EXPORT_SYMBOL(cpu_hwcap_keys);
.width = 0, \
}
/* meta feature for alternatives */
static bool __maybe_unused
cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap);
static bool __system_matches_cap(unsigned int n);
@ -1600,7 +1597,7 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
WARN_ON_ONCE(in_interrupt());
sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
asm(SET_PSTATE_PAN(1));
set_pstate_pan(1);
}
#endif /* CONFIG_ARM64_PAN */
@ -1709,6 +1706,21 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
}
#endif /* CONFIG_ARM64_MTE */
#ifdef CONFIG_KVM
static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
{
if (kvm_get_mode() != KVM_MODE_PROTECTED)
return false;
if (is_kernel_in_hyp_mode()) {
pr_warn("Protected KVM not available with VHE\n");
return false;
}
return true;
}
#endif /* CONFIG_KVM */
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@ -1770,28 +1782,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.matches = has_no_hw_prefetch,
},
#ifdef CONFIG_ARM64_UAO
{
.desc = "User Access Override",
.capability = ARM64_HAS_UAO,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.field_pos = ID_AA64MMFR2_UAO_SHIFT,
.min_field_value = 1,
/*
* We rely on stop_machine() calling uao_thread_switch() to set
* UAO immediately after patching.
*/
},
#endif /* CONFIG_ARM64_UAO */
#ifdef CONFIG_ARM64_PAN
{
.capability = ARM64_ALT_PAN_NOT_UAO,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = cpufeature_pan_not_uao,
},
#endif /* CONFIG_ARM64_PAN */
#ifdef CONFIG_ARM64_VHE
{
.desc = "Virtualization Host Extensions",
@ -1822,6 +1812,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.field_pos = ID_AA64PFR0_EL1_SHIFT,
.min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT,
},
{
.desc = "Protected KVM",
.capability = ARM64_KVM_PROTECTED_MODE,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = is_kvm_protected_mode,
},
#endif
{
.desc = "Kernel page table isolation (KPTI)",
@ -2138,6 +2134,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_mte,
},
#endif /* CONFIG_ARM64_MTE */
{
.desc = "RCpc load-acquire (LDAPR)",
.capability = ARM64_HAS_LDAPR,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
.matches = has_cpuid_feature,
.min_field_value = 1,
},
{},
};
@ -2652,7 +2658,7 @@ bool this_cpu_has_cap(unsigned int n)
* - The SYSTEM_FEATURE cpu_hwcaps may not have been set.
* In all other cases cpus_have_{const_}cap() should be used.
*/
static bool __system_matches_cap(unsigned int n)
static bool __maybe_unused __system_matches_cap(unsigned int n)
{
if (n < ARM64_NCAPS) {
const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
@ -2732,12 +2738,6 @@ void __init setup_cpu_features(void)
ARCH_DMA_MINALIGN);
}
static bool __maybe_unused
cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
{
return (__system_matches_cap(ARM64_HAS_PAN) && !__system_matches_cap(ARM64_HAS_UAO));
}
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
{
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));

View File

@ -216,12 +216,6 @@ alternative_else_nop_endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
/* Save the task's original addr_limit and set USER_DS */
ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
str x20, [sp, #S_ORIG_ADDR_LIMIT]
mov x20, #USER_DS
str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
@ -279,12 +273,6 @@ alternative_else_nop_endif
.macro kernel_exit, el
.if \el != 0
disable_daif
/* Restore the task's original addr_limit. */
ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to restore UAO, it will be restored from SPSR_EL1 */
.endif
/* Restore pmr */
@ -999,10 +987,9 @@ SYM_CODE_START(__sdei_asm_entry_trampoline)
mov x4, xzr
/*
* Use reg->interrupted_regs.addr_limit to remember whether to unmap
* the kernel on exit.
* Remember whether to unmap the kernel on exit.
*/
1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
#ifdef CONFIG_RANDOMIZE_BASE
adr x4, tramp_vectors + PAGE_SIZE
@ -1023,7 +1010,7 @@ NOKPROBE(__sdei_asm_entry_trampoline)
* x4: struct sdei_registered_event argument from registration time.
*/
SYM_CODE_START(__sdei_asm_exit_trampoline)
ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
cbnz x4, 1f
tramp_unmap_kernel tmp=x4

View File

@ -11,7 +11,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/pgtable.h>
#include <asm/asm_pointer_auth.h>
@ -21,6 +20,7 @@
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/cputype.h>
#include <asm/el2_setup.h>
#include <asm/elf.h>
#include <asm/image.h>
#include <asm/kernel-pgtable.h>
@ -104,7 +104,7 @@ pe_header:
*/
SYM_CODE_START(primary_entry)
bl preserve_boot_args
bl el2_setup // Drop to EL1, w0=cpu_boot_mode
bl init_kernel_el // w0=cpu_boot_mode
adrp x23, __PHYS_OFFSET
and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
bl set_cpu_boot_mode_flag
@ -482,174 +482,86 @@ EXPORT_SYMBOL(kimage_vaddr)
.section ".idmap.text","awx"
/*
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
* Starting from EL2 or EL1, configure the CPU to execute at the highest
* reachable EL supported by the kernel in a chosen default state. If dropping
* from EL2 to EL1, configure EL2 before configuring EL1.
*
* Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
* SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
*
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
* booted in EL1 or EL2 respectively.
*/
SYM_FUNC_START(el2_setup)
msr SPsel, #1 // We want to use SP_EL{1,2}
SYM_FUNC_START(init_kernel_el)
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eq 1f
mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
b.eq init_el2
SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
isb
ret
1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
msr sctlr_el2, x0
mov_q x0, INIT_PSTATE_EL1
msr spsr_el1, x0
msr elr_el1, lr
mov w0, #BOOT_CPU_MODE_EL1
eret
SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
#ifdef CONFIG_ARM64_VHE
/*
* Check for VHE being present. For the rest of the EL2 setup,
* x2 being non-zero indicates that we do have VHE, and that the
* kernel is intended to run at EL2.
* Check for VHE being present. x2 being non-zero indicates that we
* do have VHE, and that the kernel is intended to run at EL2.
*/
mrs x2, id_aa64mmfr1_el1
ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#else
mov x2, xzr
#endif
/* Hyp configuration. */
mov_q x0, HCR_HOST_NVHE_FLAGS
cbz x2, set_hcr
mov_q x0, HCR_HOST_VHE_FLAGS
set_hcr:
msr hcr_el2, x0
isb
cbz x2, init_el2_nvhe
/*
* Allow Non-secure EL1 and EL0 to access physical timer and counter.
* This is not necessary for VHE, since the host kernel runs in EL2,
* and EL0 accesses are configured in the later stage of boot process.
* Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
* as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
* to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
* to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
* EL2.
*/
cbnz x2, 1f
mrs x0, cnthctl_el2
orr x0, x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
1:
msr cntvoff_el2, xzr // Clear virtual offset
#ifdef CONFIG_ARM_GIC_V3
/* GICv3 system register access */
mrs x0, id_aa64pfr0_el1
ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
cbz x0, 3f
mrs_s x0, SYS_ICC_SRE_EL2
orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
msr_s SYS_ICC_SRE_EL2, x0
isb // Make sure SRE is now set
mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
tbz x0, #0, 3f // and check that it sticks
msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
3:
#endif
/* Populate ID registers. */
mrs x0, midr_el1
mrs x1, mpidr_el1
msr vpidr_el2, x0
msr vmpidr_el2, x1
#ifdef CONFIG_COMPAT
msr hstr_el2, xzr // Disable CP15 traps to EL2
#endif
/* EL2 debug */
mrs x1, id_aa64dfr0_el1
sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
cmp x0, #1
b.lt 4f // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
4:
csel x3, xzr, x0, lt // all PMU counters from EL1
/* Statistical profiling */
ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
cbz x0, 7f // Skip if SPE not present
cbnz x2, 6f // VHE?
mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2,
and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
cbnz x4, 5f // then permit sampling of physical
mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
1 << SYS_PMSCR_EL2_PA_SHIFT)
msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter
5:
mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
orr x3, x3, x1 // If we don't have VHE, then
b 7f // use EL1&0 translation.
6: // For VHE, use EL2 translation
orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1
7:
msr mdcr_el2, x3 // Configure debug traps
/* LORegions */
mrs x1, id_aa64mmfr1_el1
ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
cbz x0, 1f
msr_s SYS_LORC_EL1, xzr
1:
/* Stage-2 translation */
msr vttbr_el2, xzr
cbz x2, install_el2_stub
mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
isb
ret
SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
/*
* When VHE is not in use, early init of EL2 and EL1 needs to be
* done here.
* When VHE _is_ in use, EL1 will not be used in the host and
* requires no configuration, and all non-hyp-specific EL2 setup
* will be done via the _EL1 system register aliases in __cpu_setup.
*/
mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
msr sctlr_el1, x0
/* Coprocessor traps. */
mov x0, #0x33ff
msr cptr_el2, x0 // Disable copro. traps to EL2
/* SVE register access */
mrs x1, id_aa64pfr0_el1
ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
cbz x1, 7f
bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
msr cptr_el2, x0 // Disable copro. traps to EL2
mov_q x0, HCR_HOST_VHE_FLAGS
msr hcr_el2, x0
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
/* Hypervisor stub */
7: adr_l x0, __hyp_stub_vectors
msr vbar_el2, x0
init_el2_state vhe
/* spsr */
mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
isb
mov_q x0, INIT_PSTATE_EL2
msr spsr_el2, x0
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
mov w0, #BOOT_CPU_MODE_EL2
eret
SYM_FUNC_END(el2_setup)
SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL)
/*
* When VHE is not in use, early init of EL2 and EL1 needs to be
* done here.
*/
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
mov_q x0, HCR_HOST_NVHE_FLAGS
msr hcr_el2, x0
isb
init_el2_state nvhe
/* Hypervisor stub */
adr_l x0, __hyp_stub_vectors
msr vbar_el2, x0
isb
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2
eret
SYM_FUNC_END(init_kernel_el)
/*
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
@ -699,7 +611,7 @@ SYM_DATA_END(__early_cpu_boot_status)
* cores are held until we're ready for them to initialise.
*/
SYM_FUNC_START(secondary_holding_pen)
bl el2_setup // Drop to EL1, w0=cpu_boot_mode
bl init_kernel_el // w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
mov_q x1, MPIDR_HWID_BITMASK
@ -717,7 +629,7 @@ SYM_FUNC_END(secondary_holding_pen)
* be used where CPUs are brought online dynamically by the kernel.
*/
SYM_FUNC_START(secondary_entry)
bl el2_setup // Drop to EL1
bl init_kernel_el // w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
b secondary_startup
SYM_FUNC_END(secondary_entry)

View File

@ -77,9 +77,6 @@ KVM_NVHE_ALIAS(panic);
/* Vectors installed by hyp-init on reset HVC. */
KVM_NVHE_ALIAS(__hyp_stub_vectors);
/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */
KVM_NVHE_ALIAS(idmap_t0sz);
/* Kernel symbol used by icache_is_vpipt(). */
KVM_NVHE_ALIAS(__icache_flags);
@ -102,6 +99,9 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities);
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
/* Array containing bases of nVHE per-CPU memory regions. */
KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
#endif /* CONFIG_KVM */
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */

View File

@ -422,16 +422,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
if (clone_flags & CLONE_SETTLS)
p->thread.uw.tp_value = tls;
} else {
/*
* A kthread has no context to ERET to, so ensure any buggy
* ERET is treated as an illegal exception return.
*
* When a user task is created from a kthread, childregs will
* be initialized by start_thread() or start_compat_thread().
*/
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h;
if (IS_ENABLED(CONFIG_ARM64_UAO) &&
cpus_have_const_cap(ARM64_HAS_UAO))
childregs->pstate |= PSR_UAO_BIT;
spectre_v4_enable_task_mitigation(p);
if (system_uses_irq_prio_masking())
childregs->pmr_save = GIC_PRIO_IRQON;
childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
p->thread.cpu_context.x19 = stack_start;
p->thread.cpu_context.x20 = stk_sz;
@ -461,17 +460,6 @@ static void tls_thread_switch(struct task_struct *next)
write_sysreg(*task_user_tls(next), tpidr_el0);
}
/* Restore the UAO state depending on next's addr_limit */
void uao_thread_switch(struct task_struct *next)
{
if (IS_ENABLED(CONFIG_ARM64_UAO)) {
if (task_thread_info(next)->addr_limit == KERNEL_DS)
asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
else
asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
}
}
/*
* Force SSBS state on context-switch, since it may be lost after migrating
* from a CPU which treats the bit as RES0 in a heterogeneous system.
@ -554,7 +542,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
entry_task_switch(next);
uao_thread_switch(next);
ssbs_thread_switch(next);
erratum_1418040_thread_switch(prev, next);

View File

@ -24,6 +24,7 @@
#include <linux/prctl.h>
#include <linux/sched/task_stack.h>
#include <asm/insn.h>
#include <asm/spectre.h>
#include <asm/traps.h>
#include <asm/virt.h>
@ -520,12 +521,12 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
if (spectre_v4_mitigations_off()) {
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
asm volatile(SET_PSTATE_SSBS(1));
set_pstate_ssbs(1);
return SPECTRE_VULNERABLE;
}
/* SCTLR_EL1.DSSBS was initialised to 0 during boot */
asm volatile(SET_PSTATE_SSBS(0));
set_pstate_ssbs(0);
return SPECTRE_MITIGATED;
}

View File

@ -178,12 +178,6 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
sdei_api_event_context(i, &regs->regs[i]);
}
/*
* We didn't take an exception to get here, set PAN. UAO will be cleared
* by sdei_event_handler()s force_uaccess_begin() call.
*/
__uaccess_enable_hw_pan();
err = sdei_event_handler(regs, arg);
if (err)
return SDEI_EV_FAILED;
@ -222,12 +216,39 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
return vbar + 0x480;
}
static void __kprobes notrace __sdei_pstate_entry(void)
{
/*
* The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
* whether PSTATE bits are inherited unchanged or generated from
* scratch, and the TF-A implementation always clears PAN and always
* clears UAO. There are no other known implementations.
*
* Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
* PSTATE is modified upon architectural exceptions, and so PAN is
* either inherited or set per SCTLR_ELx.SPAN, and UAO is always
* cleared.
*
* We must explicitly reset PAN to the expected state, including
* clearing it when the host isn't using it, in case a VM had it set.
*/
if (system_uses_hw_pan())
set_pstate_pan(1);
else if (cpu_has_pan())
set_pstate_pan(0);
}
asmlinkage __kprobes notrace unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
unsigned long ret;
/*
* We didn't take an exception to get here, so the HW hasn't
* set/cleared bits in PSTATE that we may rely on. Initialize PAN.
*/
__sdei_pstate_entry();
nmi_enter();
ret = _sdei_handler(regs, arg);

View File

@ -276,7 +276,7 @@ arch_initcall(reserve_memblock_reserved_regions);
u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
u64 cpu_logical_map(int cpu)
u64 cpu_logical_map(unsigned int cpu)
{
return __cpu_logical_map[cpu];
}

View File

@ -922,9 +922,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
trace_hardirqs_off();
do {
/* Check valid user FS if needed */
addr_limit_user_check();
if (thread_flags & _TIF_NEED_RESCHED) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);

View File

@ -99,7 +99,7 @@ SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
SYM_CODE_START(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
bl init_kernel_el
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir

View File

@ -58,7 +58,6 @@ void notrace __cpu_suspend_exit(void)
* features that might not have been set correctly.
*/
__uaccess_enable_hw_pan();
uao_thread_switch(current);
/*
* Restore HW breakpoint registers to sane values

View File

@ -28,7 +28,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
$(btildflags-y) -T
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
KASAN_SANITIZE := n

View File

@ -48,7 +48,7 @@ cc32-as-instr = $(call try-run,\
# As a result we set our own flags here.
# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
VDSO_CPPFLAGS += $(LINUXINCLUDE)
# Common C and assembly flags

View File

@ -30,6 +30,13 @@ jiffies = jiffies_64;
*(__kvm_ex_table) \
__stop___kvm_ex_table = .;
#define HYPERVISOR_DATA_SECTIONS \
HYP_SECTION_NAME(.data..ro_after_init) : { \
__hyp_data_ro_after_init_start = .; \
*(HYP_SECTION_NAME(.data..ro_after_init)) \
__hyp_data_ro_after_init_end = .; \
}
#define HYPERVISOR_PERCPU_SECTION \
. = ALIGN(PAGE_SIZE); \
HYP_SECTION_NAME(.data..percpu) : { \
@ -37,6 +44,7 @@ jiffies = jiffies_64;
}
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
#define HYPERVISOR_DATA_SECTIONS
#define HYPERVISOR_PERCPU_SECTION
#endif
@ -201,7 +209,7 @@ SECTIONS
INIT_CALLS
CON_INITCALL
INIT_RAM_FS
*(.init.rodata.* .init.bss) /* from the EFI stub */
*(.init.altinstructions .init.rodata.* .init.bss) /* from the EFI stub */
}
.exit.data : {
EXIT_DATA
@ -234,6 +242,8 @@ SECTIONS
_sdata = .;
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
HYPERVISOR_DATA_SECTIONS
/*
* Data written with the MMU off but read with the MMU on requires
* cache lines to be invalidated, discarding up to a Cache Writeback

View File

@ -19,6 +19,7 @@
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <linux/psci.h>
#include <trace/events/kvm.h>
#define CREATE_TRACE_POINTS
@ -45,10 +46,14 @@
__asm__(".arch_extension virt");
#endif
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
@ -60,6 +65,10 @@ static bool vgic_present;
static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS];
extern u32 kvm_nvhe_sym(kvm_host_psci_version);
extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@ -1382,11 +1391,9 @@ static int kvm_init_vector_slots(void)
static void cpu_init_hyp_mode(void)
{
phys_addr_t pgd_ptr;
unsigned long hyp_stack_ptr;
unsigned long vector_ptr;
unsigned long tpidr_el2;
struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params);
struct arm_smccc_res res;
unsigned long tcr;
/* Switch from the HYP stub to our own HYP init vector */
__hyp_set_vectors(kvm_get_idmap_vector());
@ -1396,13 +1403,38 @@ static void cpu_init_hyp_mode(void)
* kernel's mapping to the linear mapping, and store it in tpidr_el2
* so that we can use adr_l to access per-cpu variables in EL2.
*/
tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) -
(unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) -
(unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
pgd_ptr = kvm_mmu_get_httbr();
hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
hyp_stack_ptr = kern_hyp_va(hyp_stack_ptr);
vector_ptr = (unsigned long)kern_hyp_va(kvm_ksym_ref(__kvm_hyp_host_vector));
params->mair_el2 = read_sysreg(mair_el1);
/*
* The ID map may be configured to use an extended virtual address
* range. This is only the case if system RAM is out of range for the
* currently configured page size and VA_BITS, in which case we will
* also need the extended virtual range for the HYP ID map, or we won't
* be able to enable the EL2 MMU.
*
* However, at EL2, there is only one TTBR register, and we can't switch
* between translation tables *and* update TCR_EL2.T0SZ at the same
* time. Bottom line: we need to use the extended range with *both* our
* translation tables.
*
* So use the same T0SZ value we use for the ID map.
*/
tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1;
tcr &= ~TCR_T0SZ_MASK;
tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
params->tcr_el2 = tcr;
params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE);
params->pgd_pa = kvm_mmu_get_httbr();
/*
* Flush the init params from the data cache because the struct will
* be read while the MMU is off.
*/
kvm_flush_dcache_to_poc(params, sizeof(*params));
/*
* Call initialization code, and switch to the full blown HYP code.
@ -1411,8 +1443,7 @@ static void cpu_init_hyp_mode(void)
* cpus_have_const_cap() wrapper.
*/
BUG_ON(!system_capabilities_finalized());
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init),
pgd_ptr, tpidr_el2, hyp_stack_ptr, vector_ptr, &res);
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res);
WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
/*
@ -1501,7 +1532,8 @@ static void _kvm_arch_hardware_disable(void *discard)
void kvm_arch_hardware_disable(void)
{
_kvm_arch_hardware_disable(NULL);
if (!is_protected_kvm_enabled())
_kvm_arch_hardware_disable(NULL);
}
#ifdef CONFIG_CPU_PM
@ -1544,11 +1576,13 @@ static struct notifier_block hyp_init_cpu_pm_nb = {
static void __init hyp_cpu_pm_init(void)
{
cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
if (!is_protected_kvm_enabled())
cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
}
static void __init hyp_cpu_pm_exit(void)
{
cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
if (!is_protected_kvm_enabled())
cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
}
#else
static inline void hyp_cpu_pm_init(void)
@ -1559,6 +1593,36 @@ static inline void hyp_cpu_pm_exit(void)
}
#endif
static void init_cpu_logical_map(void)
{
unsigned int cpu;
/*
* Copy the MPIDR <-> logical CPU ID mapping to hyp.
* Only copy the set of online CPUs whose features have been chacked
* against the finalized system capabilities. The hypervisor will not
* allow any other CPUs from the `possible` set to boot.
*/
for_each_online_cpu(cpu)
kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu);
}
static bool init_psci_relay(void)
{
/*
* If PSCI has not been initialized, protected KVM cannot install
* itself on newly booted CPUs.
*/
if (!psci_ops.get_version) {
kvm_err("Cannot initialize protected mode without PSCI\n");
return false;
}
kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version();
kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids();
return true;
}
static int init_common_resources(void)
{
return kvm_set_ipa_limit();
@ -1606,7 +1670,8 @@ static int init_subsystems(void)
kvm_sys_reg_table_init();
out:
on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
if (err || !is_protected_kvm_enabled())
on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
return err;
}
@ -1680,6 +1745,14 @@ static int init_hyp_mode(void)
goto out_err;
}
err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
kvm_ksym_ref(__hyp_data_ro_after_init_end),
PAGE_HYP_RO);
if (err) {
kvm_err("Cannot map .hyp.data..ro_after_init section\n");
goto out_err;
}
err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
if (err) {
@ -1723,6 +1796,13 @@ static int init_hyp_mode(void)
}
}
if (is_protected_kvm_enabled()) {
init_cpu_logical_map();
if (!init_psci_relay())
goto out_err;
}
return 0;
out_err:
@ -1847,10 +1927,14 @@ int kvm_arch_init(void *opaque)
if (err)
goto out_hyp;
if (in_hyp_mode)
if (is_protected_kvm_enabled()) {
static_branch_enable(&kvm_protected_mode_initialized);
kvm_info("Protected nVHE mode initialized successfully\n");
} else if (in_hyp_mode) {
kvm_info("VHE mode initialized successfully\n");
else
} else {
kvm_info("Hyp mode initialized successfully\n");
}
return 0;
@ -1868,6 +1952,25 @@ void kvm_arch_exit(void)
kvm_perf_teardown();
}
static int __init early_kvm_mode_cfg(char *arg)
{
if (!arg)
return -EINVAL;
if (strcmp(arg, "protected") == 0) {
kvm_mode = KVM_MODE_PROTECTED;
return 0;
}
return -EINVAL;
}
early_param("kvm-arm.mode", early_kvm_mode_cfg);
enum kvm_mode kvm_get_mode(void)
{
return kvm_mode;
}
static int arm_init(void)
{
int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);

View File

@ -0,0 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Trap handler helpers.
*
* Copyright (C) 2020 - Google LLC
* Author: Marc Zyngier <maz@kernel.org>
*/
#ifndef __ARM64_KVM_NVHE_TRAP_HANDLER_H__
#define __ARM64_KVM_NVHE_TRAP_HANDLER_H__
#include <asm/kvm_host.h>
#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r]
#define DECLARE_REG(type, name, ctxt, reg) \
type name = (type)cpu_reg(ctxt, (reg))
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */

View File

@ -6,7 +6,8 @@
asflags-y := -D__KVM_NVHE_HYPERVISOR__
ccflags-y := -D__KVM_NVHE_HYPERVISOR__
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o

View File

@ -39,6 +39,7 @@ SYM_FUNC_START(__host_exit)
bl handle_trap
/* Restore host regs x0-x17 */
__host_enter_restore_full:
ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)]
@ -61,6 +62,14 @@ __host_enter_without_restoring:
sb
SYM_FUNC_END(__host_exit)
/*
* void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
*/
SYM_FUNC_START(__host_enter)
mov x29, x0
b __host_enter_restore_full
SYM_FUNC_END(__host_enter)
/*
* void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
*/
@ -180,3 +189,41 @@ SYM_CODE_START(__kvm_hyp_host_vector)
invalid_host_el1_vect // FIQ 32-bit EL1
invalid_host_el1_vect // Error 32-bit EL1
SYM_CODE_END(__kvm_hyp_host_vector)
/*
* Forward SMC with arguments in struct kvm_cpu_context, and
* store the result into the same struct. Assumes SMCCC 1.2 or older.
*
* x0: struct kvm_cpu_context*
*/
SYM_CODE_START(__kvm_hyp_host_forward_smc)
/*
* Use x18 to keep the pointer to the host context because
* x18 is callee-saved in SMCCC but not in AAPCS64.
*/
mov x18, x0
ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
smc #0
stp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
stp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
stp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
stp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
stp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
stp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
stp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
stp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
stp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
ret
SYM_CODE_END(__kvm_hyp_host_forward_smc)

View File

@ -9,6 +9,7 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/el2_setup.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
@ -47,10 +48,7 @@ __invalid:
/*
* x0: SMCCC function ID
* x1: HYP pgd
* x2: per-CPU offset
* x3: HYP stack
* x4: HYP vectors
* x1: struct kvm_nvhe_init_params PA
*/
__do_hyp_init:
/* Check for a stub HVC call */
@ -71,48 +69,53 @@ __do_hyp_init:
mov x0, #SMCCC_RET_NOT_SUPPORTED
eret
1:
/* Set tpidr_el2 for use by HYP to free a register */
msr tpidr_el2, x2
1: mov x0, x1
mov x4, lr
bl ___kvm_hyp_init
mov lr, x4
phys_to_ttbr x0, x1
alternative_if ARM64_HAS_CNP
orr x0, x0, #TTBR_CNP_BIT
/* Hello, World! */
mov x0, #SMCCC_RET_SUCCESS
eret
SYM_CODE_END(__kvm_hyp_init)
/*
* Initialize the hypervisor in EL2.
*
* Only uses x0..x3 so as to not clobber callee-saved SMCCC registers
* and leave x4 for the caller.
*
* x0: struct kvm_nvhe_init_params PA
*/
SYM_CODE_START_LOCAL(___kvm_hyp_init)
alternative_if ARM64_KVM_PROTECTED_MODE
mov_q x1, HCR_HOST_NVHE_PROTECTED_FLAGS
msr hcr_el2, x1
alternative_else_nop_endif
msr ttbr0_el2, x0
mrs x0, tcr_el1
mov_q x1, TCR_EL2_MASK
and x0, x0, x1
mov x1, #TCR_EL2_RES1
orr x0, x0, x1
ldr x1, [x0, #NVHE_INIT_TPIDR_EL2]
msr tpidr_el2, x1
/*
* The ID map may be configured to use an extended virtual address
* range. This is only the case if system RAM is out of range for the
* currently configured page size and VA_BITS, in which case we will
* also need the extended virtual range for the HYP ID map, or we won't
* be able to enable the EL2 MMU.
*
* However, at EL2, there is only one TTBR register, and we can't switch
* between translation tables *and* update TCR_EL2.T0SZ at the same
* time. Bottom line: we need to use the extended range with *both* our
* translation tables.
*
* So use the same T0SZ value we use for the ID map.
*/
ldr_l x1, idmap_t0sz
bfi x0, x1, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
ldr x1, [x0, #NVHE_INIT_STACK_HYP_VA]
mov sp, x1
ldr x1, [x0, #NVHE_INIT_MAIR_EL2]
msr mair_el2, x1
ldr x1, [x0, #NVHE_INIT_PGD_PA]
phys_to_ttbr x2, x1
alternative_if ARM64_HAS_CNP
orr x2, x2, #TTBR_CNP_BIT
alternative_else_nop_endif
msr ttbr0_el2, x2
/*
* Set the PS bits in TCR_EL2.
*/
tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
ldr x1, [x0, #NVHE_INIT_TCR_EL2]
tcr_compute_pa_size x1, #TCR_EL2_PS_SHIFT, x2, x3
msr tcr_el2, x1
msr tcr_el2, x0
mrs x0, mair_el1
msr mair_el2, x0
isb
/* Invalidate the stale TLBs from Bootloader */
@ -134,14 +137,70 @@ alternative_else_nop_endif
msr sctlr_el2, x0
isb
/* Set the stack and new vectors */
mov sp, x3
msr vbar_el2, x4
/* Set the host vector */
ldr x0, =__kvm_hyp_host_vector
kimg_hyp_va x0, x1
msr vbar_el2, x0
/* Hello, World! */
mov x0, #SMCCC_RET_SUCCESS
eret
SYM_CODE_END(__kvm_hyp_init)
ret
SYM_CODE_END(___kvm_hyp_init)
/*
* PSCI CPU_ON entry point
*
* x0: struct kvm_nvhe_init_params PA
*/
SYM_CODE_START(kvm_hyp_cpu_entry)
mov x1, #1 // is_cpu_on = true
b __kvm_hyp_init_cpu
SYM_CODE_END(kvm_hyp_cpu_entry)
/*
* PSCI CPU_SUSPEND / SYSTEM_SUSPEND entry point
*
* x0: struct kvm_nvhe_init_params PA
*/
SYM_CODE_START(kvm_hyp_cpu_resume)
mov x1, #0 // is_cpu_on = false
b __kvm_hyp_init_cpu
SYM_CODE_END(kvm_hyp_cpu_resume)
/*
* Common code for CPU entry points. Initializes EL2 state and
* installs the hypervisor before handing over to a C handler.
*
* x0: struct kvm_nvhe_init_params PA
* x1: bool is_cpu_on
*/
SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
mov x28, x0 // Stash arguments
mov x29, x1
/* Check that the core was booted in EL2. */
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eq 2f
/* The core booted in EL1. KVM cannot be initialized on it. */
1: wfe
wfi
b 1b
2: msr SPsel, #1 // We want to use SP_EL{1,2}
/* Initialize EL2 CPU state to sane values. */
init_el2_state nvhe // Clobbers x0..x2
/* Enable MMU, set vectors and stack. */
mov x0, x28
bl ___kvm_hyp_init // Clobbers x0..x3
/* Leave idmap. */
mov x0, x29
ldr x1, =kvm_host_psci_cpu_entry
kimg_hyp_va x1, x2
br x1
SYM_CODE_END(__kvm_hyp_init_cpu)
SYM_CODE_START(__kvm_handle_stub_hvc)
cmp x0, #HVC_SOFT_RESTART
@ -176,6 +235,11 @@ reset:
msr sctlr_el2, x5
isb
alternative_if ARM64_KVM_PROTECTED_MODE
mov_q x5, HCR_HOST_NVHE_FLAGS
msr hcr_el2, x5
alternative_else_nop_endif
/* Install stub vectors */
adr_l x5, __hyp_stub_vectors
msr vbar_el2, x5

View File

@ -12,9 +12,11 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r]
#define DECLARE_REG(type, name, ctxt, reg) \
type name = (type)cpu_reg(ctxt, (reg))
#include <nvhe/trap_handler.h>
DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
{
@ -150,12 +152,43 @@ inval:
cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
}
static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt)
{
__kvm_hyp_host_forward_smc(host_ctxt);
}
static void skip_host_instruction(void)
{
write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR);
}
static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
{
bool handled;
handled = kvm_host_psci_handler(host_ctxt);
if (!handled)
default_host_smc_handler(host_ctxt);
/*
* Unlike HVC, the return address of an SMC is the instruction's PC.
* Move the return address past the instruction.
*/
skip_host_instruction();
}
void handle_trap(struct kvm_cpu_context *host_ctxt)
{
u64 esr = read_sysreg_el2(SYS_ESR);
if (unlikely(ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64))
switch (ESR_ELx_EC(esr)) {
case ESR_ELx_EC_HVC64:
handle_host_hcall(host_ctxt);
break;
case ESR_ELx_EC_SMC64:
handle_host_smc(host_ctxt);
break;
default:
hyp_panic();
handle_host_hcall(host_ctxt);
}
}

View File

@ -0,0 +1,40 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020 - Google LLC
* Author: David Brazdil <dbrazdil@google.com>
*/
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
/*
* nVHE copy of data structures tracking available CPU cores.
* Only entries for CPUs that were online at KVM init are populated.
* Other CPUs should not be allowed to boot because their features were
* not checked against the finalized system capabilities.
*/
u64 __ro_after_init __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
u64 cpu_logical_map(unsigned int cpu)
{
if (cpu >= ARRAY_SIZE(__cpu_logical_map))
hyp_panic();
return __cpu_logical_map[cpu];
}
unsigned long __hyp_per_cpu_offset(unsigned int cpu)
{
unsigned long *cpu_base_array;
unsigned long this_cpu_base;
unsigned long elf_base;
if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base))
hyp_panic();
cpu_base_array = (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base);
this_cpu_base = kern_hyp_va(cpu_base_array[cpu]);
elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start);
return this_cpu_base - elf_base;
}

View File

@ -16,4 +16,5 @@ SECTIONS {
HYP_SECTION_NAME(.data..percpu) : {
PERCPU_INPUT(L1_CACHE_BYTES)
}
HYP_SECTION(.data..ro_after_init)
}

View File

@ -0,0 +1,324 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020 - Google LLC
* Author: David Brazdil <dbrazdil@google.com>
*/
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <kvm/arm_hypercalls.h>
#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
#include <linux/psci.h>
#include <kvm/arm_psci.h>
#include <uapi/linux/psci.h>
#include <nvhe/trap_handler.h>
void kvm_hyp_cpu_entry(unsigned long r0);
void kvm_hyp_cpu_resume(unsigned long r0);
void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
/* Config options set by the host. */
__ro_after_init u32 kvm_host_psci_version;
__ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids;
__ro_after_init s64 hyp_physvirt_offset;
#define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset)
#define INVALID_CPU_ID UINT_MAX
struct psci_boot_args {
atomic_t lock;
unsigned long pc;
unsigned long r0;
};
#define PSCI_BOOT_ARGS_UNLOCKED 0
#define PSCI_BOOT_ARGS_LOCKED 1
#define PSCI_BOOT_ARGS_INIT \
((struct psci_boot_args){ \
.lock = ATOMIC_INIT(PSCI_BOOT_ARGS_UNLOCKED), \
})
static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT;
static DEFINE_PER_CPU(struct psci_boot_args, suspend_args) = PSCI_BOOT_ARGS_INIT;
static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, func_id, host_ctxt, 0);
return func_id;
}
static bool is_psci_0_1_call(u64 func_id)
{
return (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) ||
(func_id == kvm_host_psci_0_1_function_ids.cpu_on) ||
(func_id == kvm_host_psci_0_1_function_ids.cpu_off) ||
(func_id == kvm_host_psci_0_1_function_ids.migrate);
}
static bool is_psci_0_2_call(u64 func_id)
{
/* SMCCC reserves IDs 0x00-1F with the given 32/64-bit base for PSCI. */
return (PSCI_0_2_FN(0) <= func_id && func_id <= PSCI_0_2_FN(31)) ||
(PSCI_0_2_FN64(0) <= func_id && func_id <= PSCI_0_2_FN64(31));
}
static bool is_psci_call(u64 func_id)
{
switch (kvm_host_psci_version) {
case PSCI_VERSION(0, 1):
return is_psci_0_1_call(func_id);
default:
return is_psci_0_2_call(func_id);
}
}
static unsigned long psci_call(unsigned long fn, unsigned long arg0,
unsigned long arg1, unsigned long arg2)
{
struct arm_smccc_res res;
arm_smccc_1_1_smc(fn, arg0, arg1, arg2, &res);
return res.a0;
}
static unsigned long psci_forward(struct kvm_cpu_context *host_ctxt)
{
return psci_call(cpu_reg(host_ctxt, 0), cpu_reg(host_ctxt, 1),
cpu_reg(host_ctxt, 2), cpu_reg(host_ctxt, 3));
}
static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *host_ctxt)
{
psci_forward(host_ctxt);
hyp_panic(); /* unreachable */
}
static unsigned int find_cpu_id(u64 mpidr)
{
unsigned int i;
/* Reject invalid MPIDRs */
if (mpidr & ~MPIDR_HWID_BITMASK)
return INVALID_CPU_ID;
for (i = 0; i < NR_CPUS; i++) {
if (cpu_logical_map(i) == mpidr)
return i;
}
return INVALID_CPU_ID;
}
static __always_inline bool try_acquire_boot_args(struct psci_boot_args *args)
{
return atomic_cmpxchg_acquire(&args->lock,
PSCI_BOOT_ARGS_UNLOCKED,
PSCI_BOOT_ARGS_LOCKED) ==
PSCI_BOOT_ARGS_UNLOCKED;
}
static __always_inline void release_boot_args(struct psci_boot_args *args)
{
atomic_set_release(&args->lock, PSCI_BOOT_ARGS_UNLOCKED);
}
static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, mpidr, host_ctxt, 1);
DECLARE_REG(unsigned long, pc, host_ctxt, 2);
DECLARE_REG(unsigned long, r0, host_ctxt, 3);
unsigned int cpu_id;
struct psci_boot_args *boot_args;
struct kvm_nvhe_init_params *init_params;
int ret;
/*
* Find the logical CPU ID for the given MPIDR. The search set is
* the set of CPUs that were online at the point of KVM initialization.
* Booting other CPUs is rejected because their cpufeatures were not
* checked against the finalized capabilities. This could be relaxed
* by doing the feature checks in hyp.
*/
cpu_id = find_cpu_id(mpidr);
if (cpu_id == INVALID_CPU_ID)
return PSCI_RET_INVALID_PARAMS;
boot_args = per_cpu_ptr(hyp_symbol_addr(cpu_on_args), cpu_id);
init_params = per_cpu_ptr(hyp_symbol_addr(kvm_init_params), cpu_id);
/* Check if the target CPU is already being booted. */
if (!try_acquire_boot_args(boot_args))
return PSCI_RET_ALREADY_ON;
boot_args->pc = pc;
boot_args->r0 = r0;
wmb();
ret = psci_call(func_id, mpidr,
__hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_entry)),
__hyp_pa(init_params));
/* If successful, the lock will be released by the target CPU. */
if (ret != PSCI_RET_SUCCESS)
release_boot_args(boot_args);
return ret;
}
static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, power_state, host_ctxt, 1);
DECLARE_REG(unsigned long, pc, host_ctxt, 2);
DECLARE_REG(unsigned long, r0, host_ctxt, 3);
struct psci_boot_args *boot_args;
struct kvm_nvhe_init_params *init_params;
boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
/*
* No need to acquire a lock before writing to boot_args because a core
* can only suspend itself. Racy CPU_ON calls use a separate struct.
*/
boot_args->pc = pc;
boot_args->r0 = r0;
/*
* Will either return if shallow sleep state, or wake up into the entry
* point if it is a deep sleep state.
*/
return psci_call(func_id, power_state,
__hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
__hyp_pa(init_params));
}
static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, pc, host_ctxt, 1);
DECLARE_REG(unsigned long, r0, host_ctxt, 2);
struct psci_boot_args *boot_args;
struct kvm_nvhe_init_params *init_params;
boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
/*
* No need to acquire a lock before writing to boot_args because a core
* can only suspend itself. Racy CPU_ON calls use a separate struct.
*/
boot_args->pc = pc;
boot_args->r0 = r0;
/* Will only return on error. */
return psci_call(func_id,
__hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
__hyp_pa(init_params), 0);
}
asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
{
struct psci_boot_args *boot_args;
struct kvm_cpu_context *host_ctxt;
host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt;
if (is_cpu_on)
boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args));
else
boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
cpu_reg(host_ctxt, 0) = boot_args->r0;
write_sysreg_el2(boot_args->pc, SYS_ELR);
if (is_cpu_on)
release_boot_args(boot_args);
__host_enter(host_ctxt);
}
static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
{
if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) ||
(func_id == kvm_host_psci_0_1_function_ids.migrate))
return psci_forward(host_ctxt);
else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on)
return psci_cpu_on(func_id, host_ctxt);
else if (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend)
return psci_cpu_suspend(func_id, host_ctxt);
else
return PSCI_RET_NOT_SUPPORTED;
}
static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
{
switch (func_id) {
case PSCI_0_2_FN_PSCI_VERSION:
case PSCI_0_2_FN_CPU_OFF:
case PSCI_0_2_FN64_AFFINITY_INFO:
case PSCI_0_2_FN64_MIGRATE:
case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
return psci_forward(host_ctxt);
case PSCI_0_2_FN_SYSTEM_OFF:
case PSCI_0_2_FN_SYSTEM_RESET:
psci_forward_noreturn(host_ctxt);
unreachable();
case PSCI_0_2_FN64_CPU_SUSPEND:
return psci_cpu_suspend(func_id, host_ctxt);
case PSCI_0_2_FN64_CPU_ON:
return psci_cpu_on(func_id, host_ctxt);
default:
return PSCI_RET_NOT_SUPPORTED;
}
}
static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
{
switch (func_id) {
case PSCI_1_0_FN_PSCI_FEATURES:
case PSCI_1_0_FN_SET_SUSPEND_MODE:
case PSCI_1_1_FN64_SYSTEM_RESET2:
return psci_forward(host_ctxt);
case PSCI_1_0_FN64_SYSTEM_SUSPEND:
return psci_system_suspend(func_id, host_ctxt);
default:
return psci_0_2_handler(func_id, host_ctxt);
}
}
bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt)
{
u64 func_id = get_psci_func_id(host_ctxt);
unsigned long ret;
if (!is_psci_call(func_id))
return false;
switch (kvm_host_psci_version) {
case PSCI_VERSION(0, 1):
ret = psci_0_1_handler(func_id, host_ctxt);
break;
case PSCI_VERSION(0, 2):
ret = psci_0_2_handler(func_id, host_ctxt);
break;
default:
ret = psci_1_0_handler(func_id, host_ctxt);
break;
}
cpu_reg(host_ctxt, 0) = ret;
cpu_reg(host_ctxt, 1) = 0;
cpu_reg(host_ctxt, 2) = 0;
cpu_reg(host_ctxt, 3) = 0;
return true;
}

View File

@ -97,7 +97,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
write_sysreg(mdcr_el2, mdcr_el2);
write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
if (is_protected_kvm_enabled())
write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2);
else
write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
}

View File

@ -23,6 +23,30 @@ static u8 tag_lsb;
static u64 tag_val;
static u64 va_mask;
/*
* Compute HYP VA by using the same computation as kern_hyp_va().
*/
static u64 __early_kern_hyp_va(u64 addr)
{
addr &= va_mask;
addr |= tag_val << tag_lsb;
return addr;
}
/*
* Store a hyp VA <-> PA offset into a hyp-owned variable.
*/
static void init_hyp_physvirt_offset(void)
{
extern s64 kvm_nvhe_sym(hyp_physvirt_offset);
u64 kern_va, hyp_va;
/* Compute the offset from the hyp VA and PA of a random symbol. */
kern_va = (u64)kvm_ksym_ref(__hyp_text_start);
hyp_va = __early_kern_hyp_va(kern_va);
CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va;
}
/*
* We want to generate a hyp VA with the following format (with V ==
* vabits_actual):
@ -54,6 +78,8 @@ __init void kvm_compute_layout(void)
tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb);
}
tag_val >>= tag_lsb;
init_hyp_physvirt_offset();
}
static u32 compute_instruction(int n, u32 rd, u32 rn)
@ -146,9 +172,7 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
/*
* Compute HYP VA by using the same computation as kern_hyp_va()
*/
addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector);
addr &= va_mask;
addr |= tag_val << tag_lsb;
addr = __early_kern_hyp_va((u64)kvm_ksym_ref(__kvm_hyp_vector));
/* Use PC[10:7] to branch to the same vector in KVM */
addr |= ((u64)origptr & GENMASK_ULL(10, 7));

View File

@ -24,20 +24,20 @@ SYM_FUNC_START(__arch_clear_user)
subs x1, x1, #8
b.mi 2f
1:
uao_user_alternative 9f, str, sttr, xzr, x0, 8
user_ldst 9f, sttr, xzr, x0, 8
subs x1, x1, #8
b.pl 1b
2: adds x1, x1, #4
b.mi 3f
uao_user_alternative 9f, str, sttr, wzr, x0, 4
user_ldst 9f, sttr, wzr, x0, 4
sub x1, x1, #4
3: adds x1, x1, #2
b.mi 4f
uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
user_ldst 9f, sttrh, wzr, x0, 2
sub x1, x1, #2
4: adds x1, x1, #1
b.mi 5f
uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
user_ldst 9f, sttrb, wzr, x0, 0
5: mov x0, #0
ret
SYM_FUNC_END(__arch_clear_user)

View File

@ -21,7 +21,7 @@
*/
.macro ldrb1 reg, ptr, val
uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val
user_ldst 9998f, ldtrb, \reg, \ptr, \val
.endm
.macro strb1 reg, ptr, val
@ -29,7 +29,7 @@
.endm
.macro ldrh1 reg, ptr, val
uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val
user_ldst 9998f, ldtrh, \reg, \ptr, \val
.endm
.macro strh1 reg, ptr, val
@ -37,7 +37,7 @@
.endm
.macro ldr1 reg, ptr, val
uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val
user_ldst 9998f, ldtr, \reg, \ptr, \val
.endm
.macro str1 reg, ptr, val
@ -45,7 +45,7 @@
.endm
.macro ldp1 reg1, reg2, ptr, val
uao_ldp 9998f, \reg1, \reg2, \ptr, \val
user_ldp 9998f, \reg1, \reg2, \ptr, \val
.endm
.macro stp1 reg1, reg2, ptr, val

View File

@ -22,35 +22,35 @@
* x0 - bytes not copied
*/
.macro ldrb1 reg, ptr, val
uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val
user_ldst 9998f, ldtrb, \reg, \ptr, \val
.endm
.macro strb1 reg, ptr, val
uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val
user_ldst 9998f, sttrb, \reg, \ptr, \val
.endm
.macro ldrh1 reg, ptr, val
uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val
user_ldst 9998f, ldtrh, \reg, \ptr, \val
.endm
.macro strh1 reg, ptr, val
uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val
user_ldst 9998f, sttrh, \reg, \ptr, \val
.endm
.macro ldr1 reg, ptr, val
uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val
user_ldst 9998f, ldtr, \reg, \ptr, \val
.endm
.macro str1 reg, ptr, val
uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val
user_ldst 9998f, sttr, \reg, \ptr, \val
.endm
.macro ldp1 reg1, reg2, ptr, val
uao_ldp 9998f, \reg1, \reg2, \ptr, \val
user_ldp 9998f, \reg1, \reg2, \ptr, \val
.endm
.macro stp1 reg1, reg2, ptr, val
uao_stp 9998f, \reg1, \reg2, \ptr, \val
user_stp 9998f, \reg1, \reg2, \ptr, \val
.endm
end .req x5

View File

@ -24,7 +24,7 @@
.endm
.macro strb1 reg, ptr, val
uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val
user_ldst 9998f, sttrb, \reg, \ptr, \val
.endm
.macro ldrh1 reg, ptr, val
@ -32,7 +32,7 @@
.endm
.macro strh1 reg, ptr, val
uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val
user_ldst 9998f, sttrh, \reg, \ptr, \val
.endm
.macro ldr1 reg, ptr, val
@ -40,7 +40,7 @@
.endm
.macro str1 reg, ptr, val
uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val
user_ldst 9998f, sttr, \reg, \ptr, \val
.endm
.macro ldp1 reg1, reg2, ptr, val
@ -48,7 +48,7 @@
.endm
.macro stp1 reg1, reg2, ptr, val
uao_stp 9998f, \reg1, \reg2, \ptr, \val
user_stp 9998f, \reg1, \reg2, \ptr, \val
.endm
end .req x5

View File

@ -4,7 +4,7 @@
*/
#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/asm-uaccess.h>
#include <asm/assembler.h>
#include <asm/mte.h>
#include <asm/page.h>
@ -67,7 +67,7 @@ SYM_FUNC_START(mte_copy_tags_from_user)
mov x3, x1
cbz x2, 2f
1:
uao_user_alternative 2f, ldrb, ldtrb, w4, x1, 0
user_ldst 2f, ldtrb, w4, x1, 0
lsl x4, x4, #MTE_TAG_SHIFT
stg x4, [x0], #MTE_GRANULE_SIZE
add x1, x1, #1
@ -94,7 +94,7 @@ SYM_FUNC_START(mte_copy_tags_to_user)
1:
ldg x4, [x1]
ubfx x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
uao_user_alternative 2f, strb, sttrb, w4, x0, 0
user_ldst 2f, sttrb, w4, x0, 0
add x0, x0, #1
add x1, x1, #MTE_GRANULE_SIZE
subs x2, x2, #1

View File

@ -30,9 +30,7 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from,
{
unsigned long rc;
uaccess_enable_not_uao();
rc = __arch_copy_from_user(to, from, n);
uaccess_disable_not_uao();
rc = raw_copy_from_user(to, from, n);
/* See above */
__clean_dcache_area_pop(to, n - rc);

View File

@ -479,11 +479,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
}
if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
/* regs->orig_addr_limit may be 0 if we entered from EL0 */
if (regs->orig_addr_limit == KERNEL_DS)
die_kernel_fault("access to user memory with fs=KERNEL_DS",
addr, esr, regs);
if (is_el1_instruction_abort(esr))
die_kernel_fault("execution of user memory",
addr, esr, regs);

View File

@ -489,6 +489,6 @@ SYM_FUNC_START(__cpu_setup)
/*
* Prepare SCTLR
*/
mov_q x0, SCTLR_EL1_SET
mov_q x0, INIT_SCTLR_EL1_MMU_ON
ret // return to head.S
SYM_FUNC_END(__cpu_setup)

View File

@ -31,7 +31,6 @@
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
/*
* The call to use to reach the firmware.
@ -1092,26 +1091,13 @@ int sdei_event_handler(struct pt_regs *regs,
struct sdei_registered_event *arg)
{
int err;
mm_segment_t orig_addr_limit;
u32 event_num = arg->event_num;
/*
* Save restore 'fs'.
* The architecture's entry code save/restores 'fs' when taking an
* exception from the kernel. This ensures addr_limit isn't inherited
* if you interrupted something that allowed the uaccess routines to
* access kernel memory.
* Do the same here because this doesn't come via the same entry code.
*/
orig_addr_limit = force_uaccess_begin();
err = arg->callback(event_num, regs, arg->callback_arg);
if (err)
pr_err_ratelimited("event %u on CPU %u failed with error: %d\n",
event_num, smp_processor_id(), err);
force_uaccess_end(orig_addr_limit);
return err;
}
NOKPROBE_SYMBOL(sdei_event_handler);

View File

@ -58,15 +58,12 @@ typedef unsigned long (psci_fn)(unsigned long, unsigned long,
unsigned long, unsigned long);
static psci_fn *invoke_psci_fn;
enum psci_function {
PSCI_FN_CPU_SUSPEND,
PSCI_FN_CPU_ON,
PSCI_FN_CPU_OFF,
PSCI_FN_MIGRATE,
PSCI_FN_MAX,
};
static struct psci_0_1_function_ids psci_0_1_function_ids;
static u32 psci_function_id[PSCI_FN_MAX];
struct psci_0_1_function_ids get_psci_0_1_function_ids(void)
{
return psci_0_1_function_ids;
}
#define PSCI_0_2_POWER_STATE_MASK \
(PSCI_0_2_POWER_STATE_ID_MASK | \
@ -146,7 +143,12 @@ static int psci_to_linux_errno(int errno)
return -EINVAL;
}
static u32 psci_get_version(void)
static u32 psci_0_1_get_version(void)
{
return PSCI_VERSION(0, 1);
}
static u32 psci_0_2_get_version(void)
{
return invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
}
@ -163,46 +165,80 @@ int psci_set_osi_mode(bool enable)
return psci_to_linux_errno(err);
}
static int psci_cpu_suspend(u32 state, unsigned long entry_point)
static int __psci_cpu_suspend(u32 fn, u32 state, unsigned long entry_point)
{
int err;
u32 fn;
fn = psci_function_id[PSCI_FN_CPU_SUSPEND];
err = invoke_psci_fn(fn, state, entry_point, 0);
return psci_to_linux_errno(err);
}
static int psci_cpu_off(u32 state)
static int psci_0_1_cpu_suspend(u32 state, unsigned long entry_point)
{
return __psci_cpu_suspend(psci_0_1_function_ids.cpu_suspend,
state, entry_point);
}
static int psci_0_2_cpu_suspend(u32 state, unsigned long entry_point)
{
return __psci_cpu_suspend(PSCI_FN_NATIVE(0_2, CPU_SUSPEND),
state, entry_point);
}
static int __psci_cpu_off(u32 fn, u32 state)
{
int err;
u32 fn;
fn = psci_function_id[PSCI_FN_CPU_OFF];
err = invoke_psci_fn(fn, state, 0, 0);
return psci_to_linux_errno(err);
}
static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
static int psci_0_1_cpu_off(u32 state)
{
return __psci_cpu_off(psci_0_1_function_ids.cpu_off, state);
}
static int psci_0_2_cpu_off(u32 state)
{
return __psci_cpu_off(PSCI_0_2_FN_CPU_OFF, state);
}
static int __psci_cpu_on(u32 fn, unsigned long cpuid, unsigned long entry_point)
{
int err;
u32 fn;
fn = psci_function_id[PSCI_FN_CPU_ON];
err = invoke_psci_fn(fn, cpuid, entry_point, 0);
return psci_to_linux_errno(err);
}
static int psci_migrate(unsigned long cpuid)
static int psci_0_1_cpu_on(unsigned long cpuid, unsigned long entry_point)
{
return __psci_cpu_on(psci_0_1_function_ids.cpu_on, cpuid, entry_point);
}
static int psci_0_2_cpu_on(unsigned long cpuid, unsigned long entry_point)
{
return __psci_cpu_on(PSCI_FN_NATIVE(0_2, CPU_ON), cpuid, entry_point);
}
static int __psci_migrate(u32 fn, unsigned long cpuid)
{
int err;
u32 fn;
fn = psci_function_id[PSCI_FN_MIGRATE];
err = invoke_psci_fn(fn, cpuid, 0, 0);
return psci_to_linux_errno(err);
}
static int psci_0_1_migrate(unsigned long cpuid)
{
return __psci_migrate(psci_0_1_function_ids.migrate, cpuid);
}
static int psci_0_2_migrate(unsigned long cpuid)
{
return __psci_migrate(PSCI_FN_NATIVE(0_2, MIGRATE), cpuid);
}
static int psci_affinity_info(unsigned long target_affinity,
unsigned long lowest_affinity_level)
{
@ -347,7 +383,7 @@ static void __init psci_init_system_suspend(void)
static void __init psci_init_cpu_suspend(void)
{
int feature = psci_features(psci_function_id[PSCI_FN_CPU_SUSPEND]);
int feature = psci_features(PSCI_FN_NATIVE(0_2, CPU_SUSPEND));
if (feature != PSCI_RET_NOT_SUPPORTED)
psci_cpu_suspend_feature = feature;
@ -421,24 +457,16 @@ static void __init psci_init_smccc(void)
static void __init psci_0_2_set_functions(void)
{
pr_info("Using standard PSCI v0.2 function IDs\n");
psci_ops.get_version = psci_get_version;
psci_function_id[PSCI_FN_CPU_SUSPEND] =
PSCI_FN_NATIVE(0_2, CPU_SUSPEND);
psci_ops.cpu_suspend = psci_cpu_suspend;
psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
psci_ops.cpu_off = psci_cpu_off;
psci_function_id[PSCI_FN_CPU_ON] = PSCI_FN_NATIVE(0_2, CPU_ON);
psci_ops.cpu_on = psci_cpu_on;
psci_function_id[PSCI_FN_MIGRATE] = PSCI_FN_NATIVE(0_2, MIGRATE);
psci_ops.migrate = psci_migrate;
psci_ops.affinity_info = psci_affinity_info;
psci_ops.migrate_info_type = psci_migrate_info_type;
psci_ops = (struct psci_operations){
.get_version = psci_0_2_get_version,
.cpu_suspend = psci_0_2_cpu_suspend,
.cpu_off = psci_0_2_cpu_off,
.cpu_on = psci_0_2_cpu_on,
.migrate = psci_0_2_migrate,
.affinity_info = psci_affinity_info,
.migrate_info_type = psci_migrate_info_type,
};
arm_pm_restart = psci_sys_reset;
@ -450,7 +478,7 @@ static void __init psci_0_2_set_functions(void)
*/
static int __init psci_probe(void)
{
u32 ver = psci_get_version();
u32 ver = psci_0_2_get_version();
pr_info("PSCIv%d.%d detected in firmware.\n",
PSCI_VERSION_MAJOR(ver),
@ -514,24 +542,26 @@ static int __init psci_0_1_init(struct device_node *np)
pr_info("Using PSCI v0.1 Function IDs from DT\n");
psci_ops.get_version = psci_0_1_get_version;
if (!of_property_read_u32(np, "cpu_suspend", &id)) {
psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
psci_ops.cpu_suspend = psci_cpu_suspend;
psci_0_1_function_ids.cpu_suspend = id;
psci_ops.cpu_suspend = psci_0_1_cpu_suspend;
}
if (!of_property_read_u32(np, "cpu_off", &id)) {
psci_function_id[PSCI_FN_CPU_OFF] = id;
psci_ops.cpu_off = psci_cpu_off;
psci_0_1_function_ids.cpu_off = id;
psci_ops.cpu_off = psci_0_1_cpu_off;
}
if (!of_property_read_u32(np, "cpu_on", &id)) {
psci_function_id[PSCI_FN_CPU_ON] = id;
psci_ops.cpu_on = psci_cpu_on;
psci_0_1_function_ids.cpu_on = id;
psci_ops.cpu_on = psci_0_1_cpu_on;
}
if (!of_property_read_u32(np, "migrate", &id)) {
psci_function_id[PSCI_FN_MIGRATE] = id;
psci_ops.migrate = psci_migrate;
psci_0_1_function_ids.migrate = id;
psci_ops.migrate = psci_0_1_migrate;
}
return 0;

View File

@ -34,6 +34,15 @@ struct psci_operations {
extern struct psci_operations psci_ops;
struct psci_0_1_function_ids {
u32 cpu_suspend;
u32 cpu_on;
u32 cpu_off;
u32 migrate;
};
struct psci_0_1_function_ids get_psci_0_1_function_ids(void);
#if defined(CONFIG_ARM_PSCI_FW)
int __init psci_dt_init(void);
#else