diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index 938781f59468..3278fb4bf219 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -388,6 +388,9 @@ Before jumping into the kernel, the following conditions must be met:
 
     - HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
 
+    - HCRX_EL2.MCE2 (bit 10) must be initialised to 0b1 and the hypervisor
+      must handle MOPS exceptions as described in :ref:`arm64_mops_hyp`.
+
   For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):
 
   - If EL3 is present:
diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst
index e02247dcb87e..6a012c98bdcd 100644
--- a/Documentation/arch/arm64/index.rst
+++ b/Documentation/arch/arm64/index.rst
@@ -22,6 +22,7 @@ ARM64 Architecture
     legacy_instructions
     memory
     memory-tagging-extension
+    mops
     perf
     pointer-authentication
     ptdump
diff --git a/Documentation/arch/arm64/mops.rst b/Documentation/arch/arm64/mops.rst
new file mode 100644
index 000000000000..2ef5b147f8dc
--- /dev/null
+++ b/Documentation/arch/arm64/mops.rst
@@ -0,0 +1,44 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Memory copy/set instructions (MOPS)
+===================================
+
+A MOPS memory copy/set operation consists of three consecutive CPY* or SET*
+instructions: a prologue, main and epilogue (for example: CPYP, CPYM, CPYE).
+
+A main or epilogue instruction can take a MOPS exception for various reasons,
+for example when a task is migrated to a CPU with a different MOPS
+implementation, or when the instruction's alignment and size requirements are
+not met. The software exception handler is then expected to reset the registers
+and restart execution from the prologue instruction. Normally this is handled
+by the kernel.
+
+For more details refer to "D1.3.5.7 Memory Copy and Memory Set exceptions" in
+the Arm Architecture Reference Manual DDI 0487K.a (Arm ARM).
+
+.. _arm64_mops_hyp:
+
+Hypervisor requirements
+-----------------------
+
+A hypervisor running a Linux guest must handle all MOPS exceptions from the
+guest kernel, as Linux may not be able to handle the exception at all times.
+For example, a MOPS exception can be taken when the hypervisor migrates a vCPU
+to another physical CPU with a different MOPS implementation.
+
+To do this, the hypervisor must:
+
+  - Set HCRX_EL2.MCE2 to 1 so that the exception is taken to the hypervisor.
+
+  - Have an exception handler that implements the algorithm from the Arm ARM
+    rules CNTMJ and MWFQH.
+
+  - Set the guest's PSTATE.SS to 0 in the exception handler, to handle a
+    potential step of the current instruction.
+
+    Note: Clearing PSTATE.SS is needed so that a single step exception is taken
+    on the next instruction (the prologue instruction). Otherwise the prologue
+    would be silently stepped over and the single step exception taken on the
+    main instruction instead. Note that if the guest instruction is not being
+    stepped then clearing PSTATE.SS has no effect.
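As an illustration of the prologue/main/epilogue structure that mops.rst describes, the sketch below shows what a user-space MOPS copy can look like. This is not part of the patch: the function name mops_memcpy is made up, it assumes an assembler built with MOPS support (e.g. -march=armv8.8-a or +mops), and it must only be executed on a CPU that implements FEAT_MOPS.

#include <stddef.h>

/*
 * Minimal sketch of a MOPS copy: CPYP (prologue), CPYM (main), CPYE
 * (epilogue). All three registers are updated by the instructions, and
 * NZCV may be used to record the implementation option, hence the
 * "cc" clobber.
 */
static void *mops_memcpy(void *dst, const void *src, size_t n)
{
	void *ret = dst;

	asm volatile("cpyp	[%0]!, [%1]!, %2!\n"
		     "cpym	[%0]!, [%1]!, %2!\n"
		     "cpye	[%0]!, [%1]!, %2!"
		     : "+r" (dst), "+r" (src), "+r" (n)
		     :
		     : "memory", "cc");
	return ret;
}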
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5afd028116c9..cd900e640b10 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2160,6 +2160,9 @@ config ARM64_EPAN
 	  if the cpu does not implement the feature.
 
 endmenu # "ARMv8.7 architectural features"
 
+config AS_HAS_MOPS
+	def_bool $(as-instr,.arch_extension mops)
+
 menu "ARMv8.9 architectural features"
 
 config ARM64_POE
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 13d437bcbf58..8f6ba31b8658 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -105,6 +105,7 @@ void kernel_enable_single_step(struct pt_regs *regs);
 void kernel_disable_single_step(void);
 int kernel_active_single_step(void);
 void kernel_rewind_single_step(struct pt_regs *regs);
+void kernel_fastforward_single_step(struct pt_regs *regs);
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 int reinstall_suspended_bps(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 674518464718..d48fc16584cd 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -75,6 +75,7 @@ void do_el0_svc_compat(struct pt_regs *regs);
 void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
 void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
 void do_el0_mops(struct pt_regs *regs, unsigned long esr);
+void do_el1_mops(struct pt_regs *regs, unsigned long esr);
 void do_serror(struct pt_regs *regs, unsigned long esr);
 void do_signal(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 89bc18989b90..e390c432f546 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -353,6 +353,7 @@ __AARCH64_INSN_FUNCS(ldrsw_lit,	0xFF000000, 0x98000000)
 __AARCH64_INSN_FUNCS(exclusive,	0x3F800000, 0x08000000)
 __AARCH64_INSN_FUNCS(load_ex,	0x3F400000, 0x08400000)
 __AARCH64_INSN_FUNCS(store_ex,	0x3F400000, 0x08000000)
+__AARCH64_INSN_FUNCS(mops,	0x3B200C00, 0x19000400)
 __AARCH64_INSN_FUNCS(stp,	0x7FC00000, 0x29000000)
 __AARCH64_INSN_FUNCS(ldp,	0x7FC00000, 0x29400000)
 __AARCH64_INSN_FUNCS(stp_post,	0x7FC00000, 0x28800000)
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 4713a4c65b1b..58f047de3e1c 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -440,6 +440,11 @@ void kernel_rewind_single_step(struct pt_regs *regs)
 	set_regs_spsr_ss(regs);
 }
 
+void kernel_fastforward_single_step(struct pt_regs *regs)
+{
+	clear_regs_spsr_ss(regs);
+}
+
 /* ptrace API */
 void user_enable_single_step(struct task_struct *task)
 {
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index fe74813009bd..b260ddc4d3e9 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -472,6 +472,15 @@ static void noinstr el1_gcs(struct pt_regs *regs, unsigned long esr)
 	exit_to_kernel_mode(regs);
 }
 
+static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr)
+{
+	enter_from_kernel_mode(regs);
+	local_daif_inherit(regs);
+	do_el1_mops(regs, esr);
+	local_daif_mask();
+	exit_to_kernel_mode(regs);
+}
+
 static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
@@ -517,6 +526,9 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
 	case ESR_ELx_EC_GCS:
 		el1_gcs(regs, esr);
 		break;
+	case ESR_ELx_EC_MOPS:
+		el1_mops(regs, esr);
+		break;
 	case ESR_ELx_EC_BREAKPT_CUR:
 	case ESR_ELx_EC_SOFTSTP_CUR:
 	case ESR_ELx_EC_WATCHPT_CUR:
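The new el1_mops()/do_el1_mops() path resets the operation's registers (via arm64_mops_reset_regs()) based on the MOPS-specific ISS fields of ESR_EL1. For reference only, here is a standalone decoder of those fields; the bit layout is taken from the Arm ARM's Memory Copy/Set exception ISS encoding, and the kernel itself uses its ESR_ELx_MOPS_ISS_* helpers from asm/esr.h rather than anything like this.

#include <stdbool.h>
#include <stdint.h>

/*
 * Illustrative decode of the ISS reported with a MOPS exception
 * (ESR_ELx.EC == 0x27). Bit positions follow the Arm ARM; the kernel's
 * asm/esr.h provides equivalent ESR_ELx_MOPS_ISS_* macros.
 */
struct mops_iss {
	bool mem_inst;		/* bit 24: SET* vs CPY* instruction class */
	bool from_epilogue;	/* bit 18: taken from the epilogue instruction */
	bool wrong_option;	/* bit 17: implementation option mismatch */
	bool option_a;		/* bit 16: register format follows Option A */
	unsigned int destreg;	/* bits 14:10 */
	unsigned int srcreg;	/* bits 9:5 */
	unsigned int sizereg;	/* bits 4:0 */
};

static struct mops_iss decode_mops_iss(uint64_t esr)
{
	struct mops_iss iss = {
		.mem_inst	= esr & (1UL << 24),
		.from_epilogue	= esr & (1UL << 18),
		.wrong_option	= esr & (1UL << 17),
		.option_a	= esr & (1UL << 16),
		.destreg	= (esr >> 10) & 0x1f,
		.srcreg		= (esr >> 5) & 0x1f,
		.sizereg	= esr & 0x1f,
	};

	return iss;
}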
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index e05249f57075..6438bf62e753 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -58,10 +58,13 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
 	 * Instructions which load PC relative literals are not going to work
 	 * when executed from an XOL slot. Instructions doing an exclusive
 	 * load/store are not going to complete successfully when single-step
-	 * exception handling happens in the middle of the sequence.
+	 * exception handling happens in the middle of the sequence. Memory
+	 * copy/set instructions require that all three instructions be placed
+	 * consecutively in memory.
 	 */
 	if (aarch64_insn_uses_literal(insn) ||
-	    aarch64_insn_is_exclusive(insn))
+	    aarch64_insn_is_exclusive(insn) ||
+	    aarch64_insn_is_mops(insn))
 		return false;
 
 	return true;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index fdbcf047108c..ee318f6df647 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -541,6 +541,13 @@ void do_el0_mops(struct pt_regs *regs, unsigned long esr)
 	user_fastforward_single_step(current);
 }
 
+void do_el1_mops(struct pt_regs *regs, unsigned long esr)
+{
+	arm64_mops_reset_regs(&regs->user_regs, esr);
+
+	kernel_fastforward_single_step(regs);
+}
+
 #define __user_cache_maint(insn, address, res)			\
 	if (address >= TASK_SIZE_MAX) {				\
 		res = -EFAULT;					\
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index ebde40e7fa2b..bd6f7d5eb6eb 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -15,6 +15,19 @@
  * x0 - dest
  */
 SYM_FUNC_START(__pi_clear_page)
+#ifdef CONFIG_AS_HAS_MOPS
+	.arch_extension mops
+alternative_if_not ARM64_HAS_MOPS
+	b	.Lno_mops
+alternative_else_nop_endif
+
+	mov	x1, #PAGE_SIZE
+	setpn	[x0]!, x1!, xzr
+	setmn	[x0]!, x1!, xzr
+	seten	[x0]!, x1!, xzr
+	ret
+.Lno_mops:
+#endif
 	mrs	x1, dczid_el0
 	tbnz	x1, #4, 2f	/* Branch if DC ZVA is prohibited */
 	and	w1, w1, #0xf
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 6a56d7cf309d..e6374e7e5511 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,6 +18,19 @@
 * x1 - src
 */
 SYM_FUNC_START(__pi_copy_page)
+#ifdef CONFIG_AS_HAS_MOPS
+	.arch_extension mops
+alternative_if_not ARM64_HAS_MOPS
+	b	.Lno_mops
+alternative_else_nop_endif
+
+	mov	x2, #PAGE_SIZE
+	cpypwn	[x0]!, [x1]!, x2!
+	cpymwn	[x0]!, [x1]!, x2!
+	cpyewn	[x0]!, [x1]!, x2!
+	ret
+.Lno_mops:
+#endif
 	ldp	x2, x3, [x1]
 	ldp	x4, x5, [x1, #16]
 	ldp	x6, x7, [x1, #32]
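The library routines above only execute the MOPS sequences behind the ARM64_HAS_MOPS alternative, i.e. once the cpufeature code has established that the CPUs implement FEAT_MOPS. User space has an analogous runtime gate via the MOPS hwcap; a small sketch, assuming uapi headers recent enough to define HWCAP2_MOPS (added around Linux 6.5):

#include <stdbool.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>

/*
 * Runtime check before using CPY* and SET* from user space, mirroring
 * the kernel's alternative_if_not ARM64_HAS_MOPS fallback above.
 */
static bool cpu_has_mops(void)
{
#ifdef HWCAP2_MOPS
	return getauxval(AT_HWCAP2) & HWCAP2_MOPS;
#else
	return false;	/* headers predate FEAT_MOPS support */
#endif
}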
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 4ab48d49c451..9b99106fb95f 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -57,7 +57,7 @@
    The loop tail is handled by always copying 64 bytes from the end.
 */
 
-SYM_FUNC_START(__pi_memcpy)
+SYM_FUNC_START_LOCAL(__pi_memcpy_generic)
 	add	srcend, src, count
 	add	dstend, dstin, count
 	cmp	count, 128
@@ -238,7 +238,24 @@ L(copy64_from_start):
 	stp	B_l, B_h, [dstin, 16]
 	stp	C_l, C_h, [dstin]
 	ret
+SYM_FUNC_END(__pi_memcpy_generic)
+
+#ifdef CONFIG_AS_HAS_MOPS
+	.arch_extension mops
+SYM_FUNC_START(__pi_memcpy)
+alternative_if_not ARM64_HAS_MOPS
+	b	__pi_memcpy_generic
+alternative_else_nop_endif
+
+	mov	dst, dstin
+	cpyp	[dst]!, [src]!, count!
+	cpym	[dst]!, [src]!, count!
+	cpye	[dst]!, [src]!, count!
+	ret
 SYM_FUNC_END(__pi_memcpy)
+#else
+SYM_FUNC_ALIAS(__pi_memcpy, __pi_memcpy_generic)
+#endif
 
 SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
 EXPORT_SYMBOL(__memcpy)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index a5aebe82ad73..97157da65ec6 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -26,6 +26,7 @@
 */
 
 dstin		.req	x0
+val_x		.req	x1
 val		.req	w1
 count		.req	x2
 tmp1		.req	x3
@@ -42,7 +43,7 @@ dst		.req	x8
 tmp3w		.req	w9
 tmp3		.req	x9
 
-SYM_FUNC_START(__pi_memset)
+SYM_FUNC_START_LOCAL(__pi_memset_generic)
 	mov	dst, dstin	/* Preserve return value.  */
 	and	A_lw, val, #255
 	orr	A_lw, A_lw, A_lw, lsl #8
@@ -201,7 +202,24 @@ SYM_FUNC_START(__pi_memset)
 	ands	count, count, zva_bits_x
 	b.ne	.Ltail_maybe_long
 	ret
+SYM_FUNC_END(__pi_memset_generic)
+
+#ifdef CONFIG_AS_HAS_MOPS
+	.arch_extension mops
+SYM_FUNC_START(__pi_memset)
+alternative_if_not ARM64_HAS_MOPS
+	b	__pi_memset_generic
+alternative_else_nop_endif
+
+	mov	dst, dstin
+	setp	[dst]!, count!, val_x
+	setm	[dst]!, count!, val_x
+	sete	[dst]!, count!, val_x
+	ret
 SYM_FUNC_END(__pi_memset)
+#else
+SYM_FUNC_ALIAS(__pi_memset, __pi_memset_generic)
+#endif
 
 SYM_FUNC_ALIAS(__memset, __pi_memset)
 EXPORT_SYMBOL(__memset)
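For completeness: the SET* side uses a slightly different operand convention than the CPY* side. The value register (val_x above) is read but never updated, which is why memset.S only post-increments the destination and count registers. A user-space sketch, under the same assumptions as the earlier copy example (made-up function name, MOPS-capable assembler and CPU):

#include <stddef.h>
#include <stdint.h>

/*
 * Minimal sketch of a MOPS memset: SETP/SETM/SETE consume the bottom
 * byte of the value register and update only the destination and count
 * registers.
 */
static void *mops_memset(void *dst, int c, size_t n)
{
	void *ret = dst;
	uint64_t val = (unsigned char)c;

	asm volatile("setp	[%0]!, %1!, %2\n"
		     "setm	[%0]!, %1!, %2\n"
		     "sete	[%0]!, %1!, %2"
		     : "+r" (dst), "+r" (n)
		     : "r" (val)
		     : "memory", "cc");
	return ret;
}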