2019-05-27 14:55:01 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2008-06-24 09:32:36 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
|
|
|
|
*
|
|
|
|
* Modifications for ppc64:
|
|
|
|
* Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
|
|
|
|
*
|
|
|
|
* Copyright 2008 Michael Ellerman, IBM Corporation.
|
|
|
|
*/
|
|
|
|
|
2010-06-29 05:08:29 +08:00
|
|
|
#include <linux/types.h>
|
2016-07-23 17:12:38 +08:00
|
|
|
#include <linux/jump_label.h>
|
2008-06-24 09:32:36 +08:00
|
|
|
#include <linux/kernel.h>
|
2008-06-24 09:33:03 +08:00
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/init.h>
|
2017-02-04 07:16:44 +08:00
|
|
|
#include <linux/sched/mm.h>
|
2021-05-06 12:49:58 +08:00
|
|
|
#include <linux/stop_machine.h>
|
2008-06-24 09:32:36 +08:00
|
|
|
#include <asm/cputable.h>
|
|
|
|
#include <asm/code-patching.h>
|
2021-06-17 23:51:11 +08:00
|
|
|
#include <asm/interrupt.h>
|
2011-11-14 20:54:47 +08:00
|
|
|
#include <asm/page.h>
|
|
|
|
#include <asm/sections.h>
|
2016-07-05 13:03:41 +08:00
|
|
|
#include <asm/setup.h>
|
2018-05-22 07:00:00 +08:00
|
|
|
#include <asm/security_features.h>
|
2016-07-05 13:03:41 +08:00
|
|
|
#include <asm/firmware.h>
|
2020-05-06 11:40:26 +08:00
|
|
|
#include <asm/inst.h>
|
2008-06-24 09:32:36 +08:00
|
|
|
|
|
|
|
/*
 * One record of a feature fixup table.
 *
 * @mask, @value:   the default code is left in place when
 *                  (feature_value & mask) == value; otherwise the
 *                  alternative is patched over it.
 * @start_off, @end_off:
 *                  bounds of the default code.
 * @alt_start_off, @alt_end_off:
 *                  bounds of the alternative ("else") code. It must not be
 *                  larger than the default code; any unused tail of the
 *                  default section is filled with nops.
 *
 * All four offsets are relative to the address of the entry itself and are
 * turned back into pointers by calc_addr().
 */
struct fixup_entry {
	unsigned long	mask;
	unsigned long	value;
	long		start_off;
	long		end_off;
	long		alt_start_off;
	long		alt_end_off;
};
|
|
|
|
|
2021-05-20 21:50:45 +08:00
|
|
|
static u32 *calc_addr(struct fixup_entry *fcur, long offset)
|
2008-06-24 09:32:36 +08:00
|
|
|
{
|
2008-06-24 09:33:02 +08:00
|
|
|
/*
|
|
|
|
* We store the offset to the code as a negative offset from
|
|
|
|
* the start of the alt_entry, to support the VDSO. This
|
|
|
|
* routine converts that back into an actual address.
|
|
|
|
*/
|
2021-05-20 21:50:45 +08:00
|
|
|
return (u32 *)((unsigned long)fcur + offset);
|
2008-06-24 09:33:02 +08:00
|
|
|
}
|
|
|
|
|
2021-05-20 21:50:45 +08:00
|
|
|
/*
 * Copy one instruction of an alternative section from @src to @dest.
 *
 * A relative branch whose target lies outside [@alt_start, @alt_end] is
 * re-encoded with translate_branch() so it still reaches the same target
 * from its new location. A target equal to @alt_end counts as inside the
 * section and is copied unchanged.
 *
 * Returns 0 on success, 1 if the branch could not be translated.
 */
static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end)
{
	struct ppc_inst instr = ppc_inst_read(src);

	if (instr_is_relative_branch(instr)) {
		u32 *target = (u32 *)branch_target(src);
		bool leaves_section = target < alt_start || target > alt_end;

		/* Branch within the section doesn't need translating */
		if (leaves_section && translate_branch(&instr, dest, src))
			return 1;
	}

	raw_patch_instruction(dest, instr);

	return 0;
}
|
|
|
|
|
|
|
|
static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
|
|
|
|
{
|
2021-05-20 21:50:45 +08:00
|
|
|
u32 *start, *end, *alt_start, *alt_end, *src, *dest;
|
2008-06-24 09:33:02 +08:00
|
|
|
|
|
|
|
start = calc_addr(fcur, fcur->start_off);
|
|
|
|
end = calc_addr(fcur, fcur->end_off);
|
|
|
|
alt_start = calc_addr(fcur, fcur->alt_start_off);
|
|
|
|
alt_end = calc_addr(fcur, fcur->alt_end_off);
|
|
|
|
|
|
|
|
if ((alt_end - alt_start) > (end - start))
|
|
|
|
return 1;
|
2008-06-24 09:32:36 +08:00
|
|
|
|
|
|
|
if ((value & fcur->mask) == fcur->value)
|
2008-06-24 09:33:02 +08:00
|
|
|
return 0;
|
2008-06-24 09:32:36 +08:00
|
|
|
|
2008-06-24 09:33:02 +08:00
|
|
|
src = alt_start;
|
|
|
|
dest = start;
|
2008-06-24 09:32:36 +08:00
|
|
|
|
powerpc: Add ppc_inst_next()
In a few places we want to calculate the address of the next
instruction. Previously that was simple, we just added 4 bytes, or if
using a u32 * we incremented that pointer by 1.
But prefixed instructions make it more complicated, we need to advance
by either 4 or 8 bytes depending on the actual instruction. We also
can't do pointer arithmetic using struct ppc_inst, because it is
always 8 bytes in size on 64-bit, even though we might only need to
advance by 4 bytes.
So add a ppc_inst_next() helper which calculates the location of the
next instruction, if the given instruction was located at the given
address. Note the instruction doesn't need to actually be at the
address in memory.
Although it would seem natural for the value to be passed by value,
that makes it too easy to write a loop that will read off the end of a
page, eg:
for (; src < end; src = ppc_inst_next(src, *src),
dest = ppc_inst_next(dest, *dest))
As noticed by Christophe and Jordan, if end is the exact end of a
page, and the next page is not mapped, this will fault, because *dest
will read 8 bytes, 4 bytes into the next page.
So value is passed by reference, so the helper can be careful to use
ppc_inst_read() on it.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
Link: https://lore.kernel.org/r/20200522133318.1681406-1-mpe@ellerman.id.au
2020-05-22 21:33:18 +08:00
|
|
|
for (; src < alt_end; src = ppc_inst_next(src, src),
|
|
|
|
dest = ppc_inst_next(dest, dest)) {
|
2008-06-24 09:33:02 +08:00
|
|
|
if (patch_alt_instruction(src, dest, alt_start, alt_end))
|
|
|
|
return 1;
|
2008-06-24 09:32:36 +08:00
|
|
|
}
|
2008-06-24 09:33:02 +08:00
|
|
|
|
2021-05-20 21:50:45 +08:00
|
|
|
for (; dest < end; dest++)
|
|
|
|
raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP()));
|
2008-06-24 09:33:02 +08:00
|
|
|
|
|
|
|
return 0;
|
2008-06-24 09:32:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
|
|
|
|
{
|
|
|
|
struct fixup_entry *fcur, *fend;
|
|
|
|
|
|
|
|
fcur = fixup_start;
|
|
|
|
fend = fixup_end;
|
|
|
|
|
2008-06-24 09:33:02 +08:00
|
|
|
for (; fcur < fend; fcur++) {
|
|
|
|
if (patch_feature_section(value, fcur)) {
|
2008-07-17 12:46:00 +08:00
|
|
|
WARN_ON(1);
|
2008-06-24 09:33:02 +08:00
|
|
|
printk("Unable to patch feature section at %p - %p" \
|
|
|
|
" with %p - %p\n",
|
|
|
|
calc_addr(fcur, fcur->start_off),
|
|
|
|
calc_addr(fcur, fcur->end_off),
|
|
|
|
calc_addr(fcur, fcur->alt_start_off),
|
|
|
|
calc_addr(fcur, fcur->alt_end_off));
|
|
|
|
}
|
|
|
|
}
|
2008-06-24 09:32:36 +08:00
|
|
|
}
|
2008-06-24 09:33:03 +08:00
|
|
|
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, back ports testing etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
2018-10-22 22:54:17 +08:00
|
|
|
static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
|
2018-05-22 07:00:00 +08:00
|
|
|
{
|
|
|
|
unsigned int instrs[3], *dest;
|
|
|
|
long *start, *end;
|
|
|
|
int i;
|
|
|
|
|
2020-12-01 22:43:44 +08:00
|
|
|
start = PTRRELOC(&__start___stf_entry_barrier_fixup);
|
2018-05-22 07:00:00 +08:00
|
|
|
end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
|
|
|
|
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[0] = PPC_RAW_NOP();
|
|
|
|
instrs[1] = PPC_RAW_NOP();
|
|
|
|
instrs[2] = PPC_RAW_NOP();
|
2018-05-22 07:00:00 +08:00
|
|
|
|
|
|
|
i = 0;
|
|
|
|
if (types & STF_BARRIER_FALLBACK) {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_MFLR(_R10);
|
|
|
|
instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
|
|
|
|
instrs[i++] = PPC_RAW_MTLR(_R10);
|
2018-05-22 07:00:00 +08:00
|
|
|
} else if (types & STF_BARRIER_EIEIO) {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
|
2018-05-22 07:00:00 +08:00
|
|
|
} else if (types & STF_BARRIER_SYNC_ORI) {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_SYNC();
|
|
|
|
instrs[i++] = PPC_RAW_LD(_R10, _R13, 0);
|
|
|
|
instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
|
2018-05-22 07:00:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; start < end; start++, i++) {
|
|
|
|
dest = (void *)start + *start;
|
|
|
|
|
|
|
|
pr_devel("patching dest %lx\n", (unsigned long)dest);
|
|
|
|
|
2021-05-13 22:08:00 +08:00
|
|
|
// See comment in do_entry_flush_fixups() RE order of patching
|
|
|
|
if (types & STF_BARRIER_FALLBACK) {
|
2021-05-20 21:50:45 +08:00
|
|
|
patch_instruction(dest, ppc_inst(instrs[0]));
|
|
|
|
patch_instruction(dest + 2, ppc_inst(instrs[2]));
|
|
|
|
patch_branch(dest + 1,
|
2021-05-13 22:08:00 +08:00
|
|
|
(unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
|
|
|
|
} else {
|
2021-05-20 21:50:45 +08:00
|
|
|
patch_instruction(dest + 1, ppc_inst(instrs[1]));
|
|
|
|
patch_instruction(dest + 2, ppc_inst(instrs[2]));
|
|
|
|
patch_instruction(dest, ppc_inst(instrs[0]));
|
2021-05-13 22:08:00 +08:00
|
|
|
}
|
2018-05-22 07:00:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
|
|
|
|
(types == STF_BARRIER_NONE) ? "no" :
|
|
|
|
(types == STF_BARRIER_FALLBACK) ? "fallback" :
|
|
|
|
(types == STF_BARRIER_EIEIO) ? "eieio" :
|
|
|
|
(types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
|
|
|
|
: "unknown");
|
|
|
|
}
|
|
|
|
|
2018-10-22 22:54:17 +08:00
|
|
|
static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
|
2018-05-22 07:00:00 +08:00
|
|
|
{
|
|
|
|
unsigned int instrs[6], *dest;
|
|
|
|
long *start, *end;
|
|
|
|
int i;
|
|
|
|
|
2020-12-01 22:43:44 +08:00
|
|
|
start = PTRRELOC(&__start___stf_exit_barrier_fixup);
|
2018-05-22 07:00:00 +08:00
|
|
|
end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
|
|
|
|
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[0] = PPC_RAW_NOP();
|
|
|
|
instrs[1] = PPC_RAW_NOP();
|
|
|
|
instrs[2] = PPC_RAW_NOP();
|
|
|
|
instrs[3] = PPC_RAW_NOP();
|
|
|
|
instrs[4] = PPC_RAW_NOP();
|
|
|
|
instrs[5] = PPC_RAW_NOP();
|
2018-05-22 07:00:00 +08:00
|
|
|
|
|
|
|
i = 0;
|
|
|
|
if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
|
|
|
|
if (cpu_has_feature(CPU_FTR_HVMODE)) {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13);
|
|
|
|
instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0);
|
2018-05-22 07:00:00 +08:00
|
|
|
} else {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13);
|
|
|
|
instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1);
|
2018-05-22 07:00:00 +08:00
|
|
|
}
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_SYNC();
|
|
|
|
instrs[i++] = PPC_RAW_LD(_R13, _R13, 0);
|
|
|
|
instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
|
|
|
|
if (cpu_has_feature(CPU_FTR_HVMODE))
|
|
|
|
instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1);
|
|
|
|
else
|
|
|
|
instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2);
|
2018-05-22 07:00:00 +08:00
|
|
|
} else if (types & STF_BARRIER_EIEIO) {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
|
2018-05-22 07:00:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; start < end; start++, i++) {
|
|
|
|
dest = (void *)start + *start;
|
|
|
|
|
|
|
|
pr_devel("patching dest %lx\n", (unsigned long)dest);
|
|
|
|
|
2021-05-20 21:50:45 +08:00
|
|
|
patch_instruction(dest, ppc_inst(instrs[0]));
|
|
|
|
patch_instruction(dest + 1, ppc_inst(instrs[1]));
|
|
|
|
patch_instruction(dest + 2, ppc_inst(instrs[2]));
|
|
|
|
patch_instruction(dest + 3, ppc_inst(instrs[3]));
|
|
|
|
patch_instruction(dest + 4, ppc_inst(instrs[4]));
|
|
|
|
patch_instruction(dest + 5, ppc_inst(instrs[5]));
|
2018-05-22 07:00:00 +08:00
|
|
|
}
|
|
|
|
printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
|
|
|
|
(types == STF_BARRIER_NONE) ? "no" :
|
|
|
|
(types == STF_BARRIER_FALLBACK) ? "fallback" :
|
|
|
|
(types == STF_BARRIER_EIEIO) ? "eieio" :
|
|
|
|
(types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
|
|
|
|
: "unknown");
|
|
|
|
}
|
|
|
|
|
2021-06-17 23:51:11 +08:00
|
|
|
/*
 * Whether each mitigation currently allows re-entrant interrupt exits.
 * do_stf_barrier_fixups() updates stf_exit_reentrant and only clears the
 * interrupt_exit_not_reentrant branch when both flags are true.
 * NOTE(review): rfi_exit_reentrant is presumably maintained by the RFI
 * flush fixup code, which is not visible here; confirm.
 */
static bool stf_exit_reentrant;
static bool rfi_exit_reentrant;
|
|
|
|
|
2021-05-06 12:49:58 +08:00
|
|
|
static int __do_stf_barrier_fixups(void *data)
|
|
|
|
{
|
|
|
|
enum stf_barrier_type *types = data;
|
|
|
|
|
|
|
|
do_stf_entry_barrier_fixups(*types);
|
|
|
|
do_stf_exit_barrier_fixups(*types);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2018-05-22 07:00:00 +08:00
|
|
|
|
|
|
|
void do_stf_barrier_fixups(enum stf_barrier_type types)
|
|
|
|
{
|
2021-05-06 12:49:58 +08:00
|
|
|
/*
|
|
|
|
* The call to the fallback entry flush, and the fallback/sync-ori exit
|
2021-06-17 23:51:11 +08:00
|
|
|
* flush can not be safely patched in/out while other CPUs are
|
|
|
|
* executing them. So call __do_stf_barrier_fixups() on one CPU while
|
|
|
|
* all other CPUs spin in the stop machine core with interrupts hard
|
|
|
|
* disabled.
|
|
|
|
*
|
|
|
|
* The branch to mark interrupt exits non-reentrant is enabled first,
|
|
|
|
* then stop_machine runs which will ensure all CPUs are out of the
|
|
|
|
* low level interrupt exit code before patching. After the patching,
|
|
|
|
* if allowed, then flip the branch to allow fast exits.
|
2021-05-06 12:49:58 +08:00
|
|
|
*/
|
2021-06-17 23:51:11 +08:00
|
|
|
static_branch_enable(&interrupt_exit_not_reentrant);
|
|
|
|
|
2021-05-06 12:49:58 +08:00
|
|
|
stop_machine(__do_stf_barrier_fixups, &types, NULL);
|
2021-06-17 23:51:11 +08:00
|
|
|
|
|
|
|
if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI))
|
|
|
|
stf_exit_reentrant = false;
|
|
|
|
else
|
|
|
|
stf_exit_reentrant = true;
|
|
|
|
|
|
|
|
if (stf_exit_reentrant && rfi_exit_reentrant)
|
|
|
|
static_branch_disable(&interrupt_exit_not_reentrant);
|
2018-05-22 07:00:00 +08:00
|
|
|
}
|
|
|
|
|
2020-11-17 13:59:13 +08:00
|
|
|
void do_uaccess_flush_fixups(enum l1d_flush_type types)
|
|
|
|
{
|
|
|
|
unsigned int instrs[4], *dest;
|
|
|
|
long *start, *end;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
start = PTRRELOC(&__start___uaccess_flush_fixup);
|
|
|
|
end = PTRRELOC(&__stop___uaccess_flush_fixup);
|
|
|
|
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[0] = PPC_RAW_NOP();
|
|
|
|
instrs[1] = PPC_RAW_NOP();
|
|
|
|
instrs[2] = PPC_RAW_NOP();
|
|
|
|
instrs[3] = PPC_RAW_BLR();
|
2020-11-17 13:59:13 +08:00
|
|
|
|
|
|
|
i = 0;
|
|
|
|
if (types == L1D_FLUSH_FALLBACK) {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[3] = PPC_RAW_NOP();
|
2020-11-17 13:59:13 +08:00
|
|
|
/* fallthrough to fallback flush */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (types & L1D_FLUSH_ORI) {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
|
|
|
|
instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
|
2020-11-17 13:59:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (types & L1D_FLUSH_MTTRIG)
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
|
2020-11-17 13:59:13 +08:00
|
|
|
|
|
|
|
for (i = 0; start < end; start++, i++) {
|
|
|
|
dest = (void *)start + *start;
|
|
|
|
|
|
|
|
pr_devel("patching dest %lx\n", (unsigned long)dest);
|
|
|
|
|
2021-05-20 21:50:45 +08:00
|
|
|
patch_instruction(dest, ppc_inst(instrs[0]));
|
2020-11-17 13:59:13 +08:00
|
|
|
|
2021-05-20 21:50:45 +08:00
|
|
|
patch_instruction(dest + 1, ppc_inst(instrs[1]));
|
|
|
|
patch_instruction(dest + 2, ppc_inst(instrs[2]));
|
|
|
|
patch_instruction(dest + 3, ppc_inst(instrs[3]));
|
2020-11-17 13:59:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
|
|
|
|
(types == L1D_FLUSH_NONE) ? "no" :
|
|
|
|
(types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
|
|
|
|
(types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
|
|
|
|
? "ori+mttrig type"
|
|
|
|
: "ori type" :
|
|
|
|
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
|
|
|
|
: "unknown");
|
|
|
|
}
|
|
|
|
|
2021-05-06 12:49:59 +08:00
|
|
|
static int __do_entry_flush_fixups(void *data)
|
2020-11-17 13:59:12 +08:00
|
|
|
{
|
2021-05-06 12:49:59 +08:00
|
|
|
enum l1d_flush_type types = *(enum l1d_flush_type *)data;
|
2020-11-17 13:59:12 +08:00
|
|
|
unsigned int instrs[3], *dest;
|
|
|
|
long *start, *end;
|
|
|
|
int i;
|
|
|
|
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[0] = PPC_RAW_NOP();
|
|
|
|
instrs[1] = PPC_RAW_NOP();
|
|
|
|
instrs[2] = PPC_RAW_NOP();
|
2020-11-17 13:59:12 +08:00
|
|
|
|
|
|
|
i = 0;
|
|
|
|
if (types == L1D_FLUSH_FALLBACK) {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_MFLR(_R10);
|
|
|
|
instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
|
|
|
|
instrs[i++] = PPC_RAW_MTLR(_R10);
|
2020-11-17 13:59:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (types & L1D_FLUSH_ORI) {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
|
|
|
|
instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
|
2020-11-17 13:59:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (types & L1D_FLUSH_MTTRIG)
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
|
2020-11-17 13:59:12 +08:00
|
|
|
|
powerpc/64s: Fix entry flush patching w/strict RWX & hash
The entry flush mitigation can be enabled/disabled at runtime. When this
happens it results in the kernel patching its own instructions to
enable/disable the mitigation sequence.
With strict kernel RWX enabled instruction patching happens via a
secondary mapping of the kernel text, so that we don't have to make the
primary mapping writable. With the hash MMU this leads to a hash fault,
which causes us to execute the exception entry which contains the entry
flush mitigation.
This means we end up executing the entry flush in a semi-patched state,
ie. after we have patched the first instruction but before we patch the
second or third instruction of the sequence.
On machines with updated firmware the entry flush is a series of special
nops, and it's safe to execute in a semi-patched state.
However when using the fallback flush the sequence is mflr/branch/mtlr,
and so it's not safe to execute if we have patched out the mflr but not
the other two instructions. Doing so leads to us corrupting LR, leading
to an oops, for example:
# echo 0 > /sys/kernel/debug/powerpc/entry_flush
kernel tried to execute exec-protected page (c000000002971000) - exploit attempt? (uid: 0)
BUG: Unable to handle kernel instruction fetch
Faulting instruction address: 0xc000000002971000
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
CPU: 0 PID: 2215 Comm: bash Not tainted 5.13.0-rc1-00010-gda3bb206c9ce #1
NIP: c000000002971000 LR: c000000002971000 CTR: c000000000120c40
REGS: c000000013243840 TRAP: 0400 Not tainted (5.13.0-rc1-00010-gda3bb206c9ce)
MSR: 8000000010009033 <SF,EE,ME,IR,DR,RI,LE> CR: 48428482 XER: 00000000
...
NIP 0xc000000002971000
LR 0xc000000002971000
Call Trace:
do_patch_instruction+0xc4/0x340 (unreliable)
do_entry_flush_fixups+0x100/0x3b0
entry_flush_set+0x50/0xe0
simple_attr_write+0x160/0x1a0
full_proxy_write+0x8c/0x110
vfs_write+0xf0/0x340
ksys_write+0x84/0x140
system_call_exception+0x164/0x2d0
system_call_common+0xec/0x278
The simplest fix is to change the order in which we patch the
instructions, so that the sequence is always safe to execute. For the
non-fallback flushes it doesn't matter what order we patch in.
Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-1-mpe@ellerman.id.au
2021-05-13 22:07:59 +08:00
|
|
|
/*
|
|
|
|
* If we're patching in or out the fallback flush we need to be careful about the
|
|
|
|
* order in which we patch instructions. That's because it's possible we could
|
|
|
|
* take a page fault after patching one instruction, so the sequence of
|
|
|
|
* instructions must be safe even in a half patched state.
|
|
|
|
*
|
|
|
|
* To make that work, when patching in the fallback flush we patch in this order:
|
|
|
|
* - the mflr (dest)
|
|
|
|
* - the mtlr (dest + 2)
|
|
|
|
* - the branch (dest + 1)
|
|
|
|
*
|
|
|
|
* That ensures the sequence is safe to execute at any point. In contrast if we
|
|
|
|
* patch the mtlr last, it's possible we could return from the branch and not
|
|
|
|
* restore LR, leading to a crash later.
|
|
|
|
*
|
|
|
|
* When patching out the fallback flush (either with nops or another flush type),
|
|
|
|
* we patch in this order:
|
|
|
|
* - the branch (dest + 1)
|
|
|
|
* - the mtlr (dest + 2)
|
|
|
|
* - the mflr (dest)
|
|
|
|
*
|
|
|
|
* Note we are protected by stop_machine() from other CPUs executing the code in a
|
|
|
|
* semi-patched state.
|
|
|
|
*/
|
|
|
|
|
2021-01-11 14:24:08 +08:00
|
|
|
start = PTRRELOC(&__start___entry_flush_fixup);
|
|
|
|
end = PTRRELOC(&__stop___entry_flush_fixup);
|
2020-11-17 13:59:12 +08:00
|
|
|
for (i = 0; start < end; start++, i++) {
|
|
|
|
dest = (void *)start + *start;
|
|
|
|
|
|
|
|
pr_devel("patching dest %lx\n", (unsigned long)dest);
|
|
|
|
|
powerpc/64s: Fix entry flush patching w/strict RWX & hash
The entry flush mitigation can be enabled/disabled at runtime. When this
happens it results in the kernel patching its own instructions to
enable/disable the mitigation sequence.
With strict kernel RWX enabled instruction patching happens via a
secondary mapping of the kernel text, so that we don't have to make the
primary mapping writable. With the hash MMU this leads to a hash fault,
which causes us to execute the exception entry which contains the entry
flush mitigation.
This means we end up executing the entry flush in a semi-patched state,
ie. after we have patched the first instruction but before we patch the
second or third instruction of the sequence.
On machines with updated firmware the entry flush is a series of special
nops, and it's safe to execute in a semi-patched state.
However when using the fallback flush the sequence is mflr/branch/mtlr,
and so it's not safe to execute if we have patched out the mflr but not
the other two instructions. Doing so leads to us corrupting LR, leading
to an oops, for example:
# echo 0 > /sys/kernel/debug/powerpc/entry_flush
kernel tried to execute exec-protected page (c000000002971000) - exploit attempt? (uid: 0)
BUG: Unable to handle kernel instruction fetch
Faulting instruction address: 0xc000000002971000
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
CPU: 0 PID: 2215 Comm: bash Not tainted 5.13.0-rc1-00010-gda3bb206c9ce #1
NIP: c000000002971000 LR: c000000002971000 CTR: c000000000120c40
REGS: c000000013243840 TRAP: 0400 Not tainted (5.13.0-rc1-00010-gda3bb206c9ce)
MSR: 8000000010009033 <SF,EE,ME,IR,DR,RI,LE> CR: 48428482 XER: 00000000
...
NIP 0xc000000002971000
LR 0xc000000002971000
Call Trace:
do_patch_instruction+0xc4/0x340 (unreliable)
do_entry_flush_fixups+0x100/0x3b0
entry_flush_set+0x50/0xe0
simple_attr_write+0x160/0x1a0
full_proxy_write+0x8c/0x110
vfs_write+0xf0/0x340
ksys_write+0x84/0x140
system_call_exception+0x164/0x2d0
system_call_common+0xec/0x278
The simplest fix is to change the order in which we patch the
instructions, so that the sequence is always safe to execute. For the
non-fallback flushes it doesn't matter what order we patch in.
Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-1-mpe@ellerman.id.au
2021-05-13 22:07:59 +08:00
|
|
|
if (types == L1D_FLUSH_FALLBACK) {
|
2021-05-20 21:50:45 +08:00
|
|
|
patch_instruction(dest, ppc_inst(instrs[0]));
|
|
|
|
patch_instruction(dest + 2, ppc_inst(instrs[2]));
|
|
|
|
patch_branch(dest + 1,
|
powerpc/64s: Fix entry flush patching w/strict RWX & hash
The entry flush mitigation can be enabled/disabled at runtime. When this
happens it results in the kernel patching its own instructions to
enable/disable the mitigation sequence.
With strict kernel RWX enabled instruction patching happens via a
secondary mapping of the kernel text, so that we don't have to make the
primary mapping writable. With the hash MMU this leads to a hash fault,
which causes us to execute the exception entry which contains the entry
flush mitigation.
This means we end up executing the entry flush in a semi-patched state,
ie. after we have patched the first instruction but before we patch the
second or third instruction of the sequence.
On machines with updated firmware the entry flush is a series of special
nops, and it's safe to execute in a semi-patched state.
However when using the fallback flush the sequence is mflr/branch/mtlr,
and so it's not safe to execute if we have patched out the mflr but not
the other two instructions. Doing so leads to us corrupting LR, leading
to an oops, for example:
# echo 0 > /sys/kernel/debug/powerpc/entry_flush
kernel tried to execute exec-protected page (c000000002971000) - exploit attempt? (uid: 0)
BUG: Unable to handle kernel instruction fetch
Faulting instruction address: 0xc000000002971000
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
CPU: 0 PID: 2215 Comm: bash Not tainted 5.13.0-rc1-00010-gda3bb206c9ce #1
NIP: c000000002971000 LR: c000000002971000 CTR: c000000000120c40
REGS: c000000013243840 TRAP: 0400 Not tainted (5.13.0-rc1-00010-gda3bb206c9ce)
MSR: 8000000010009033 <SF,EE,ME,IR,DR,RI,LE> CR: 48428482 XER: 00000000
...
NIP 0xc000000002971000
LR 0xc000000002971000
Call Trace:
do_patch_instruction+0xc4/0x340 (unreliable)
do_entry_flush_fixups+0x100/0x3b0
entry_flush_set+0x50/0xe0
simple_attr_write+0x160/0x1a0
full_proxy_write+0x8c/0x110
vfs_write+0xf0/0x340
ksys_write+0x84/0x140
system_call_exception+0x164/0x2d0
system_call_common+0xec/0x278
The simplest fix is to change the order in which we patch the
instructions, so that the sequence is always safe to execute. For the
non-fallback flushes it doesn't matter what order we patch in.
Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-1-mpe@ellerman.id.au
2021-05-13 22:07:59 +08:00
|
|
|
(unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
|
|
|
|
} else {
|
2021-05-20 21:50:45 +08:00
|
|
|
patch_instruction(dest + 1, ppc_inst(instrs[1]));
|
|
|
|
patch_instruction(dest + 2, ppc_inst(instrs[2]));
|
|
|
|
patch_instruction(dest, ppc_inst(instrs[0]));
|
powerpc/64s: Fix entry flush patching w/strict RWX & hash
The entry flush mitigation can be enabled/disabled at runtime. When this
happens it results in the kernel patching its own instructions to
enable/disable the mitigation sequence.
With strict kernel RWX enabled instruction patching happens via a
secondary mapping of the kernel text, so that we don't have to make the
primary mapping writable. With the hash MMU this leads to a hash fault,
which causes us to execute the exception entry which contains the entry
flush mitigation.
This means we end up executing the entry flush in a semi-patched state,
ie. after we have patched the first instruction but before we patch the
second or third instruction of the sequence.
On machines with updated firmware the entry flush is a series of special
nops, and it's safe to execute in a semi-patched state.
However when using the fallback flush the sequence is mflr/branch/mtlr,
and so it's not safe to execute if we have patched out the mflr but not
the other two instructions. Doing so leads to us corrupting LR, leading
to an oops, for example:
# echo 0 > /sys/kernel/debug/powerpc/entry_flush
kernel tried to execute exec-protected page (c000000002971000) - exploit attempt? (uid: 0)
BUG: Unable to handle kernel instruction fetch
Faulting instruction address: 0xc000000002971000
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
CPU: 0 PID: 2215 Comm: bash Not tainted 5.13.0-rc1-00010-gda3bb206c9ce #1
NIP: c000000002971000 LR: c000000002971000 CTR: c000000000120c40
REGS: c000000013243840 TRAP: 0400 Not tainted (5.13.0-rc1-00010-gda3bb206c9ce)
MSR: 8000000010009033 <SF,EE,ME,IR,DR,RI,LE> CR: 48428482 XER: 00000000
...
NIP 0xc000000002971000
LR 0xc000000002971000
Call Trace:
do_patch_instruction+0xc4/0x340 (unreliable)
do_entry_flush_fixups+0x100/0x3b0
entry_flush_set+0x50/0xe0
simple_attr_write+0x160/0x1a0
full_proxy_write+0x8c/0x110
vfs_write+0xf0/0x340
ksys_write+0x84/0x140
system_call_exception+0x164/0x2d0
system_call_common+0xec/0x278
The simplest fix is to change the order in which we patch the
instructions, so that the sequence is always safe to execute. For the
non-fallback flushes it doesn't matter what order we patch in.
Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-1-mpe@ellerman.id.au
2021-05-13 22:07:59 +08:00
|
|
|
}
|
2020-11-17 13:59:12 +08:00
|
|
|
}
|
|
|
|
|
2021-01-11 14:24:08 +08:00
|
|
|
start = PTRRELOC(&__start___scv_entry_flush_fixup);
|
|
|
|
end = PTRRELOC(&__stop___scv_entry_flush_fixup);
|
|
|
|
for (; start < end; start++, i++) {
|
|
|
|
dest = (void *)start + *start;
|
|
|
|
|
|
|
|
pr_devel("patching dest %lx\n", (unsigned long)dest);
|
|
|
|
|
powerpc/64s: Fix entry flush patching w/strict RWX & hash
The entry flush mitigation can be enabled/disabled at runtime. When this
happens it results in the kernel patching its own instructions to
enable/disable the mitigation sequence.
With strict kernel RWX enabled instruction patching happens via a
secondary mapping of the kernel text, so that we don't have to make the
primary mapping writable. With the hash MMU this leads to a hash fault,
which causes us to execute the exception entry which contains the entry
flush mitigation.
This means we end up executing the entry flush in a semi-patched state,
ie. after we have patched the first instruction but before we patch the
second or third instruction of the sequence.
On machines with updated firmware the entry flush is a series of special
nops, and it's safe to execute in a semi-patched state.
However when using the fallback flush the sequence is mflr/branch/mtlr,
and so it's not safe to execute if we have patched out the mflr but not
the other two instructions. Doing so leads to us corrupting LR, leading
to an oops, for example:
# echo 0 > /sys/kernel/debug/powerpc/entry_flush
kernel tried to execute exec-protected page (c000000002971000) - exploit attempt? (uid: 0)
BUG: Unable to handle kernel instruction fetch
Faulting instruction address: 0xc000000002971000
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
CPU: 0 PID: 2215 Comm: bash Not tainted 5.13.0-rc1-00010-gda3bb206c9ce #1
NIP: c000000002971000 LR: c000000002971000 CTR: c000000000120c40
REGS: c000000013243840 TRAP: 0400 Not tainted (5.13.0-rc1-00010-gda3bb206c9ce)
MSR: 8000000010009033 <SF,EE,ME,IR,DR,RI,LE> CR: 48428482 XER: 00000000
...
NIP 0xc000000002971000
LR 0xc000000002971000
Call Trace:
do_patch_instruction+0xc4/0x340 (unreliable)
do_entry_flush_fixups+0x100/0x3b0
entry_flush_set+0x50/0xe0
simple_attr_write+0x160/0x1a0
full_proxy_write+0x8c/0x110
vfs_write+0xf0/0x340
ksys_write+0x84/0x140
system_call_exception+0x164/0x2d0
system_call_common+0xec/0x278
The simplest fix is to change the order in which we patch the
instructions, so that the sequence is always safe to execute. For the
non-fallback flushes it doesn't matter what order we patch in.
Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-1-mpe@ellerman.id.au
2021-05-13 22:07:59 +08:00
|
|
|
if (types == L1D_FLUSH_FALLBACK) {
|
2021-05-20 21:50:45 +08:00
|
|
|
patch_instruction(dest, ppc_inst(instrs[0]));
|
|
|
|
patch_instruction(dest + 2, ppc_inst(instrs[2]));
|
|
|
|
patch_branch(dest + 1,
|
powerpc/64s: Fix entry flush patching w/strict RWX & hash
The entry flush mitigation can be enabled/disabled at runtime. When this
happens it results in the kernel patching its own instructions to
enable/disable the mitigation sequence.
With strict kernel RWX enabled instruction patching happens via a
secondary mapping of the kernel text, so that we don't have to make the
primary mapping writable. With the hash MMU this leads to a hash fault,
which causes us to execute the exception entry which contains the entry
flush mitigation.
This means we end up executing the entry flush in a semi-patched state,
ie. after we have patched the first instruction but before we patch the
second or third instruction of the sequence.
On machines with updated firmware the entry flush is a series of special
nops, and it's safe to execute in a semi-patched state.
However when using the fallback flush the sequence is mflr/branch/mtlr,
and so it's not safe to execute if we have patched out the mflr but not
the other two instructions. Doing so leads to us corrupting LR, leading
to an oops, for example:
# echo 0 > /sys/kernel/debug/powerpc/entry_flush
kernel tried to execute exec-protected page (c000000002971000) - exploit attempt? (uid: 0)
BUG: Unable to handle kernel instruction fetch
Faulting instruction address: 0xc000000002971000
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
CPU: 0 PID: 2215 Comm: bash Not tainted 5.13.0-rc1-00010-gda3bb206c9ce #1
NIP: c000000002971000 LR: c000000002971000 CTR: c000000000120c40
REGS: c000000013243840 TRAP: 0400 Not tainted (5.13.0-rc1-00010-gda3bb206c9ce)
MSR: 8000000010009033 <SF,EE,ME,IR,DR,RI,LE> CR: 48428482 XER: 00000000
...
NIP 0xc000000002971000
LR 0xc000000002971000
Call Trace:
do_patch_instruction+0xc4/0x340 (unreliable)
do_entry_flush_fixups+0x100/0x3b0
entry_flush_set+0x50/0xe0
simple_attr_write+0x160/0x1a0
full_proxy_write+0x8c/0x110
vfs_write+0xf0/0x340
ksys_write+0x84/0x140
system_call_exception+0x164/0x2d0
system_call_common+0xec/0x278
The simplest fix is to change the order in which we patch the
instructions, so that the sequence is always safe to execute. For the
non-fallback flushes it doesn't matter what order we patch in.
Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-1-mpe@ellerman.id.au
2021-05-13 22:07:59 +08:00
|
|
|
(unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
|
|
|
|
} else {
|
2021-05-20 21:50:45 +08:00
|
|
|
patch_instruction(dest + 1, ppc_inst(instrs[1]));
|
|
|
|
patch_instruction(dest + 2, ppc_inst(instrs[2]));
|
|
|
|
patch_instruction(dest, ppc_inst(instrs[0]));
|
powerpc/64s: Fix entry flush patching w/strict RWX & hash
The entry flush mitigation can be enabled/disabled at runtime. When this
happens it results in the kernel patching its own instructions to
enable/disable the mitigation sequence.
With strict kernel RWX enabled instruction patching happens via a
secondary mapping of the kernel text, so that we don't have to make the
primary mapping writable. With the hash MMU this leads to a hash fault,
which causes us to execute the exception entry which contains the entry
flush mitigation.
This means we end up executing the entry flush in a semi-patched state,
ie. after we have patched the first instruction but before we patch the
second or third instruction of the sequence.
On machines with updated firmware the entry flush is a series of special
nops, and it's safe to execute in a semi-patched state.
However when using the fallback flush the sequence is mflr/branch/mtlr,
and so it's not safe to execute if we have patched out the mflr but not
the other two instructions. Doing so leads to us corrupting LR, leading
to an oops, for example:
# echo 0 > /sys/kernel/debug/powerpc/entry_flush
kernel tried to execute exec-protected page (c000000002971000) - exploit attempt? (uid: 0)
BUG: Unable to handle kernel instruction fetch
Faulting instruction address: 0xc000000002971000
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
CPU: 0 PID: 2215 Comm: bash Not tainted 5.13.0-rc1-00010-gda3bb206c9ce #1
NIP: c000000002971000 LR: c000000002971000 CTR: c000000000120c40
REGS: c000000013243840 TRAP: 0400 Not tainted (5.13.0-rc1-00010-gda3bb206c9ce)
MSR: 8000000010009033 <SF,EE,ME,IR,DR,RI,LE> CR: 48428482 XER: 00000000
...
NIP 0xc000000002971000
LR 0xc000000002971000
Call Trace:
do_patch_instruction+0xc4/0x340 (unreliable)
do_entry_flush_fixups+0x100/0x3b0
entry_flush_set+0x50/0xe0
simple_attr_write+0x160/0x1a0
full_proxy_write+0x8c/0x110
vfs_write+0xf0/0x340
ksys_write+0x84/0x140
system_call_exception+0x164/0x2d0
system_call_common+0xec/0x278
The simplest fix is to change the order in which we patch the
instructions, so that the sequence is always safe to execute. For the
non-fallback flushes it doesn't matter what order we patch in.
Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-1-mpe@ellerman.id.au
2021-05-13 22:07:59 +08:00
|
|
|
}
|
2021-01-11 14:24:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-11-17 13:59:12 +08:00
|
|
|
printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
|
|
|
|
(types == L1D_FLUSH_NONE) ? "no" :
|
|
|
|
(types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
|
|
|
|
(types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
|
|
|
|
? "ori+mttrig type"
|
|
|
|
: "ori type" :
|
|
|
|
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
|
|
|
|
: "unknown");
|
2021-05-06 12:49:59 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void do_entry_flush_fixups(enum l1d_flush_type types)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* The call to the fallback flush can not be safely patched in/out while
|
|
|
|
* other CPUs are executing it. So call __do_entry_flush_fixups() on one
|
|
|
|
* CPU while all other CPUs spin in the stop machine core with interrupts
|
|
|
|
* hard disabled.
|
|
|
|
*/
|
|
|
|
stop_machine(__do_entry_flush_fixups, &types, NULL);
|
2020-11-17 13:59:12 +08:00
|
|
|
}
|
|
|
|
|
2021-06-17 23:51:11 +08:00
|
|
|
static int __do_rfi_flush_fixups(void *data)
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, back ports testing etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
{
|
2021-06-17 23:51:11 +08:00
|
|
|
enum l1d_flush_type types = *(enum l1d_flush_type *)data;
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, back ports testing etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
unsigned int instrs[3], *dest;
|
|
|
|
long *start, *end;
|
|
|
|
int i;
|
|
|
|
|
2020-12-01 22:43:44 +08:00
|
|
|
start = PTRRELOC(&__start___rfi_flush_fixup);
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, back ports testing etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
end = PTRRELOC(&__stop___rfi_flush_fixup);
|
|
|
|
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[0] = PPC_RAW_NOP();
|
|
|
|
instrs[1] = PPC_RAW_NOP();
|
|
|
|
instrs[2] = PPC_RAW_NOP();
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, back ports testing etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
|
|
|
|
if (types & L1D_FLUSH_FALLBACK)
|
|
|
|
/* b .+16 to fallback flush */
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[0] = PPC_INST_BRANCH | 16;
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, back ports testing etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
|
|
|
|
i = 0;
|
|
|
|
if (types & L1D_FLUSH_ORI) {
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
|
|
|
|
instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, back ports testing etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (types & L1D_FLUSH_MTTRIG)
|
2021-05-20 18:23:09 +08:00
|
|
|
instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, back ports testing etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
|
|
|
|
for (i = 0; start < end; start++, i++) {
|
|
|
|
dest = (void *)start + *start;
|
|
|
|
|
|
|
|
pr_devel("patching dest %lx\n", (unsigned long)dest);
|
|
|
|
|
2021-05-20 21:50:45 +08:00
|
|
|
patch_instruction(dest, ppc_inst(instrs[0]));
|
|
|
|
patch_instruction(dest + 1, ppc_inst(instrs[1]));
|
|
|
|
patch_instruction(dest + 2, ppc_inst(instrs[2]));
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, back ports testing etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
}
|
|
|
|
|
2018-03-15 06:40:41 +08:00
|
|
|
printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
|
|
|
|
(types == L1D_FLUSH_NONE) ? "no" :
|
|
|
|
(types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
|
|
|
|
(types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
|
|
|
|
? "ori+mttrig type"
|
|
|
|
: "ori type" :
|
|
|
|
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
|
|
|
|
: "unknown");
|
2021-06-17 23:51:11 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void do_rfi_flush_fixups(enum l1d_flush_type types)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* stop_machine gets all CPUs out of the interrupt exit handler same
|
|
|
|
* as do_stf_barrier_fixups. do_rfi_flush_fixups patching can run
|
|
|
|
* without stop_machine, so this could be achieved with a broadcast
|
|
|
|
* IPI instead, but this matches the stf sequence.
|
|
|
|
*/
|
|
|
|
static_branch_enable(&interrupt_exit_not_reentrant);
|
|
|
|
|
|
|
|
stop_machine(__do_rfi_flush_fixups, &types, NULL);
|
|
|
|
|
|
|
|
if (types & L1D_FLUSH_FALLBACK)
|
|
|
|
rfi_exit_reentrant = false;
|
|
|
|
else
|
|
|
|
rfi_exit_reentrant = true;
|
|
|
|
|
|
|
|
if (stf_exit_reentrant && rfi_exit_reentrant)
|
|
|
|
static_branch_disable(&interrupt_exit_not_reentrant);
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, back ports testing etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
}
|
2018-04-24 12:15:55 +08:00
|
|
|
|
2018-04-24 12:15:56 +08:00
|
|
|
/*
 * Patch every site listed in the fixup table [fixup_start, fixup_end).
 *
 * Each table entry is a long holding the offset from the entry itself to
 * the instruction to patch.  When @enable is true the site receives
 * "ori 31,31,0" (used as the speculation barrier), otherwise a nop.
 */
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
{
	long *entry = fixup_start;
	long *table_end = fixup_end;
	unsigned int insn;
	int count = 0;

	if (enable) {
		pr_info("barrier-nospec: using ORI speculation barrier\n");
		insn = PPC_RAW_ORI(_R31, _R31, 0);	/* speculation barrier */
	} else {
		insn = PPC_RAW_NOP();
	}

	while (entry < table_end) {
		/* The entry stores a self-relative offset to the patch site. */
		unsigned int *site = (void *)entry + *entry;

		pr_devel("patching dest %lx\n", (unsigned long)site);
		patch_instruction(site, ppc_inst(insn));

		entry++;
		count++;
	}

	printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", count);
}
|
|
|
|
|
2018-07-28 07:06:34 +08:00
|
|
|
#endif /* CONFIG_PPC_BOOK3S_64 */
|
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_BARRIER_NOSPEC
|
2018-04-24 12:15:56 +08:00
|
|
|
/*
 * Apply (or remove, when @enable is false) the speculation barrier at every
 * site recorded between __start___barrier_nospec_fixup and
 * __stop___barrier_nospec_fixup.
 */
void do_barrier_nospec_fixups(bool enable)
{
	void *table_start = PTRRELOC(&__start___barrier_nospec_fixup);
	void *table_end = PTRRELOC(&__stop___barrier_nospec_fixup);

	do_barrier_nospec_fixups_range(enable, table_start, table_end);
}
|
2018-07-28 07:06:34 +08:00
|
|
|
#endif /* CONFIG_PPC_BARRIER_NOSPEC */
|
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.
This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.
The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.
In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore if no kernel addresses hit in the L1 the
vulnerability can not occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.
In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.
For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.
In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, backports, testing, etc.
Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-01-10 00:07:15 +08:00
|
|
|
|
2018-07-28 07:06:37 +08:00
|
|
|
#ifdef CONFIG_PPC_FSL_BOOK3E
|
|
|
|
/*
 * Patch every site listed in the fixup table [fixup_start, fixup_end).
 *
 * Each table entry is a long holding the offset from the entry itself to a
 * two-instruction patch site.  When @enable is true the pair becomes
 * "isync; sync", otherwise both slots are nops.
 */
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
{
	long *entry = fixup_start;
	long *table_end = fixup_end;
	unsigned int first, second;
	int count = 0;

	if (enable) {
		pr_info("barrier-nospec: using isync; sync as speculation barrier\n");
		first = PPC_RAW_ISYNC();
		second = PPC_RAW_SYNC();
	} else {
		first = PPC_RAW_NOP();
		second = PPC_RAW_NOP();
	}

	while (entry < table_end) {
		/* The entry stores a self-relative offset to the patch site. */
		unsigned int *site = (void *)entry + *entry;

		pr_devel("patching dest %lx\n", (unsigned long)site);
		patch_instruction(site, ppc_inst(first));
		patch_instruction(site + 1, ppc_inst(second));

		entry++;
		count++;
	}

	printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", count);
}
|
2018-12-12 22:03:00 +08:00
|
|
|
|
|
|
|
/*
 * Overwrite one BTB-flush section with nops.
 *
 * @curr points at a pair of longs: curr[0] and curr[1] are the offsets,
 * relative to @curr itself, of the first instruction and of the end of the
 * section respectively.
 */
static void patch_btb_flush_section(long *curr)
{
	unsigned int *insn = (void *)curr + curr[0];
	unsigned int *stop = (void *)curr + curr[1];

	while (insn < stop) {
		pr_devel("patching dest %lx\n", (unsigned long)insn);
		patch_instruction(insn, ppc_inst(PPC_RAW_NOP()));
		insn++;
	}
}
|
|
|
|
|
|
|
|
void do_btb_flush_fixups(void)
|
|
|
|
{
|
|
|
|
long *start, *end;
|
|
|
|
|
|
|
|
start = PTRRELOC(&__start__btb_flush_fixup);
|
|
|
|
end = PTRRELOC(&__stop__btb_flush_fixup);
|
|
|
|
|
|
|
|
for (; start < end; start += 2)
|
|
|
|
patch_btb_flush_section(start);
|
|
|
|
}
|
2018-07-28 07:06:37 +08:00
|
|
|
#endif /* CONFIG_PPC_FSL_BOOK3E */
|
|
|
|
|
2008-07-01 23:16:40 +08:00
|
|
|
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
|
|
|
|
{
|
2010-02-26 15:29:17 +08:00
|
|
|
long *start, *end;
|
2021-05-20 21:50:45 +08:00
|
|
|
u32 *dest;
|
2008-07-01 23:16:40 +08:00
|
|
|
|
|
|
|
if (!(value & CPU_FTR_LWSYNC))
|
|
|
|
return ;
|
|
|
|
|
|
|
|
start = fixup_start;
|
|
|
|
end = fixup_end;
|
|
|
|
|
|
|
|
for (; start < end; start++) {
|
|
|
|
dest = (void *)start + *start;
|
2020-05-06 11:40:26 +08:00
|
|
|
raw_patch_instruction(dest, ppc_inst(PPC_INST_LWSYNC));
|
2008-07-01 23:16:40 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-05 13:03:41 +08:00
|
|
|
/*
 * On relocatable 64-bit kernels with a non-zero PHYSICAL_START, copy the
 * (__end_interrupts - _stext) bytes of instructions that start at
 * KERNELBASE + PHYSICAL_START to KERNELBASE, one instruction at a time.
 * In every other configuration this is a no-op.
 */
static void do_final_fixups(void)
{
#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
	u32 *from, *to, *limit;

	if (!PHYSICAL_START)
		return;

	from = (u32 *)(KERNELBASE + PHYSICAL_START);
	to = (u32 *)KERNELBASE;
	limit = (void *)from + (__end_interrupts - _stext);

	while (from < limit) {
		struct ppc_inst inst = ppc_inst_read(from);

		raw_patch_instruction(to, inst);

		/*
		 * Prefixed instructions mean the next instruction may be 4
		 * or 8 bytes further on, so advance with ppc_inst_next()
		 * rather than plain pointer arithmetic.  It takes the
		 * instruction by reference so that it can read it carefully
		 * and not run past the end of a page.
		 */
		from = ppc_inst_next(from, from);
		to = ppc_inst_next(to, to);
	}
#endif
}
|
|
|
|
|
2016-07-26 20:29:18 +08:00
|
|
|
static unsigned long __initdata saved_cpu_features;
|
|
|
|
static unsigned int __initdata saved_mmu_features;
|
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
static unsigned long __initdata saved_firmware_features;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * Run all boot-time code patching driven by the CPU, MMU and (on 64-bit)
 * firmware feature words, then finish with do_final_fixups().
 */
void __init apply_feature_fixups(void)
{
	struct cpu_spec *cpu = PTRRELOC(*PTRRELOC(&cur_cpu_spec));

	/* Remember what we patched against so check_features() can compare. */
	*PTRRELOC(&saved_cpu_features) = cpu->cpu_features;
	*PTRRELOC(&saved_mmu_features) = cpu->mmu_features;

	/*
	 * Patch kernel text according to the CPU and firmware features,
	 * nopping out the sections that do not apply to this CPU or this
	 * firmware.
	 */
	do_feature_fixups(cpu->cpu_features,
			  PTRRELOC(&__start___ftr_fixup),
			  PTRRELOC(&__stop___ftr_fixup));

	do_feature_fixups(cpu->mmu_features,
			  PTRRELOC(&__start___mmu_ftr_fixup),
			  PTRRELOC(&__stop___mmu_ftr_fixup));

	do_lwsync_fixups(cpu->cpu_features,
			 PTRRELOC(&__start___lwsync_fixup),
			 PTRRELOC(&__stop___lwsync_fixup));

#ifdef CONFIG_PPC64
	saved_firmware_features = powerpc_firmware_features;
	do_feature_fixups(powerpc_firmware_features,
			  &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
#endif
	do_final_fixups();
}
|
2016-07-23 17:12:38 +08:00
|
|
|
|
2016-08-10 15:27:34 +08:00
|
|
|
/*
 * Set up the jump-label machinery and then the CPU and MMU feature keys.
 */
void __init setup_feature_keys(void)
{
	/*
	 * Jump labels must be initialised first: this is what makes every
	 * cpu_has_feature()/mmu_has_feature() check take on the polarity
	 * that matches the current set of CPU/MMU features, so that checks
	 * in C code are binary patched into a single nop or branch.
	 */
	jump_label_init();
	cpu_feature_keys_init();
	mmu_feature_keys_init();
}
|
|
|
|
|
2016-07-26 20:29:18 +08:00
|
|
|
/*
 * Late initcall that warns if any feature word differs from the value
 * recorded when the feature fixups were applied.  Always returns 0.
 */
static int __init check_features(void)
{
	WARN(cur_cpu_spec->cpu_features != saved_cpu_features,
	     "CPU features changed after feature patching!\n");
	WARN(cur_cpu_spec->mmu_features != saved_mmu_features,
	     "MMU features changed after feature patching!\n");
#ifdef CONFIG_PPC64
	WARN(powerpc_firmware_features != saved_firmware_features,
	     "Firmware features changed after feature patching!\n");
#endif

	return 0;
}
late_initcall(check_features);
|
|
|
|
|
2008-06-24 09:33:03 +08:00
|
|
|
#ifdef CONFIG_FTR_FIXUP_SELFTEST
|
|
|
|
|
|
|
|
/*
 * Self-test assertion: if @x is false, log the source line of the failing
 * check.  Execution always continues.
 *
 * Wrapped in do { } while (0) so that "check(x);" expands to exactly one
 * statement and can be used safely as the body of an if/else.
 */
#define check(x)	\
	do {		\
		if (!(x))	\
			printk("feature-fixups: test failed at line %d\n", __LINE__);	\
	} while (0)
|
|
|
|
|
|
|
|
/* This must be placed after the text it fixes up; vmlinux.lds.S currently enforces that. */
|
|
|
|
/* Scratch fixup entry that the self-tests fill in and pass to patch_feature_section(). */
static struct fixup_entry fixup;
|
|
|
|
|
|
|
|
/*
 * Return the byte distance from @entry to @p (negative when @p lies below
 * @entry), i.e. the self-relative offset the self-tests store in a
 * fixup_entry.
 */
static long calc_offset(struct fixup_entry *entry, unsigned int *p)
{
	unsigned long base = (unsigned long)entry;
	unsigned long target = (unsigned long)p;

	return target - base;
}
|
|
|
|
|
2014-08-20 06:55:18 +08:00
|
|
|
static void test_basic_patching(void)
|
2008-06-24 09:33:03 +08:00
|
|
|
{
|
2017-07-13 05:36:07 +08:00
|
|
|
extern unsigned int ftr_fixup_test1[];
|
|
|
|
extern unsigned int end_ftr_fixup_test1[];
|
|
|
|
extern unsigned int ftr_fixup_test1_orig[];
|
|
|
|
extern unsigned int ftr_fixup_test1_expected[];
|
2018-04-16 22:39:03 +08:00
|
|
|
int size = 4 * (end_ftr_fixup_test1 - ftr_fixup_test1);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
fixup.value = fixup.mask = 8;
|
2017-07-13 05:36:07 +08:00
|
|
|
fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
|
|
|
|
fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2);
|
2008-06-24 09:33:03 +08:00
|
|
|
fixup.alt_start_off = fixup.alt_end_off = 0;
|
|
|
|
|
|
|
|
/* Sanity check */
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Check we don't patch if the value matches */
|
|
|
|
patch_feature_section(8, &fixup);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Check we do patch if the value doesn't match */
|
|
|
|
patch_feature_section(0, &fixup);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Check we do patch if the mask doesn't match */
|
2017-07-13 05:36:07 +08:00
|
|
|
memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, size);
|
|
|
|
check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
patch_feature_section(~8, &fixup);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void test_alternative_patching(void)
|
|
|
|
{
|
2017-07-13 05:36:07 +08:00
|
|
|
extern unsigned int ftr_fixup_test2[];
|
|
|
|
extern unsigned int end_ftr_fixup_test2[];
|
|
|
|
extern unsigned int ftr_fixup_test2_orig[];
|
|
|
|
extern unsigned int ftr_fixup_test2_alt[];
|
|
|
|
extern unsigned int ftr_fixup_test2_expected[];
|
2018-04-16 22:39:03 +08:00
|
|
|
int size = 4 * (end_ftr_fixup_test2 - ftr_fixup_test2);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
fixup.value = fixup.mask = 0xF;
|
2017-07-13 05:36:07 +08:00
|
|
|
fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
|
|
|
|
fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2);
|
|
|
|
fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt);
|
|
|
|
fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Sanity check */
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Check we don't patch if the value matches */
|
|
|
|
patch_feature_section(0xF, &fixup);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Check we do patch if the value doesn't match */
|
|
|
|
patch_feature_section(0, &fixup);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Check we do patch if the mask doesn't match */
|
2017-07-13 05:36:07 +08:00
|
|
|
memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, size);
|
|
|
|
check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
patch_feature_section(~0xF, &fixup);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void test_alternative_case_too_big(void)
|
|
|
|
{
|
2017-07-13 05:36:07 +08:00
|
|
|
extern unsigned int ftr_fixup_test3[];
|
|
|
|
extern unsigned int end_ftr_fixup_test3[];
|
|
|
|
extern unsigned int ftr_fixup_test3_orig[];
|
|
|
|
extern unsigned int ftr_fixup_test3_alt[];
|
2018-04-16 22:39:03 +08:00
|
|
|
int size = 4 * (end_ftr_fixup_test3 - ftr_fixup_test3);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
fixup.value = fixup.mask = 0xC;
|
2017-07-13 05:36:07 +08:00
|
|
|
fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
|
|
|
|
fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2);
|
|
|
|
fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt);
|
|
|
|
fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Sanity check */
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Expect nothing to be patched, and the error returned to us */
|
|
|
|
check(patch_feature_section(0xF, &fixup) == 1);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
check(patch_feature_section(0, &fixup) == 1);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
check(patch_feature_section(~0xF, &fixup) == 1);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void test_alternative_case_too_small(void)
|
|
|
|
{
|
2017-07-13 05:36:07 +08:00
|
|
|
extern unsigned int ftr_fixup_test4[];
|
|
|
|
extern unsigned int end_ftr_fixup_test4[];
|
|
|
|
extern unsigned int ftr_fixup_test4_orig[];
|
|
|
|
extern unsigned int ftr_fixup_test4_alt[];
|
|
|
|
extern unsigned int ftr_fixup_test4_expected[];
|
2018-04-16 22:39:03 +08:00
|
|
|
int size = 4 * (end_ftr_fixup_test4 - ftr_fixup_test4);
|
2008-06-24 09:33:03 +08:00
|
|
|
unsigned long flag;
|
|
|
|
|
|
|
|
/* Check a high-bit flag */
|
|
|
|
flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
|
|
|
|
fixup.value = fixup.mask = flag;
|
2017-07-13 05:36:07 +08:00
|
|
|
fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1);
|
|
|
|
fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5);
|
|
|
|
fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt);
|
|
|
|
fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Sanity check */
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Check we don't patch if the value matches */
|
|
|
|
patch_feature_section(flag, &fixup);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Check we do patch if the value doesn't match */
|
|
|
|
patch_feature_section(0, &fixup);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* Check we do patch if the mask doesn't match */
|
2017-07-13 05:36:07 +08:00
|
|
|
memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, size);
|
|
|
|
check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
patch_feature_section(~flag, &fixup);
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void test_alternative_case_with_branch(void)
|
|
|
|
{
|
2017-07-13 05:36:07 +08:00
|
|
|
extern unsigned int ftr_fixup_test5[];
|
|
|
|
extern unsigned int end_ftr_fixup_test5[];
|
|
|
|
extern unsigned int ftr_fixup_test5_expected[];
|
2018-04-16 22:39:03 +08:00
|
|
|
int size = 4 * (end_ftr_fixup_test5 - ftr_fixup_test5);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void test_alternative_case_with_external_branch(void)
|
|
|
|
{
|
2017-07-13 05:36:07 +08:00
|
|
|
extern unsigned int ftr_fixup_test6[];
|
|
|
|
extern unsigned int end_ftr_fixup_test6[];
|
|
|
|
extern unsigned int ftr_fixup_test6_expected[];
|
2018-04-16 22:39:03 +08:00
|
|
|
int size = 4 * (end_ftr_fixup_test6 - ftr_fixup_test6);
|
2008-06-24 09:33:03 +08:00
|
|
|
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
2018-04-16 22:39:05 +08:00
|
|
|
static void test_alternative_case_with_branch_to_end(void)
|
|
|
|
{
|
|
|
|
extern unsigned int ftr_fixup_test7[];
|
|
|
|
extern unsigned int end_ftr_fixup_test7[];
|
|
|
|
extern unsigned int ftr_fixup_test7_expected[];
|
|
|
|
int size = 4 * (end_ftr_fixup_test7 - ftr_fixup_test7);
|
|
|
|
|
|
|
|
check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0);
|
|
|
|
}
|
|
|
|
|
2008-06-24 09:33:03 +08:00
|
|
|
static void test_cpu_macros(void)
|
|
|
|
{
|
2017-07-13 05:36:07 +08:00
|
|
|
extern u8 ftr_fixup_test_FTR_macros[];
|
|
|
|
extern u8 ftr_fixup_test_FTR_macros_expected[];
|
|
|
|
unsigned long size = ftr_fixup_test_FTR_macros_expected -
|
|
|
|
ftr_fixup_test_FTR_macros;
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* The fixups have already been done for us during boot */
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test_FTR_macros,
|
|
|
|
ftr_fixup_test_FTR_macros_expected, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void test_fw_macros(void)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_PPC64
|
2017-07-13 05:36:07 +08:00
|
|
|
extern u8 ftr_fixup_test_FW_FTR_macros[];
|
|
|
|
extern u8 ftr_fixup_test_FW_FTR_macros_expected[];
|
|
|
|
unsigned long size = ftr_fixup_test_FW_FTR_macros_expected -
|
|
|
|
ftr_fixup_test_FW_FTR_macros;
|
2008-06-24 09:33:03 +08:00
|
|
|
|
|
|
|
/* The fixups have already been done for us during boot */
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(ftr_fixup_test_FW_FTR_macros,
|
|
|
|
ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
|
2008-06-24 09:33:03 +08:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2008-07-01 23:16:40 +08:00
|
|
|
static void test_lwsync_macros(void)
|
|
|
|
{
|
2017-07-13 05:36:07 +08:00
|
|
|
extern u8 lwsync_fixup_test[];
|
|
|
|
extern u8 end_lwsync_fixup_test[];
|
|
|
|
extern u8 lwsync_fixup_test_expected_LWSYNC[];
|
|
|
|
extern u8 lwsync_fixup_test_expected_SYNC[];
|
|
|
|
unsigned long size = end_lwsync_fixup_test -
|
|
|
|
lwsync_fixup_test;
|
2008-07-01 23:16:40 +08:00
|
|
|
|
|
|
|
/* The fixups have already been done for us during boot */
|
|
|
|
if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) {
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(lwsync_fixup_test,
|
|
|
|
lwsync_fixup_test_expected_LWSYNC, size) == 0);
|
2008-07-01 23:16:40 +08:00
|
|
|
} else {
|
2017-07-13 05:36:07 +08:00
|
|
|
check(memcmp(lwsync_fixup_test,
|
|
|
|
lwsync_fixup_test_expected_SYNC, size) == 0);
|
2008-07-01 23:16:40 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-06 11:40:45 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
/*
 * Self-test for a feature section with no alternative whose range is two
 * words long (ftr_fixup_prefix1 + 1 up to + 3).  After patching with a
 * non-matching feature value the text must equal ftr_fixup_prefix1_expected
 * and differ from ftr_fixup_prefix1_orig.
 */
static void __init test_prefix_patching(void)
{
	extern unsigned int ftr_fixup_prefix1[];
	extern unsigned int end_ftr_fixup_prefix1[];
	extern unsigned int ftr_fixup_prefix1_orig[];
	extern unsigned int ftr_fixup_prefix1_expected[];
	int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 - ftr_fixup_prefix1);

	fixup.mask = 8;
	fixup.value = 8;
	fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1);
	fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3);
	fixup.alt_end_off = 0;
	fixup.alt_start_off = 0;

	/* The text must start out unmodified. */
	check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0);

	/* A non-matching value must patch the section. */
	patch_feature_section(0, &fixup);
	check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0);
	check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0);
}
|
|
|
|
|
|
|
|
/*
 * Self-test for a two-word feature section (ftr_fixup_prefix2 + 1 up to
 * + 3) with a two-word alternative at ftr_fixup_prefix2_alt.  After patching
 * with a non-matching feature value the text must equal
 * ftr_fixup_prefix2_expected and differ from ftr_fixup_prefix2_orig.
 */
static void __init test_prefix_alt_patching(void)
{
	extern unsigned int ftr_fixup_prefix2[];
	extern unsigned int end_ftr_fixup_prefix2[];
	extern unsigned int ftr_fixup_prefix2_orig[];
	extern unsigned int ftr_fixup_prefix2_expected[];
	extern unsigned int ftr_fixup_prefix2_alt[];
	int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 - ftr_fixup_prefix2);

	fixup.mask = 8;
	fixup.value = 8;
	fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1);
	fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3);
	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt);
	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2);

	/* The text must start out unmodified. */
	check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0);

	/* A non-matching value must patch the section. */
	patch_feature_section(0, &fixup);
	check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0);
	check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0);
}
|
|
|
|
|
|
|
|
/*
 * Self-test for a three-word feature section (ftr_fixup_prefix3 + 1 up to
 * + 4) with a three-word alternative at ftr_fixup_prefix3_alt.  After
 * patching with a non-matching feature value the text must equal
 * ftr_fixup_prefix3_expected; after patching a second time it must still
 * differ from ftr_fixup_prefix3_orig.
 */
static void __init test_prefix_word_alt_patching(void)
{
	extern unsigned int ftr_fixup_prefix3[];
	extern unsigned int end_ftr_fixup_prefix3[];
	extern unsigned int ftr_fixup_prefix3_orig[];
	extern unsigned int ftr_fixup_prefix3_expected[];
	extern unsigned int ftr_fixup_prefix3_alt[];
	int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 - ftr_fixup_prefix3);

	fixup.mask = 8;
	fixup.value = 8;
	fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1);
	fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4);
	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt);
	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3);

	/* The text must start out unmodified. */
	check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0);

	/* A non-matching value must patch the section. */
	patch_feature_section(0, &fixup);
	check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0);

	/* Patching again must not bring the original text back. */
	patch_feature_section(0, &fixup);
	check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0);
}
|
|
|
|
#else
|
|
|
|
/* Prefixed-instruction self-tests are 64-bit only; provide empty stand-ins. */
static inline void test_prefix_patching(void)
{
}

static inline void test_prefix_alt_patching(void)
{
}

static inline void test_prefix_word_alt_patching(void)
{
}
|
|
|
|
#endif /* CONFIG_PPC64 */
|
|
|
|
|
2008-06-24 09:33:03 +08:00
|
|
|
/*
 * Late initcall that runs every feature-fixup self-test in turn.  Failures
 * are reported by the individual tests through check(); this function
 * always returns 0.
 */
static int __init test_feature_fixups(void)
{
	printk(KERN_DEBUG "Running feature fixup self-tests ...\n");

	/* Tests that call patch_feature_section() on a hand-built entry. */
	test_basic_patching();
	test_alternative_patching();
	test_alternative_case_too_big();
	test_alternative_case_too_small();

	/* Tests that only compare the current text with the expected text. */
	test_alternative_case_with_branch();
	test_alternative_case_with_external_branch();
	test_alternative_case_with_branch_to_end();
	test_cpu_macros();
	test_fw_macros();
	test_lwsync_macros();

	/* Prefixed-instruction tests (empty stubs unless CONFIG_PPC64). */
	test_prefix_patching();
	test_prefix_alt_patching();
	test_prefix_word_alt_patching();

	return 0;
}
late_initcall(test_feature_fixups);
|
|
|
|
|
|
|
|
#endif /* CONFIG_FTR_FIXUP_SELFTEST */
|