linux/arch/x86/mm/extable.c
Linus Torvalds da9803dfd3 This feature enhances the current guest memory encryption support
called SEV by also encrypting the guest register state, making the
 registers inaccessible to the hypervisor by en-/decrypting them on world
 switches. Thus, it adds additional protection to Linux guests against
 exfiltration, control flow and rollback attacks.
 
 With SEV-ES, the guest is in full control of what registers the
 hypervisor can access. This is provided by a guest-host exchange
 mechanism based on a new exception vector called VMM Communication
 Exception (#VC), a new instruction called VMGEXIT and a shared
 Guest-Host Communication Block which is a decrypted page shared between
 the guest and the hypervisor.
 
 Intercepts to the hypervisor become #VC exceptions in an SEV-ES guest so
 in order for that exception mechanism to work, the early x86 init code
 needed to be made able to handle exceptions, which, in itself, brings
 a bunch of very nice cleanups and improvements to the early boot code
 like an early page fault handler, allowing for on-demand building of the
 identity mapping. With that, !KASLR configurations do not use the EFI
 page table anymore but switch to a kernel-controlled one.
 
 The main part of this series adds the support for that new exchange
 mechanism. The goal has been to keep this as much as possibly
 separate from the core x86 code by concentrating the machinery in two
 SEV-ES-specific files:
 
  arch/x86/kernel/sev-es-shared.c
  arch/x86/kernel/sev-es.c
 
 Other interaction with core x86 code has been kept at minimum and behind
 static keys to minimize the performance impact on !SEV-ES setups.
 
 Work by Joerg Roedel and Thomas Lendacky and others.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAl+FiKYACgkQEsHwGGHe
 VUqS5BAAlh5mKwtxXMyFyAIHa5tpsgDjbecFzy1UVmZyxN0JHLlM3NLmb+K52drY
 PiWjNNMi/cFMFazkuLFHuY0poBWrZml8zRS/mExKgUJC6EtguS9FQnRE9xjDBoWQ
 gOTSGJWEzT5wnFqo8qHwlC2CDCSF1hfL8ks3cUFW2tCWus4F9pyaMSGfFqD224rg
 Lh/8+arDMSIKE4uH0cm7iSuyNpbobId0l5JNDfCEFDYRigQZ6pZsQ9pbmbEpncs4
 rmjDvBA5eHDlNMXq0ukqyrjxWTX4ZLBOBvuLhpyssSXnnu2T+Tcxg09+ZSTyJAe0
 LyC9Wfo0v78JASXMAdeH9b1d1mRYNMqjvnBItNQoqweoqUXWz7kvgxCOp6b/G4xp
 cX5YhB6BprBW2DXL45frMRT/zX77UkEKYc5+0IBegV2xfnhRsjqQAQaWLIksyEaX
 nz9/C6+1Sr2IAv271yykeJtY6gtlRjg/usTlYpev+K0ghvGvTmuilEiTltjHrso1
 XAMbfWHQGSd61LNXofvx/GLNfGBisS6dHVHwtkayinSjXNdWxI6w9fhbWVjQ+y2V
 hOF05lmzaJSG5kPLrsFHFqm2YcxOmsWkYYDBHvtmBkMZSf5B+9xxDv97Uy9NETcr
 eSYk//TEkKQqVazfCQS/9LSm0MllqKbwNO25sl0Tw2k6PnheO2g=
 =toqi
 -----END PGP SIGNATURE-----

Merge tag 'x86_seves_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 SEV-ES support from Borislav Petkov:
 "SEV-ES enhances the current guest memory encryption support called SEV
  by also encrypting the guest register state, making the registers
  inaccessible to the hypervisor by en-/decrypting them on world
  switches. Thus, it adds additional protection to Linux guests against
  exfiltration, control flow and rollback attacks.

  With SEV-ES, the guest is in full control of what registers the
  hypervisor can access. This is provided by a guest-host exchange
  mechanism based on a new exception vector called VMM Communication
  Exception (#VC), a new instruction called VMGEXIT and a shared
  Guest-Host Communication Block which is a decrypted page shared
  between the guest and the hypervisor.

  Intercepts to the hypervisor become #VC exceptions in an SEV-ES guest
  so in order for that exception mechanism to work, the early x86 init
  code needed to be made able to handle exceptions, which, in itself,
  brings a bunch of very nice cleanups and improvements to the early
  boot code like an early page fault handler, allowing for on-demand
  building of the identity mapping. With that, !KASLR configurations do
  not use the EFI page table anymore but switch to a kernel-controlled
  one.

  The main part of this series adds the support for that new exchange
  mechanism. The goal has been to keep this as much as possibly separate
  from the core x86 code by concentrating the machinery in two
  SEV-ES-specific files:

    arch/x86/kernel/sev-es-shared.c
    arch/x86/kernel/sev-es.c

  Other interaction with core x86 code has been kept at minimum and
  behind static keys to minimize the performance impact on !SEV-ES
  setups.

  Work by Joerg Roedel and Thomas Lendacky and others"

* tag 'x86_seves_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (73 commits)
  x86/sev-es: Use GHCB accessor for setting the MMIO scratch buffer
  x86/sev-es: Check required CPU features for SEV-ES
  x86/efi: Add GHCB mappings when SEV-ES is active
  x86/sev-es: Handle NMI State
  x86/sev-es: Support CPU offline/online
  x86/head/64: Don't call verify_cpu() on starting APs
  x86/smpboot: Load TSS and getcpu GDT entry before loading IDT
  x86/realmode: Setup AP jump table
  x86/realmode: Add SEV-ES specific trampoline entry point
  x86/vmware: Add VMware-specific handling for VMMCALL under SEV-ES
  x86/kvm: Add KVM-specific VMMCALL handling under SEV-ES
  x86/paravirt: Allow hypervisor-specific VMMCALL handling under SEV-ES
  x86/sev-es: Handle #DB Events
  x86/sev-es: Handle #AC Events
  x86/sev-es: Handle VMMCALL Events
  x86/sev-es: Handle MWAIT/MWAITX Events
  x86/sev-es: Handle MONITOR/MONITORX Events
  x86/sev-es: Handle INVD Events
  x86/sev-es: Handle RDPMC Events
  x86/sev-es: Handle RDTSC(P) Events
  ...
2020-10-14 10:21:34 -07:00

249 lines
7.4 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
#include <xen/xen.h>
#include <asm/fpu/internal.h>
#include <asm/sev-es.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
typedef bool (*ex_handler_t)(const struct exception_table_entry *,
struct pt_regs *, int, unsigned long,
unsigned long);
static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
{
return (unsigned long)&x->fixup + x->fixup;
}
static inline ex_handler_t
ex_fixup_handler(const struct exception_table_entry *x)
{
return (ex_handler_t)((unsigned long)&x->handler + x->handler);
}
__visible bool ex_handler_default(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
regs->ip = ex_fixup_addr(fixup);
return true;
}
EXPORT_SYMBOL(ex_handler_default);
__visible bool ex_handler_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
regs->ip = ex_fixup_addr(fixup);
regs->ax = trapnr;
return true;
}
EXPORT_SYMBOL_GPL(ex_handler_fault);
/*
* Handler for when we fail to restore a task's FPU state. We should never get
* here because the FPU state of a task using the FPU (task->thread.fpu.state)
* should always be valid. However, past bugs have allowed userspace to set
* reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
* These caused XRSTOR to fail when switching to the task, leaking the FPU
* registers of the task previously executing on the CPU. Mitigate this class
* of vulnerability by restoring from the initial state (essentially, zeroing
* out all the FPU registers) if we can't restore from the task's FPU state.
*/
__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
regs->ip = ex_fixup_addr(fixup);
WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
(void *)instruction_pointer(regs));
__copy_kernel_to_fpregs(&init_fpstate, -1);
return true;
}
EXPORT_SYMBOL_GPL(ex_handler_fprestore);
__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
regs->ip = ex_fixup_addr(fixup);
return true;
}
EXPORT_SYMBOL(ex_handler_uaccess);
__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
regs->ip = ex_fixup_addr(fixup);
regs->ax = trapnr;
return true;
}
EXPORT_SYMBOL(ex_handler_copy);
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip))
show_stack_regs(regs);
/* Pretend that the read succeeded and returned 0. */
regs->ip = ex_fixup_addr(fixup);
regs->ax = 0;
regs->dx = 0;
return true;
}
EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx,
(unsigned int)regs->ax, regs->ip, (void *)regs->ip))
show_stack_regs(regs);
/* Pretend that the write succeeded. */
regs->ip = ex_fixup_addr(fixup);
return true;
}
EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
if (static_cpu_has(X86_BUG_NULL_SEG))
asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
asm volatile ("mov %0, %%fs" : : "rm" (0));
return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr);
}
EXPORT_SYMBOL(ex_handler_clear_fs);
enum handler_type ex_get_fault_handler_type(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;
e = search_exception_tables(ip);
if (!e)
return EX_HANDLER_NONE;
handler = ex_fixup_handler(e);
if (handler == ex_handler_fault)
return EX_HANDLER_FAULT;
else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
return EX_HANDLER_UACCESS;
else
return EX_HANDLER_OTHER;
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
unsigned long fault_addr)
{
const struct exception_table_entry *e;
ex_handler_t handler;
#ifdef CONFIG_PNPBIOS
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
extern u32 pnp_bios_is_utter_crap;
pnp_bios_is_utter_crap = 1;
printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
__asm__ volatile(
"movl %0, %%esp\n\t"
"jmp *%1\n\t"
: : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
panic("do_trap: can't hit this");
}
#endif
e = search_exception_tables(regs->ip);
if (!e)
return 0;
handler = ex_fixup_handler(e);
return handler(e, regs, trapnr, error_code, fault_addr);
}
extern unsigned int early_recursion_flag;
/* Restricted version used during very early boot */
void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
{
/* Ignore early NMIs. */
if (trapnr == X86_TRAP_NMI)
return;
if (early_recursion_flag > 2)
goto halt_loop;
/*
* Old CPUs leave the high bits of CS on the stack
* undefined. I'm not sure which CPUs do this, but at least
* the 486 DX works this way.
* Xen pv domains are not using the default __KERNEL_CS.
*/
if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
goto fail;
/*
* The full exception fixup machinery is available as soon as
* the early IDT is loaded. This means that it is the
* responsibility of extable users to either function correctly
* when handlers are invoked early or to simply avoid causing
* exceptions before they're ready to handle them.
*
* This is better than filtering which handlers can be used,
* because refusing to call a handler here is guaranteed to
* result in a hard-to-debug panic.
*
* Keep in mind that not all vectors actually get here. Early
* page faults, for example, are special.
*/
if (fixup_exception(regs, trapnr, regs->orig_ax, 0))
return;
if (trapnr == X86_TRAP_UD) {
if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
/* Skip the ud2. */
regs->ip += LEN_UD2;
return;
}
/*
* If this was a BUG and report_bug returns or if this
* was just a normal #UD, we want to continue onward and
* crash.
*/
}
fail:
early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
(unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
regs->orig_ax, read_cr2());
show_regs(regs);
halt_loop:
while (true)
halt();
}