mirror of
https://github.com/edk2-porting/linux-next.git
synced 2025-01-04 19:54:03 +08:00
x86/doublefault/32: Rewrite the x86_32 #DF handler and unify with 64-bit
The old x86_32 doublefault_fn() was old and crufty, and it did not even try to recover. do_double_fault() is much nicer. Rewrite the 32-bit double fault code to sanitize CPU state and call do_double_fault(). This is mostly an exercise i386 archaeology. With this patch applied, 32-bit double faults get a real stack trace, just like 64-bit double faults. [ mingo: merged the patch to a later kernel base. ] Signed-off-by: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
dc4e0021b0
commit
7d8d8cfdee
@ -1537,6 +1537,48 @@ SYM_CODE_START(debug)
|
||||
jmp common_exception
|
||||
SYM_CODE_END(debug)
|
||||
|
||||
#ifdef CONFIG_DOUBLEFAULT
|
||||
SYM_CODE_START(double_fault)
|
||||
1:
|
||||
/*
|
||||
* This is a task gate handler, not an interrupt gate handler.
|
||||
* The error code is on the stack, but the stack is otherwise
|
||||
* empty. Interrupts are off. Our state is sane with the following
|
||||
* exceptions:
|
||||
*
|
||||
* - CR0.TS is set. "TS" literally means "task switched".
|
||||
* - EFLAGS.NT is set because we're a "nested task".
|
||||
* - The doublefault TSS has back_link set and has been marked busy.
|
||||
* - TR points to the doublefault TSS and the normal TSS is busy.
|
||||
* - CR3 is the normal kernel PGD. This would be delightful, except
|
||||
* that the CPU didn't bother to save the old CR3 anywhere. This
|
||||
* would make it very awkward to return back to the context we came
|
||||
* from.
|
||||
*
|
||||
* The rest of EFLAGS is sanitized for us, so we don't need to
|
||||
* worry about AC or DF.
|
||||
*
|
||||
* Don't even bother popping the error code. It's always zero,
|
||||
* and ignoring it makes us a bit more robust against buggy
|
||||
* hypervisor task gate implementations.
|
||||
*
|
||||
* We will manually undo the task switch instead of doing a
|
||||
* task-switching IRET.
|
||||
*/
|
||||
|
||||
clts /* clear CR0.TS */
|
||||
pushl $X86_EFLAGS_FIXED
|
||||
popfl /* clear EFLAGS.NT */
|
||||
|
||||
call doublefault_shim
|
||||
|
||||
/* We don't support returning, so we have no IRET here. */
|
||||
1:
|
||||
hlt
|
||||
jmp 1b
|
||||
SYM_CODE_END(double_fault)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* NMI is doubly nasty. It can happen on the first instruction of
|
||||
* entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
|
||||
|
@ -69,6 +69,9 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code);
|
||||
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code);
|
||||
dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code);
|
||||
dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code);
|
||||
#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
|
||||
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2);
|
||||
#endif
|
||||
dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);
|
||||
dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
|
||||
dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
|
||||
|
@ -9,42 +9,83 @@
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
extern void double_fault(void);
|
||||
#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
|
||||
|
||||
static void doublefault_fn(void)
|
||||
#define TSS(x) this_cpu_read(cpu_tss_rw.x86_tss.x)
|
||||
|
||||
static void set_df_gdt_entry(unsigned int cpu);
|
||||
|
||||
/*
|
||||
* Called by double_fault with CR0.TS and EFLAGS.NT cleared. The CPU thinks
|
||||
* we're running the doublefault task. Cannot return.
|
||||
*/
|
||||
asmlinkage notrace void __noreturn doublefault_shim(void)
|
||||
{
|
||||
struct desc_ptr gdt_desc = {0, 0};
|
||||
unsigned long gdt, tss;
|
||||
unsigned long cr2;
|
||||
struct pt_regs regs;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE);
|
||||
|
||||
native_store_gdt(&gdt_desc);
|
||||
gdt = gdt_desc.address;
|
||||
cr2 = native_read_cr2();
|
||||
|
||||
printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
|
||||
/* Reset back to the normal kernel task. */
|
||||
force_reload_TR();
|
||||
set_df_gdt_entry(smp_processor_id());
|
||||
|
||||
if (ptr_ok(gdt)) {
|
||||
gdt += GDT_ENTRY_TSS << 3;
|
||||
tss = get_desc_base((struct desc_struct *)gdt);
|
||||
printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
|
||||
trace_hardirqs_off();
|
||||
|
||||
if (ptr_ok(tss)) {
|
||||
struct x86_hw_tss *t = (struct x86_hw_tss *)tss;
|
||||
/*
|
||||
* Fill in pt_regs. A downside of doing this in C is that the unwinder
|
||||
* won't see it (no ENCODE_FRAME_POINTER), so a nested stack dump
|
||||
* won't successfully unwind to the source of the double fault.
|
||||
* The main dump from do_double_fault() is fine, though, since it
|
||||
* uses these regs directly.
|
||||
*
|
||||
* If anyone ever cares, this could be moved to asm.
|
||||
*/
|
||||
regs.ss = TSS(ss);
|
||||
regs.__ssh = 0;
|
||||
regs.sp = TSS(sp);
|
||||
regs.flags = TSS(flags);
|
||||
regs.cs = TSS(cs);
|
||||
/* We won't go through the entry asm, so we can leave __csh as 0. */
|
||||
regs.__csh = 0;
|
||||
regs.ip = TSS(ip);
|
||||
regs.orig_ax = 0;
|
||||
regs.gs = TSS(gs);
|
||||
regs.__gsh = 0;
|
||||
regs.fs = TSS(fs);
|
||||
regs.__fsh = 0;
|
||||
regs.es = TSS(es);
|
||||
regs.__esh = 0;
|
||||
regs.ds = TSS(ds);
|
||||
regs.__dsh = 0;
|
||||
regs.ax = TSS(ax);
|
||||
regs.bp = TSS(bp);
|
||||
regs.di = TSS(di);
|
||||
regs.si = TSS(si);
|
||||
regs.dx = TSS(dx);
|
||||
regs.cx = TSS(cx);
|
||||
regs.bx = TSS(bx);
|
||||
|
||||
printk(KERN_EMERG "eip = %08lx, esp = %08lx\n",
|
||||
t->ip, t->sp);
|
||||
do_double_fault(®s, 0, cr2);
|
||||
|
||||
printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
|
||||
t->ax, t->bx, t->cx, t->dx);
|
||||
printk(KERN_EMERG "esi = %08lx, edi = %08lx\n",
|
||||
t->si, t->di);
|
||||
}
|
||||
}
|
||||
|
||||
for (;;)
|
||||
cpu_relax();
|
||||
/*
|
||||
* x86_32 does not save the original CR3 anywhere on a task switch.
|
||||
* This means that, even if we wanted to return, we would need to find
|
||||
* some way to reconstruct CR3. We could make a credible guess based
|
||||
* on cpu_tlbstate, but that would be racy and would not account for
|
||||
* PTI.
|
||||
*
|
||||
* Instead, don't bother. We can return through
|
||||
* rewind_stack_do_exit() instead.
|
||||
*/
|
||||
panic("cannot return from double fault\n");
|
||||
}
|
||||
NOKPROBE_SYMBOL(doublefault_shim);
|
||||
|
||||
DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
|
||||
.tss = {
|
||||
@ -55,9 +96,8 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
|
||||
.ldt = 0,
|
||||
.io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
|
||||
|
||||
.ip = (unsigned long) doublefault_fn,
|
||||
/* 0x2 bit is always set */
|
||||
.flags = X86_EFLAGS_SF | 0x2,
|
||||
.ip = (unsigned long) double_fault,
|
||||
.flags = X86_EFLAGS_FIXED,
|
||||
.es = __USER_DS,
|
||||
.cs = __KERNEL_CS,
|
||||
.ss = __KERNEL_DS,
|
||||
@ -71,6 +111,14 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
|
||||
},
|
||||
};
|
||||
|
||||
static void set_df_gdt_entry(unsigned int cpu)
|
||||
{
|
||||
/* Set up doublefault TSS pointer in the GDT */
|
||||
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS,
|
||||
&get_cpu_entry_area(cpu)->doublefault_stack.tss);
|
||||
|
||||
}
|
||||
|
||||
void doublefault_init_cpu_tss(void)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
@ -84,8 +132,5 @@ void doublefault_init_cpu_tss(void)
|
||||
(unsigned long)&cea->doublefault_stack.stack +
|
||||
sizeof(doublefault_stack.stack));
|
||||
|
||||
/* Set up doublefault TSS pointer in the GDT */
|
||||
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS,
|
||||
&get_cpu_entry_area(cpu)->doublefault_stack.tss);
|
||||
|
||||
set_df_gdt_entry(cpu);
|
||||
}
|
||||
|
@ -306,8 +306,23 @@ __visible void __noreturn handle_stack_overflow(const char *message,
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Runs on IST stack */
|
||||
#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
|
||||
/*
|
||||
* Runs on an IST stack for x86_64 and on a special task stack for x86_32.
|
||||
*
|
||||
* On x86_64, this is more or less a normal kernel entry. Notwithstanding the
|
||||
* SDM's warnings about double faults being unrecoverable, returning works as
|
||||
* expected. Presumably what the SDM actually means is that the CPU may get
|
||||
* the register state wrong on entry, so returning could be a bad idea.
|
||||
*
|
||||
* Various CPU engineers have promised that double faults due to an IRET fault
|
||||
* while the stack is read-only are, in fact, recoverable.
|
||||
*
|
||||
* On x86_32, this is entered through a task gate, and regs are synthesized
|
||||
* from the TSS. Returning is, in principle, okay, but changes to regs will
|
||||
* be lost. If, for some reason, we need to return to a context with modified
|
||||
* regs, the shim code could be adjusted to synchronize the registers.
|
||||
*/
|
||||
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
|
||||
{
|
||||
static const char str[] = "double fault";
|
||||
|
Loading…
Reference in New Issue
Block a user