From 5d12f0424edf01ccd8abbcba1c7d45fe0b23c779 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 Apr 2018 18:11:16 +0200 Subject: [PATCH 1/9] x86/dumpstack: Remove code_bytes This was added by 86c418374223 ("[PATCH] i386: add option to show more code in oops reports") long time ago but experience shows that 64 instruction bytes are plenty when deciphering an oops. So get rid of it. Removing it will simplify further enhancements to the opcodes dumping machinery coming in the following patches. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180417161124.5294-2-bp@alien8.de --- .../admin-guide/kernel-parameters.txt | 5 ---- arch/x86/kernel/dumpstack.c | 27 +++---------------- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 11fc28ecdb6d..47aa554e41b7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -587,11 +587,6 @@ Sets the size of memory pool for coherent, atomic dma allocations, by default set to 256K. - code_bytes [X86] How many bytes of object code to print - in an oops report. - Range: 0 - 8192 - Default: 64 - com20020= [HW,NET] ARCnet - COM20020 chipset Format: [,[,[,[,[,]]]]] diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 18fa9d74c182..593db796374d 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -22,9 +22,10 @@ #include #include +#define OPCODE_BUFSIZE 64 + int panic_on_unrecovered_nmi; int panic_on_io_nmi; -static unsigned int code_bytes = 64; static int die_counter; bool in_task_stack(unsigned long *stack, struct task_struct *task, @@ -356,26 +357,6 @@ void die(const char *str, struct pt_regs *regs, long err) oops_end(flags, regs, sig); } -static int __init code_bytes_setup(char *s) -{ - ssize_t ret; - unsigned long val; - - if (!s) - return -EINVAL; - - ret = kstrtoul(s, 0, &val); - if (ret) - return ret; - - code_bytes = val; - if (code_bytes > 8192) - code_bytes = 8192; - - return 1; -} -__setup("code_bytes=", code_bytes_setup); - void show_regs(struct pt_regs *regs) { bool all = true; @@ -393,8 +374,8 @@ void show_regs(struct pt_regs *regs) * time of the fault.. */ if (!user_mode(regs)) { - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; + unsigned int code_prologue = OPCODE_BUFSIZE * 43 / 64; + unsigned int code_len = OPCODE_BUFSIZE; unsigned char c; u8 *ip; From 5df61707f0bdf8dce714a14806740e6abf2114c7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 Apr 2018 18:11:17 +0200 Subject: [PATCH 2/9] x86/dumpstack: Unexport oops_begin() The only user outside of arch/ is not a module since 86cd47334b00 ("ACPI, APEI, GHES, Prevent GHES to be built as module") No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180417161124.5294-3-bp@alien8.de --- arch/x86/kernel/dumpstack.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 593db796374d..579455c2b91e 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -268,7 +268,6 @@ unsigned long oops_begin(void) bust_spinlocks(1); return flags; } -EXPORT_SYMBOL_GPL(oops_begin); NOKPROBE_SYMBOL(oops_begin); void __noreturn rewind_stack_do_exit(int signr); From f0a1d7c11c3ebe2f601b448d13e7fbc3a0364a03 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 Apr 2018 18:11:18 +0200 Subject: [PATCH 3/9] x86/dumpstack: Carve out code-dumping into a function No functionality change, carve it out into a separate function for later changes. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180417161124.5294-4-bp@alien8.de --- arch/x86/kernel/dumpstack.c | 57 +++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 579455c2b91e..eb9d6c00a52f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -70,6 +70,35 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } +static void show_opcodes(u8 *rip) +{ + unsigned int code_prologue = OPCODE_BUFSIZE * 43 / 64; + unsigned int code_len = OPCODE_BUFSIZE; + unsigned char c; + u8 *ip; + int i; + + printk(KERN_DEFAULT "Code: "); + + ip = (u8 *)rip - code_prologue; + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { + /* try starting at IP */ + ip = (u8 *)rip; + code_len = code_len - code_prologue + 1; + } + for (i = 0; i < code_len; i++, ip++) { + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { + pr_cont(" Bad RIP value."); + break; + } + if (ip == (u8 *)rip) + pr_cont("<%02x> ", c); + else + pr_cont("%02x ", c); + } + pr_cont("\n"); +} + void show_iret_regs(struct pt_regs *regs) { printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip); @@ -359,7 +388,6 @@ void die(const char *str, struct pt_regs *regs, long err) void show_regs(struct pt_regs *regs) { bool all = true; - int i; show_regs_print_info(KERN_DEFAULT); @@ -373,32 +401,7 @@ void show_regs(struct pt_regs *regs) * time of the fault.. */ if (!user_mode(regs)) { - unsigned int code_prologue = OPCODE_BUFSIZE * 43 / 64; - unsigned int code_len = OPCODE_BUFSIZE; - unsigned char c; - u8 *ip; - show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); - - printk(KERN_DEFAULT "Code: "); - - ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at IP */ - ip = (u8 *)regs->ip; - code_len = code_len - code_prologue + 1; - } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { - pr_cont(" Bad RIP value."); - break; - } - if (ip == (u8 *)regs->ip) - pr_cont("<%02x> ", c); - else - pr_cont("%02x ", c); - } + show_opcodes((u8 *)regs->ip); } - pr_cont("\n"); } From 9e4a90fd34445df64a13d136676a31a4dd22aea3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 Apr 2018 18:11:19 +0200 Subject: [PATCH 4/9] x86/dumpstack: Improve opcodes dumping in the code section The code used to iterate byte-by-byte over the bytes around RIP and that is expensive: disabling pagefaults around it, copy_from_user, etc... Make it read the whole buffer of OPCODE_BUFSIZE size in one go. Use a statically allocated 64 bytes buffer so that concurrent show_opcodes() do not interleave in the output even though in the majority of the cases it's serialized via die_lock. Except the #PF path which doesn't... Also, do the PAGE_OFFSET check outside of the function because latter will be reused in other context. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180417161124.5294-5-bp@alien8.de --- arch/x86/kernel/dumpstack.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index eb9d6c00a52f..1d6698b54527 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -72,29 +72,24 @@ static void printk_stack_address(unsigned long address, int reliable, static void show_opcodes(u8 *rip) { - unsigned int code_prologue = OPCODE_BUFSIZE * 43 / 64; - unsigned int code_len = OPCODE_BUFSIZE; - unsigned char c; + unsigned int code_prologue = OPCODE_BUFSIZE * 2 / 3; + u8 opcodes[OPCODE_BUFSIZE]; u8 *ip; int i; printk(KERN_DEFAULT "Code: "); ip = (u8 *)rip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at IP */ - ip = (u8 *)rip; - code_len = code_len - code_prologue + 1; + if (probe_kernel_read(opcodes, ip, OPCODE_BUFSIZE)) { + pr_cont("Bad RIP value.\n"); + return; } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - pr_cont(" Bad RIP value."); - break; - } - if (ip == (u8 *)rip) - pr_cont("<%02x> ", c); + + for (i = 0; i < OPCODE_BUFSIZE; i++, ip++) { + if (ip == rip) + pr_cont("<%02x> ", opcodes[i]); else - pr_cont("%02x ", c); + pr_cont("%02x ", opcodes[i]); } pr_cont("\n"); } @@ -402,6 +397,10 @@ void show_regs(struct pt_regs *regs) */ if (!user_mode(regs)) { show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); - show_opcodes((u8 *)regs->ip); + + if (regs->ip < PAGE_OFFSET) + printk(KERN_DEFAULT "Code: Bad RIP value.\n"); + else + show_opcodes((u8 *)regs->ip); } } From e8b6f984516b1fcb0ccf4469ca42777c9c2dc76d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 Apr 2018 18:11:20 +0200 Subject: [PATCH 5/9] x86/dumpstack: Add loglevel argument to show_opcodes() Will be used in the next patch. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180417161124.5294-6-bp@alien8.de --- arch/x86/include/asm/stacktrace.h | 1 + arch/x86/kernel/dumpstack.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 133d9425fced..0630eeb18bbc 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -111,4 +111,5 @@ static inline unsigned long caller_frame_pointer(void) return (unsigned long)frame; } +void show_opcodes(u8 *rip, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1d6698b54527..1592d0c3ebb5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -70,14 +70,14 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } -static void show_opcodes(u8 *rip) +void show_opcodes(u8 *rip, const char *loglvl) { unsigned int code_prologue = OPCODE_BUFSIZE * 2 / 3; u8 opcodes[OPCODE_BUFSIZE]; u8 *ip; int i; - printk(KERN_DEFAULT "Code: "); + printk("%sCode: ", loglvl); ip = (u8 *)rip - code_prologue; if (probe_kernel_read(opcodes, ip, OPCODE_BUFSIZE)) { @@ -401,6 +401,6 @@ void show_regs(struct pt_regs *regs) if (regs->ip < PAGE_OFFSET) printk(KERN_DEFAULT "Code: Bad RIP value.\n"); else - show_opcodes((u8 *)regs->ip); + show_opcodes((u8 *)regs->ip, KERN_DEFAULT); } } From ba54d856a9d8a9c56b87e20c88602b7e3cb568fb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 Apr 2018 18:11:21 +0200 Subject: [PATCH 6/9] x86/fault: Dump user opcode bytes on fatal faults Sometimes it is useful to see which user opcode bytes RIP points to when a fault happens: be it to rule out RIP corruption, to dump info early during boot, when doing core dumps is impossible due to not having a writable filesystem yet. Sometimes it is useful if debugging an issue and one doesn't have access to the executable which caused the fault in order to disassemble it. That last aspect might have some security implications so show_unhandled_signals could be revisited for that or a new config option added. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180417161124.5294-7-bp@alien8.de --- arch/x86/mm/fault.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 73bd8c95ac71..a3fd94eff04d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -828,6 +828,8 @@ static inline void show_signal_msg(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct task_struct *tsk) { + const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG; + if (!unhandled_signal(tsk, SIGSEGV)) return; @@ -835,13 +837,14 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, return; printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", - task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, + loglvl, tsk->comm, task_pid_nr(tsk), address, (void *)regs->ip, (void *)regs->sp, error_code); print_vma_addr(KERN_CONT " in ", regs->ip); printk(KERN_CONT "\n"); + + show_opcodes((u8 *)regs->ip, loglvl); } static void From 7cccf0725cf7402514e09c52b089430005798b7f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 Apr 2018 18:11:22 +0200 Subject: [PATCH 7/9] x86/dumpstack: Add a show_ip() function ... which shows the Instruction Pointer along with the insn bytes around it. Use it whenever rIP is printed. Drop the rIP < PAGE_OFFSET check since probe_kernel_read() can handle any address properly. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180417161124.5294-8-bp@alien8.de --- arch/x86/include/asm/stacktrace.h | 1 + arch/x86/kernel/dumpstack.c | 23 +++++++++++++---------- arch/x86/kernel/process_32.c | 8 +++----- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 0630eeb18bbc..b6dc698f992a 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -112,4 +112,5 @@ static inline unsigned long caller_frame_pointer(void) } void show_opcodes(u8 *rip, const char *loglvl); +void show_ip(struct pt_regs *regs, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1592d0c3ebb5..82da808b5c36 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -94,9 +94,19 @@ void show_opcodes(u8 *rip, const char *loglvl) pr_cont("\n"); } +void show_ip(struct pt_regs *regs, const char *loglvl) +{ +#ifdef CONFIG_X86_32 + printk("%sEIP: %pS\n", loglvl, (void *)regs->ip); +#else + printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip); +#endif + show_opcodes((u8 *)regs->ip, loglvl); +} + void show_iret_regs(struct pt_regs *regs) { - printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip); + show_ip(regs, KERN_DEFAULT); printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss, regs->sp, regs->flags); } @@ -392,15 +402,8 @@ void show_regs(struct pt_regs *regs) __show_regs(regs, all); /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. + * When in-kernel, we also print out the stack at the time of the fault.. */ - if (!user_mode(regs)) { + if (!user_mode(regs)) show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); - - if (regs->ip < PAGE_OFFSET) - printk(KERN_DEFAULT "Code: Bad RIP value.\n"); - else - show_opcodes((u8 *)regs->ip, KERN_DEFAULT); - } } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5224c6099184..0ae659de21eb 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -76,16 +76,14 @@ void __show_regs(struct pt_regs *regs, int all) savesegment(gs, gs); } - printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip); - printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags, - raw_smp_processor_id()); + show_ip(regs, KERN_DEFAULT); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", regs->si, regs->di, regs->bp, sp); - printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", - (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); + printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n", + (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags); if (!all) return; From 602bd705da334f214fc03db328dc37d2f1f33307 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 Apr 2018 18:11:23 +0200 Subject: [PATCH 8/9] x86/dumpstack: Save first regs set for the executive summary Save the regs set when __die() is onvoked for the first time and print it in oops_end(). Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180417161124.5294-9-bp@alien8.de --- arch/x86/kernel/dumpstack.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 82da808b5c36..ee344030fd0a 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -28,6 +28,8 @@ int panic_on_unrecovered_nmi; int panic_on_io_nmi; static int die_counter; +static struct pt_regs exec_summary_regs; + bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info) { @@ -321,6 +323,9 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) raw_local_irq_restore(flags); oops_exit(); + /* Executive summary in case the oops scrolled away */ + __show_regs(&exec_summary_regs, true); + if (!signr) return; if (in_interrupt()) @@ -339,10 +344,10 @@ NOKPROBE_SYMBOL(oops_end); int __die(const char *str, struct pt_regs *regs, long err) { -#ifdef CONFIG_X86_32 - unsigned short ss; - unsigned long sp; -#endif + /* Save the regs of the first oops for the executive summary later. */ + if (!die_counter) + exec_summary_regs = *regs; + printk(KERN_DEFAULT "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", @@ -352,26 +357,13 @@ int __die(const char *str, struct pt_regs *regs, long err) IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); + show_regs(regs); + print_modules(); + if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) return 1; - print_modules(); - show_regs(regs); -#ifdef CONFIG_X86_32 - if (user_mode(regs)) { - sp = regs->sp; - ss = regs->ss; - } else { - sp = kernel_stack_pointer(regs); - savesegment(ss, ss); - } - printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n", - (void *)regs->ip, ss, sp); -#else - /* Executive summary in case the oops scrolled away */ - printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp); -#endif return 0; } NOKPROBE_SYMBOL(__die); From 4dba072cd097f35fa8f77c49d909ada2b079a4c4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 Apr 2018 18:11:24 +0200 Subject: [PATCH 9/9] x86/dumpstack: Explain the reasoning for the prologue and buffer size The whole reasoning behind the amount of opcode bytes dumped and prologue length isn't very clear so write down some of the reasons for why it is done the way it is. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180417161124.5294-10-bp@alien8.de --- arch/x86/kernel/dumpstack.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ee344030fd0a..666a284116ac 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -72,6 +72,25 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } +/* + * There are a couple of reasons for the 2/3rd prologue, courtesy of Linus: + * + * In case where we don't have the exact kernel image (which, if we did, we can + * simply disassemble and navigate to the RIP), the purpose of the bigger + * prologue is to have more context and to be able to correlate the code from + * the different toolchains better. + * + * In addition, it helps in recreating the register allocation of the failing + * kernel and thus make sense of the register dump. + * + * What is more, the additional complication of a variable length insn arch like + * x86 warrants having longer byte sequence before rIP so that the disassembler + * can "sync" up properly and find instruction boundaries when decoding the + * opcode bytes. + * + * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random + * guesstimate in attempt to achieve all of the above. + */ void show_opcodes(u8 *rip, const char *loglvl) { unsigned int code_prologue = OPCODE_BUFSIZE * 2 / 3;