From 250bbd12c2fe1221ec96d8087d63e982d4f2180a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 24 Apr 2014 19:08:24 +0200 Subject: [PATCH 01/21] uprobes/x86: Refuse to attach uprobe to "word-sized" branch insns All branch insns on x86 can be prefixed with the operand-size override prefix, 0x66. It was only ever useful for performing jumps to 32-bit offsets in 16-bit code segments. In 32-bit code, such instructions are useless since they cause IP truncation to 16 bits, and in case of call insns, they save only 16 bits of return address and misalign the stack pointer as a "bonus". In 64-bit code, such instructions are treated differently by Intel and AMD CPUs: Intel ignores the prefix altogether, AMD treats them the same as in 32-bit mode. Before this patch, the emulation code would execute the instructions as if they have no 0x66 prefix. With this patch, we refuse to attach uprobes to such insns. Signed-off-by: Denys Vlasenko Acked-by: Jim Keniston Acked-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov --- arch/x86/kernel/uprobes.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index ace22916ade3..3cf24a218196 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -583,6 +583,7 @@ static struct uprobe_xol_ops branch_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); + int i; /* has the side-effect of processing the entire instruction */ insn_get_length(insn); @@ -612,6 +613,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) return -ENOSYS; } + /* + * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported. + * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. + * No one uses these insns, reject any branch insns with such prefix. + */ + for (i = 0; i < insn->prefixes.nbytes; i++) { + if (insn->prefixes.bytes[i] == 0x66) + return -ENOTSUPP; + } + auprobe->branch.opc1 = opc1; auprobe->branch.ilen = insn->length; auprobe->branch.offs = insn->immediate.value; From 73175d0d19657ec132cc24e8cf0e341e73c54868 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Apr 2014 12:34:02 +0200 Subject: [PATCH 02/21] uprobes/x86: Add uprobe_init_insn(), kill validate_insn_{32,64}bits() validate_insn_32bits() and validate_insn_64bits() are very similar, turn them into the single uprobe_init_insn() which has the additional "bool x86_64" argument which can be passed to insn_init() and used to choose between good_insns_64/good_insns_32. Also kill UPROBE_FIX_NONE, it has no users. Note: the current code doesn't use ifdef's consistently, good_insns_64 depends on CONFIG_X86_64 but good_insns_32 is unconditional. This patch removes ifdef around good_insns_64, we will add it back later along with the similar one for good_insns_32. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 45 +++++++++++---------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3cf24a218196..b4aff6a70f4d 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -32,9 +32,6 @@ /* Post-execution fixups. */ -/* No fixup needed */ -#define UPROBE_FIX_NONE 0x0 - /* Adjust IP back to vicinity of actual insn */ #define UPROBE_FIX_IP 0x1 @@ -114,7 +111,6 @@ static volatile u32 good_2byte_insns[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; -#ifdef CONFIG_X86_64 /* Good-instruction tables for 64-bit apps */ static volatile u32 good_insns_64[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ @@ -138,7 +134,6 @@ static volatile u32 good_insns_64[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; -#endif #undef W /* @@ -209,16 +204,22 @@ static bool is_prefix_bad(struct insn *insn) return false; } -static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) +static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64) { - insn_init(insn, auprobe->insn, false); + u32 volatile *good_insns; + + insn_init(insn, auprobe->insn, x86_64); - /* Skip good instruction prefixes; reject "bad" ones. */ insn_get_opcode(insn); if (is_prefix_bad(insn)) return -ENOTSUPP; - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) + if (x86_64) + good_insns = good_insns_64; + else + good_insns = good_insns_32; + + if (test_bit(OPCODE1(insn), (unsigned long *)good_insns)) return 0; if (insn->opcode.nbytes == 2) { @@ -355,30 +356,10 @@ handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long * } } -static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) -{ - insn_init(insn, auprobe->insn, true); - - /* Skip good instruction prefixes; reject "bad" ones. */ - insn_get_opcode(insn); - if (is_prefix_bad(insn)) - return -ENOTSUPP; - - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) - return 0; - - if (insn->opcode.nbytes == 2) { - if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) - return 0; - } - return -ENOTSUPP; -} - static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) { - if (mm->context.ia32_compat) - return validate_insn_32bits(auprobe, insn); - return validate_insn_64bits(auprobe, insn); + bool x86_64 = !mm->context.ia32_compat; + return uprobe_init_insn(auprobe, insn, x86_64); } #else /* 32-bit: */ /* @@ -398,7 +379,7 @@ static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs * static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) { - return validate_insn_32bits(auprobe, insn); + return uprobe_init_insn(auprobe, insn, false); } #endif /* CONFIG_X86_64 */ From 2ae1f49ae1978fedb6ad607e1f8b084aa9752f95 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Apr 2014 14:03:05 +0200 Subject: [PATCH 03/21] uprobes/x86: Add is_64bit_mm(), kill validate_insn_bits() 1. Extract the ->ia32_compat check from 64bit validate_insn_bits() into the new helper, is_64bit_mm(), it will have more users. TODO: this checks is actually wrong if mm owner is X32 task, we need another fix which changes set_personality_ia32(). TODO: even worse, the whole 64-or-32-bit logic is very broken and the fix is not simple, we need the nontrivial changes in the core uprobes code. 2. Kill validate_insn_bits() and change its single caller to use uprobe_init_insn(is_64bit_mm(mm). Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index b4aff6a70f4d..b3b25ddc04fb 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -231,6 +231,11 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool } #ifdef CONFIG_X86_64 +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return !config_enabled(CONFIG_IA32_EMULATION) || + !mm->context.ia32_compat; +} /* * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses @@ -355,13 +360,11 @@ handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long * *correction += 4; } } - -static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) -{ - bool x86_64 = !mm->context.ia32_compat; - return uprobe_init_insn(auprobe, insn, x86_64); -} #else /* 32-bit: */ +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return false; +} /* * No RIP-relative addressing on 32-bit */ @@ -376,11 +379,6 @@ static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs * long *correction) { } - -static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) -{ - return uprobe_init_insn(auprobe, insn, false); -} #endif /* CONFIG_X86_64 */ struct uprobe_xol_ops { @@ -625,7 +623,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, bool fix_ip = true, fix_call = false; int ret; - ret = validate_insn_bits(auprobe, mm, &insn); + ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm)); if (ret) return ret; From ff261964cfcfe49d73690ca29b0ba2853d9497e3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Apr 2014 14:15:27 +0200 Subject: [PATCH 04/21] uprobes/x86: Shift "insn_complete" from branch_setup_xol_ops() to uprobe_init_insn() Change uprobe_init_insn() to make insn_complete() == T, this makes other insn_get_*() calls unnecessary. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index b3b25ddc04fb..98d7db50f425 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -209,8 +209,11 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool u32 volatile *good_insns; insn_init(insn, auprobe->insn, x86_64); + /* has the side-effect of processing the entire instruction */ + insn_get_length(insn); + if (WARN_ON_ONCE(!insn_complete(insn))) + return -ENOEXEC; - insn_get_opcode(insn); if (is_prefix_bad(insn)) return -ENOTSUPP; @@ -283,8 +286,6 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) * is the immediate operand. */ cursor = auprobe->insn + insn_offset_modrm(insn); - insn_get_length(insn); - /* * Convert from rip-relative addressing to indirect addressing * via a scratch register. Change the r/m field from 0x5 (%rip) @@ -564,11 +565,6 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) u8 opc1 = OPCODE1(insn); int i; - /* has the side-effect of processing the entire instruction */ - insn_get_length(insn); - if (WARN_ON_ONCE(!insn_complete(insn))) - return -ENOEXEC; - switch (opc1) { case 0xeb: /* jmp 8 */ case 0xe9: /* jmp 32 */ @@ -654,7 +650,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, fix_ip = false; break; case 0xff: - insn_get_modrm(&insn); switch (MODRM_REG(&insn)) { case 2: case 3: /* call or lcall, indirect */ fix_call = true; From 8dbacad93a2a12adebcc717e6055b1bcc1739ab8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Apr 2014 16:07:15 +0200 Subject: [PATCH 05/21] uprobes/x86: Make good_insns_* depend on CONFIG_X86_* Add the suitable ifdef's around good_insns_* arrays. We do not want to add the ugly ifdef's into their only user, uprobe_init_insn(), so the "#else" branch simply defines them as NULL. This doesn't generate the extra code, gcc is smart enough, although the code is fine even if it could not detect that (without CONFIG_IA32_EMULATION) is_64bit_mm() is __builtin_constant_p(). The patch looks more complicated because it also moves good_insns_64 up close to good_insns_32. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 56 ++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 98d7db50f425..892975b3c99c 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -64,6 +64,7 @@ * to keep gcc from statically optimizing it out, as variable_test_bit makes * some versions of gcc to think only *(unsigned long*) is used. */ +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static volatile u32 good_insns_32[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ @@ -86,6 +87,37 @@ static volatile u32 good_insns_32[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; +#else +#define good_insns_32 NULL +#endif + +/* Good-instruction tables for 64-bit apps */ +#if defined(CONFIG_X86_64) +static volatile u32 good_insns_64[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ---------------------------------------------- */ + W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ + W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ + W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ + W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ + W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ + W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ + W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ + W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ + W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ + W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ + W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ + W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ + W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ + W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ + W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ + W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ + /* ---------------------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; +#else +#define good_insns_64 NULL +#endif /* Using this for both 64-bit and 32-bit apps */ static volatile u32 good_2byte_insns[256 / 32] = { @@ -110,30 +142,6 @@ static volatile u32 good_2byte_insns[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; - -/* Good-instruction tables for 64-bit apps */ -static volatile u32 good_insns_64[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ---------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ - W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ - W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ - W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ - W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ - W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ - W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ - W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ - W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ - W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ - W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ - /* ---------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; #undef W /* From b24dc8dace74708fd849312722090169c5da97d3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 19 Apr 2014 18:10:09 +0200 Subject: [PATCH 06/21] uprobes/x86: Fix is_64bit_mm() with CONFIG_X86_X32 is_64bit_mm() assumes that mm->context.ia32_compat means the 32-bit instruction set, this is not true if the task is TIF_X32. Change set_personality_ia32() to initialize mm->context.ia32_compat by TIF_X32 or TIF_IA32 instead of 1. This allows to fix is_64bit_mm() without affecting other users, they all treat ia32_compat as "bool". TIF_ in ->ia32_compat looks a bit strange, but this is grep-friendly and avoids the new define's. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/process_64.c | 7 ++++--- arch/x86/kernel/uprobes.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9c0280f93d05..9b53940981b7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -413,12 +413,11 @@ void set_personality_ia32(bool x32) set_thread_flag(TIF_ADDR32); /* Mark the associated mm as containing 32-bit tasks. */ - if (current->mm) - current->mm->context.ia32_compat = 1; - if (x32) { clear_thread_flag(TIF_IA32); set_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_X32; current->personality &= ~READ_IMPLIES_EXEC; /* is_compat_task() uses the presence of the x32 syscall bit flag to determine compat status */ @@ -426,6 +425,8 @@ void set_personality_ia32(bool x32) } else { set_thread_flag(TIF_IA32); clear_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_IA32; current->personality |= force_personality32; /* Prepare the first "return" to user space */ current_thread_info()->status |= TS_COMPAT; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 892975b3c99c..ecbffd16d090 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -245,7 +245,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool static inline bool is_64bit_mm(struct mm_struct *mm) { return !config_enabled(CONFIG_IA32_EMULATION) || - !mm->context.ia32_compat; + !(mm->context.ia32_compat == TIF_IA32); } /* * If arch_uprobe->insn doesn't use rip-relative addressing, return From dd91016dfc9ba9236cb0149984da3f0434278b49 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 22 Apr 2014 15:20:07 +0200 Subject: [PATCH 07/21] uprobes/x86: Don't change the task's state if ->pre_xol() fails Currently this doesn't matter, the only ->pre_xol() hook can't fail, but we need to fix arch_uprobe_pre_xol() anyway. If ->pre_xol() fails we should not change regs->ip/flags, we should just return the error to make restart actually possible. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index ecbffd16d090..f4464b1b9435 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -687,6 +687,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + if (auprobe->ops->pre_xol) { + int err = auprobe->ops->pre_xol(auprobe, regs); + if (err) + return err; + } + regs->ip = utask->xol_vaddr; utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; @@ -696,8 +702,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) set_task_blockstep(current, false); - if (auprobe->ops->pre_xol) - return auprobe->ops->pre_xol(auprobe, regs); return 0; } From 588fbd613c3d8fa73e96720761d49f1d40d34d4c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 16:58:17 +0200 Subject: [PATCH 08/21] uprobes/x86: Introduce uprobe_xol_ops->abort() and default_abort_op() arch_uprobe_abort_xol() calls handle_riprel_post_xol() even if auprobe->ops != default_xol_ops. This is fine correctness wise, only default_pre_xol_op() can set UPROBE_FIX_RIP_AX|UPROBE_FIX_RIP_CX and otherwise handle_riprel_post_xol() is nop. But this doesn't look clean and this doesn't allow us to move ->fixups into the union in arch_uprobe. Move this handle_riprel_post_xol() call into the new default_abort_op() hook and change arch_uprobe_abort_xol() accordingly. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index f4464b1b9435..b3c2a92cce6c 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -394,6 +394,7 @@ struct uprobe_xol_ops { bool (*emulate)(struct arch_uprobe *, struct pt_regs *); int (*pre_xol)(struct arch_uprobe *, struct pt_regs *); int (*post_xol)(struct arch_uprobe *, struct pt_regs *); + void (*abort)(struct arch_uprobe *, struct pt_regs *); }; static inline int sizeof_long(void) @@ -444,9 +445,15 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs return 0; } +static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + handle_riprel_post_xol(auprobe, regs, NULL); +} + static struct uprobe_xol_ops default_xol_ops = { .pre_xol = default_pre_xol_op, .post_xol = default_post_xol_op, + .abort = default_abort_op, }; static bool branch_is_call(struct arch_uprobe *auprobe) @@ -820,10 +827,11 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - current->thread.trap_nr = utask->autask.saved_trap_nr; - handle_riprel_post_xol(auprobe, regs, NULL); - instruction_pointer_set(regs, utask->vaddr); + if (auprobe->ops->abort) + auprobe->ops->abort(auprobe, regs); + current->thread.trap_nr = utask->autask.saved_trap_nr; + regs->ip = utask->vaddr; /* clear TF if it was set by us in arch_uprobe_pre_xol() */ if (!utask->autask.saved_tf) regs->flags &= ~X86_EFLAGS_TF; From 6ded5f3848bfd3227ee208aa38f8bf8d7209d4e3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 18:28:02 +0200 Subject: [PATCH 09/21] uprobes/x86: Don't use arch_uprobe_abort_xol() in arch_uprobe_post_xol() 014940bad8e4 "uprobes/x86: Send SIGILL if arch_uprobe_post_xol() fails" changed arch_uprobe_post_xol() to use arch_uprobe_abort_xol() if ->post_xol fails. This was correct and helped to avoid the additional complications, we need to clear X86_EFLAGS_TF in this case. However, now that we have uprobe_xol_ops->abort() hook it would be better to avoid arch_uprobe_abort_xol() here. ->post_xol() should likely do what ->abort() does anyway, we should not do the same work twice. Currently only handle_riprel_post_xol() can be called twice, this is unnecessary but safe. Still this is not clean and can lead to the problems in future. Change arch_uprobe_post_xol() to clear X86_EFLAGS_TF and restore ->ip by hand and avoid arch_uprobe_abort_xol(). This temporary uglifies the usage of autask.saved_tf, we will cleanup this later. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index b3c2a92cce6c..2efb93f96030 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -759,22 +759,24 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) struct uprobe_task *utask = current->utask; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); + current->thread.trap_nr = utask->autask.saved_trap_nr; if (auprobe->ops->post_xol) { int err = auprobe->ops->post_xol(auprobe, regs); if (err) { - arch_uprobe_abort_xol(auprobe, regs); + if (!utask->autask.saved_tf) + regs->flags &= ~X86_EFLAGS_TF; /* - * Restart the probed insn. ->post_xol() must ensure - * this is really possible if it returns -ERESTART. + * Restore ->ip for restart or post mortem analysis. + * ->post_xol() must not return -ERESTART unless this + * is really possible. */ + regs->ip = utask->vaddr; if (err == -ERESTART) return 0; return err; } } - - current->thread.trap_nr = utask->autask.saved_trap_nr; /* * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP * so we can get an extra SIGTRAP if we do not clear TF. We need @@ -819,9 +821,8 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, /* * This function gets called when XOL instruction either gets trapped or - * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. - * Reset the instruction pointer to its probed address for the potential - * restart or for post mortem analysis. + * the thread has a fatal signal. Reset the instruction pointer to its + * probed address for the potential restart or for post mortem analysis. */ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { From 220ef8dc9a7a63fe202aacd3fc61e5104f6dd98c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 20:39:56 +0200 Subject: [PATCH 10/21] uprobes/x86: Move UPROBE_FIX_SETF logic from arch_uprobe_post_xol() to default_post_xol_op() UPROBE_FIX_SETF is only needed to handle "popf" correctly but it is processed by the generic arch_uprobe_post_xol() code. This doesn't allows us to make ->fixups private for default_xol_ops. 1 Change default_post_xol_op(UPROBE_FIX_SETF) to set ->saved_tf = T. "popf" always reads the flags from stack, it doesn't matter if TF was set or not before single-step. Ignoring the naming, this is even more logical, "saved_tf" means "owned by application" and we do not own this flag after "popf". 2. Change arch_uprobe_post_xol() to save ->saved_tf into the local "bool send_sigtrap" before ->post_xol(). 3. Change arch_uprobe_post_xol() to ignore UPROBE_FIX_SETF and just check ->saved_tf after ->post_xol(). With this patch ->fixups and ->rip_rela_target_address are only used by default_xol_ops hooks, we are ready to remove them from the common part of arch_uprobe. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 2efb93f96030..b2bca293fc57 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -441,6 +441,9 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs return -ERESTART; } } + /* popf; tell the caller to not touch TF */ + if (auprobe->fixups & UPROBE_FIX_SETF) + utask->autask.saved_tf = true; return 0; } @@ -757,15 +760,15 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + bool send_sigtrap = utask->autask.saved_tf; + int err = 0; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); current->thread.trap_nr = utask->autask.saved_trap_nr; if (auprobe->ops->post_xol) { - int err = auprobe->ops->post_xol(auprobe, regs); + err = auprobe->ops->post_xol(auprobe, regs); if (err) { - if (!utask->autask.saved_tf) - regs->flags &= ~X86_EFLAGS_TF; /* * Restore ->ip for restart or post mortem analysis. * ->post_xol() must not return -ERESTART unless this @@ -773,8 +776,8 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) */ regs->ip = utask->vaddr; if (err == -ERESTART) - return 0; - return err; + err = 0; + send_sigtrap = false; } } /* @@ -782,12 +785,13 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * so we can get an extra SIGTRAP if we do not clear TF. We need * to examine the opcode to make it right. */ - if (utask->autask.saved_tf) + if (send_sigtrap) send_sig(SIGTRAP, current, 0); - else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + + if (!utask->autask.saved_tf) regs->flags &= ~X86_EFLAGS_TF; - return 0; + return err; } /* callback routine for handling exceptions. */ From 97aa5cddbe9e01521137f337624469374e3cbde5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 22 Apr 2014 16:20:55 +0200 Subject: [PATCH 11/21] uprobes/x86: Move default_xol_ops's data into arch_uprobe->def Finally we can move arch_uprobe->fixups/rip_rela_target_address into the new "def" struct and place this struct in the union, they are only used by default_xol_ops paths. The patch also renames rip_rela_target_address to riprel_target just to make this name shorter. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/include/asm/uprobes.h | 12 ++++++---- arch/x86/kernel/uprobes.c | 43 +++++++++++++++++----------------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 93bee7b93854..72caff7afbde 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -41,18 +41,20 @@ struct arch_uprobe { u8 ixol[MAX_UINSN_BYTES]; }; - u16 fixups; const struct uprobe_xol_ops *ops; union { -#ifdef CONFIG_X86_64 - unsigned long rip_rela_target_address; -#endif struct { s32 offs; u8 ilen; u8 opc1; - } branch; + } branch; + struct { +#ifdef CONFIG_X86_64 + long riprel_target; +#endif + u16 fixups; + } def; }; }; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index b2bca293fc57..7824ce248f8f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -251,10 +251,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm) * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses * its memory operand indirectly through a scratch register. Set - * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address - * accordingly. (The contents of the scratch register will be saved - * before we single-step the modified instruction, and restored - * afterward.) + * def->fixups and def->riprel_target accordingly. (The contents of the + * scratch register will be saved before we single-step the modified + * instruction, and restored afterward). * * We do this because a rip-relative instruction can access only a * relatively small area (+/- 2 GB from the instruction), and the XOL @@ -308,18 +307,18 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) * is NOT the register operand, so we use %rcx (register * #1) for the scratch register. */ - auprobe->fixups = UPROBE_FIX_RIP_CX; + auprobe->def.fixups = UPROBE_FIX_RIP_CX; /* Change modrm from 00 000 101 to 00 000 001. */ *cursor = 0x1; } else { /* Use %rax (register #0) for the scratch register. */ - auprobe->fixups = UPROBE_FIX_RIP_AX; + auprobe->def.fixups = UPROBE_FIX_RIP_AX; /* Change modrm from 00 xxx 101 to 00 xxx 000 */ *cursor = (reg << 3); } /* Target address = address of next instruction + (signed) offset */ - auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value; + auprobe->def.riprel_target = (long)insn->length + insn->displacement.value; /* Displacement field is gone; slide immediate field (if any) over. */ if (insn->immediate.nbytes) { @@ -336,25 +335,25 @@ static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, struct arch_uprobe_task *autask) { - if (auprobe->fixups & UPROBE_FIX_RIP_AX) { + if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) { autask->saved_scratch_register = regs->ax; regs->ax = current->utask->vaddr; - regs->ax += auprobe->rip_rela_target_address; - } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { + regs->ax += auprobe->def.riprel_target; + } else if (auprobe->def.fixups & UPROBE_FIX_RIP_CX) { autask->saved_scratch_register = regs->cx; regs->cx = current->utask->vaddr; - regs->cx += auprobe->rip_rela_target_address; + regs->cx += auprobe->def.riprel_target; } } static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) { - if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { + if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { struct arch_uprobe_task *autask; autask = ¤t->utask->autask; - if (auprobe->fixups & UPROBE_FIX_RIP_AX) + if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) regs->ax = autask->saved_scratch_register; else regs->cx = autask->saved_scratch_register; @@ -432,17 +431,17 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs long correction = (long)(utask->vaddr - utask->xol_vaddr); handle_riprel_post_xol(auprobe, regs, &correction); - if (auprobe->fixups & UPROBE_FIX_IP) + if (auprobe->def.fixups & UPROBE_FIX_IP) regs->ip += correction; - if (auprobe->fixups & UPROBE_FIX_CALL) { + if (auprobe->def.fixups & UPROBE_FIX_CALL) { if (adjust_ret_addr(regs->sp, correction)) { regs->sp += sizeof_long(); return -ERESTART; } } /* popf; tell the caller to not touch TF */ - if (auprobe->fixups & UPROBE_FIX_SETF) + if (auprobe->def.fixups & UPROBE_FIX_SETF) utask->autask.saved_tf = true; return 0; @@ -646,13 +645,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, return ret; /* - * Figure out which fixups arch_uprobe_post_xol() will need to perform, - * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups - * is either zero or it reflects rip-related fixups. + * Figure out which fixups default_post_xol_op() will need to perform, + * and annotate def->fixups accordingly. To start with, ->fixups is + * either zero or it reflects rip-related fixups. */ switch (OPCODE1(&insn)) { case 0x9d: /* popf */ - auprobe->fixups |= UPROBE_FIX_SETF; + auprobe->def.fixups |= UPROBE_FIX_SETF; break; case 0xc3: /* ret or lret -- ip is correct */ case 0xcb: @@ -680,9 +679,9 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, } if (fix_ip) - auprobe->fixups |= UPROBE_FIX_IP; + auprobe->def.fixups |= UPROBE_FIX_IP; if (fix_call) - auprobe->fixups |= UPROBE_FIX_CALL; + auprobe->def.fixups |= UPROBE_FIX_CALL; auprobe->ops = &default_xol_ops; return 0; From 78d9af4cd375880a574327210eb9dab572618364 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 24 Apr 2014 18:52:37 +0200 Subject: [PATCH 12/21] uprobes/x86: Cleanup the usage of arch_uprobe->def.fixups, make it u8 handle_riprel_insn() assumes that nobody else could modify ->fixups before. This is correct but fragile, change it to use "|=". Also make ->fixups u8, we are going to add the new members into the union. It is not clear why UPROBE_FIX_RIP_.X lived in the upper byte, redefine them so that they can fit into u8. Signed-off-by: Oleg Nesterov --- arch/x86/include/asm/uprobes.h | 2 +- arch/x86/kernel/uprobes.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 72caff7afbde..9ce25ce04fee 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -53,7 +53,7 @@ struct arch_uprobe { #ifdef CONFIG_X86_64 long riprel_target; #endif - u16 fixups; + u8 fixups; } def; }; }; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 7824ce248f8f..a8e1d7e47001 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -33,16 +33,16 @@ /* Post-execution fixups. */ /* Adjust IP back to vicinity of actual insn */ -#define UPROBE_FIX_IP 0x1 +#define UPROBE_FIX_IP 0x01 /* Adjust the return address of a call insn */ -#define UPROBE_FIX_CALL 0x2 +#define UPROBE_FIX_CALL 0x02 /* Instruction will modify TF, don't change it */ -#define UPROBE_FIX_SETF 0x4 +#define UPROBE_FIX_SETF 0x04 -#define UPROBE_FIX_RIP_AX 0x8000 -#define UPROBE_FIX_RIP_CX 0x4000 +#define UPROBE_FIX_RIP_AX 0x08 +#define UPROBE_FIX_RIP_CX 0x10 #define UPROBE_TRAP_NR UINT_MAX @@ -307,12 +307,12 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) * is NOT the register operand, so we use %rcx (register * #1) for the scratch register. */ - auprobe->def.fixups = UPROBE_FIX_RIP_CX; + auprobe->def.fixups |= UPROBE_FIX_RIP_CX; /* Change modrm from 00 000 101 to 00 000 001. */ *cursor = 0x1; } else { /* Use %rax (register #0) for the scratch register. */ - auprobe->def.fixups = UPROBE_FIX_RIP_AX; + auprobe->def.fixups |= UPROBE_FIX_RIP_AX; /* Change modrm from 00 xxx 101 to 00 xxx 000 */ *cursor = (reg << 3); } From 2b82cadffc4154a25c25d88a63c7fb3397cda9d6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 24 Apr 2014 19:21:38 +0200 Subject: [PATCH 13/21] uprobes/x86: Introduce push_ret_address() Extract the "push return address" code from branch_emulate_op() into the new simple helper, push_ret_address(). It will have more users. Signed-off-by: Oleg Nesterov --- arch/x86/kernel/uprobes.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index a8e1d7e47001..df75913acfc0 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -407,6 +407,17 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) return 0; } +static int push_ret_address(struct pt_regs *regs, unsigned long ip) +{ + unsigned long new_sp = regs->sp - sizeof_long(); + + if (copy_to_user((void __user *)new_sp, &ip, sizeof_long())) + return -EFAULT; + + regs->sp = new_sp; + return 0; +} + /* * Adjust the return address pushed by a call insn executed out of line. */ @@ -517,7 +528,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) unsigned long offs = (long)auprobe->branch.offs; if (branch_is_call(auprobe)) { - unsigned long new_sp = regs->sp - sizeof_long(); /* * If it fails we execute this (mangled, see the comment in * branch_clear_offset) insn out-of-line. In the likely case @@ -527,9 +537,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) * * But there is corner case, see the comment in ->post_xol(). */ - if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) + if (push_ret_address(regs, new_ip)) return false; - regs->sp = new_sp; } else if (!check_jmp_cond(auprobe, regs)) { offs = 0; } From 1dc76e6eacef271230d9ff6fd0f91824bda03f44 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Apr 2014 18:06:19 +0200 Subject: [PATCH 14/21] uprobes/x86: Kill adjust_ret_addr(), simplify UPROBE_FIX_CALL logic The only insn which could have both UPROBE_FIX_IP and UPROBE_FIX_CALL was 0xe8 "call relative", and now it is handled by branch_xol_ops. So we can change default_post_xol_op(UPROBE_FIX_CALL) to simply push the address of next insn == utask->vaddr + insn.length, just we need to record insn.length into the new auprobe->def.ilen member. Note: if/when we teach branch_xol_ops to support jcxz/loopz we can remove the "correction" logic, UPROBE_FIX_IP can use the same address. Signed-off-by: Oleg Nesterov --- arch/x86/include/asm/uprobes.h | 1 + arch/x86/kernel/uprobes.c | 24 +++--------------------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 9ce25ce04fee..a040d493a4f9 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -54,6 +54,7 @@ struct arch_uprobe { long riprel_target; #endif u8 fixups; + u8 ilen; } def; }; }; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index df75913acfc0..5bcce852628a 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -418,24 +418,6 @@ static int push_ret_address(struct pt_regs *regs, unsigned long ip) return 0; } -/* - * Adjust the return address pushed by a call insn executed out of line. - */ -static int adjust_ret_addr(unsigned long sp, long correction) -{ - int rasize = sizeof_long(); - long ra; - - if (copy_from_user(&ra, (void __user *)sp, rasize)) - return -EFAULT; - - ra += correction; - if (copy_to_user((void __user *)sp, &ra, rasize)) - return -EFAULT; - - return 0; -} - static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; @@ -446,10 +428,9 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs regs->ip += correction; if (auprobe->def.fixups & UPROBE_FIX_CALL) { - if (adjust_ret_addr(regs->sp, correction)) { - regs->sp += sizeof_long(); + regs->sp += sizeof_long(); + if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen)) return -ERESTART; - } } /* popf; tell the caller to not touch TF */ if (auprobe->def.fixups & UPROBE_FIX_SETF) @@ -687,6 +668,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, handle_riprel_insn(auprobe, &insn); } + auprobe->def.ilen = insn.length; if (fix_ip) auprobe->def.fixups |= UPROBE_FIX_IP; if (fix_call) From 83cd591485e558ab70aed45ce7261ce3f5ee8746 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Apr 2014 18:53:32 +0200 Subject: [PATCH 15/21] uprobes/x86: Cleanup the usage of UPROBE_FIX_IP/UPROBE_FIX_CALL Now that UPROBE_FIX_IP/UPROBE_FIX_CALL are mutually exclusive we can use a single "fix_ip_or_call" enum instead of 2 fix_* booleans. This way the logic looks more understandable and clean to me. While at it, join "case 0xea" with other "ip is correct" ret/lret cases. Also change default_post_xol_op() to use "else if" for the same reason. Signed-off-by: Oleg Nesterov --- arch/x86/kernel/uprobes.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 5bcce852628a..d2792e884d54 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -424,10 +424,9 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs long correction = (long)(utask->vaddr - utask->xol_vaddr); handle_riprel_post_xol(auprobe, regs, &correction); - if (auprobe->def.fixups & UPROBE_FIX_IP) + if (auprobe->def.fixups & UPROBE_FIX_IP) { regs->ip += correction; - - if (auprobe->def.fixups & UPROBE_FIX_CALL) { + } else if (auprobe->def.fixups & UPROBE_FIX_CALL) { regs->sp += sizeof_long(); if (push_ret_address(regs, utask->vaddr + auprobe->def.ilen)) return -ERESTART; @@ -623,7 +622,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) { struct insn insn; - bool fix_ip = true, fix_call = false; + u8 fix_ip_or_call = UPROBE_FIX_IP; int ret; ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm)); @@ -647,21 +646,20 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, case 0xcb: case 0xc2: case 0xca: - fix_ip = false; + case 0xea: /* jmp absolute -- ip is correct */ + fix_ip_or_call = 0; break; case 0x9a: /* call absolute - Fix return addr, not ip */ - fix_call = true; - fix_ip = false; - break; - case 0xea: /* jmp absolute -- ip is correct */ - fix_ip = false; + fix_ip_or_call = UPROBE_FIX_CALL; break; case 0xff: switch (MODRM_REG(&insn)) { case 2: case 3: /* call or lcall, indirect */ - fix_call = true; + fix_ip_or_call = UPROBE_FIX_CALL; + break; case 4: case 5: /* jmp or ljmp, indirect */ - fix_ip = false; + fix_ip_or_call = 0; + break; } /* fall through */ default: @@ -669,10 +667,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, } auprobe->def.ilen = insn.length; - if (fix_ip) - auprobe->def.fixups |= UPROBE_FIX_IP; - if (fix_call) - auprobe->def.fixups |= UPROBE_FIX_CALL; + auprobe->def.fixups |= fix_ip_or_call; auprobe->ops = &default_xol_ops; return 0; From 1475ee7fadafc6d0c194f2f4cbdae10ed04b9580 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 27 Apr 2014 16:31:59 +0200 Subject: [PATCH 16/21] uprobes/x86: Rename *riprel* helpers to make the naming consistent handle_riprel_insn(), pre_xol_rip_insn() and handle_riprel_post_xol() look confusing and inconsistent. Rename them into riprel_analyze(), riprel_pre_xol(), and riprel_post_xol() respectively. No changes in compiled code. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index d2792e884d54..187be0e15e1d 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -268,8 +268,7 @@ static inline bool is_64bit_mm(struct mm_struct *mm) * - There's never a SIB byte. * - The displacement is always 4 bytes. */ -static void -handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) +static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { u8 *cursor; u8 reg; @@ -331,8 +330,7 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) * If we're emulating a rip-relative instruction, save the contents * of the scratch register and store the target address in that register. */ -static void -pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, struct arch_uprobe_task *autask) { if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) { @@ -346,8 +344,8 @@ pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, } } -static void -handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, + long *correction) { if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { struct arch_uprobe_task *autask; @@ -376,14 +374,14 @@ static inline bool is_64bit_mm(struct mm_struct *mm) /* * No RIP-relative addressing on 32-bit */ -static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) +static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { } -static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, struct arch_uprobe_task *autask) { } -static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) { } @@ -403,7 +401,7 @@ static inline int sizeof_long(void) static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - pre_xol_rip_insn(auprobe, regs, ¤t->utask->autask); + riprel_pre_xol(auprobe, regs, ¤t->utask->autask); return 0; } @@ -423,7 +421,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs struct uprobe_task *utask = current->utask; long correction = (long)(utask->vaddr - utask->xol_vaddr); - handle_riprel_post_xol(auprobe, regs, &correction); + riprel_post_xol(auprobe, regs, &correction); if (auprobe->def.fixups & UPROBE_FIX_IP) { regs->ip += correction; } else if (auprobe->def.fixups & UPROBE_FIX_CALL) { @@ -440,7 +438,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - handle_riprel_post_xol(auprobe, regs, NULL); + riprel_post_xol(auprobe, regs, NULL); } static struct uprobe_xol_ops default_xol_ops = { @@ -663,7 +661,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, } /* fall through */ default: - handle_riprel_insn(auprobe, &insn); + riprel_analyze(auprobe, &insn); } auprobe->def.ilen = insn.length; From 7f55e82bacaaa2c41b8e14d6bc78129b096b67b8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 27 Apr 2014 17:00:46 +0200 Subject: [PATCH 17/21] uprobes/x86: Kill the "autask" arg of riprel_pre_xol() default_pre_xol_op() passes ¤t->utask->autask to riprel_pre_xol() and this is just ugly because it still needs to load current->utask to read ->vaddr. Remove this argument, change riprel_pre_xol() to use current->utask. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 187be0e15e1d..5df1bca7c2bc 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -330,16 +330,17 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) * If we're emulating a rip-relative instruction, save the contents * of the scratch register and store the target address in that register. */ -static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { + struct uprobe_task *utask = current->utask; + if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) { - autask->saved_scratch_register = regs->ax; - regs->ax = current->utask->vaddr; + utask->autask.saved_scratch_register = regs->ax; + regs->ax = utask->vaddr; regs->ax += auprobe->def.riprel_target; } else if (auprobe->def.fixups & UPROBE_FIX_RIP_CX) { - autask->saved_scratch_register = regs->cx; - regs->cx = current->utask->vaddr; + utask->autask.saved_scratch_register = regs->cx; + regs->cx = utask->vaddr; regs->cx += auprobe->def.riprel_target; } } @@ -377,8 +378,7 @@ static inline bool is_64bit_mm(struct mm_struct *mm) static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { } -static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { } static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, @@ -401,7 +401,7 @@ static inline int sizeof_long(void) static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - riprel_pre_xol(auprobe, regs, ¤t->utask->autask); + riprel_pre_xol(auprobe, regs); return 0; } From c90a6950120a7e45f31a22653fe6543507ae64d0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 27 Apr 2014 18:13:31 +0200 Subject: [PATCH 18/21] uprobes/x86: Simplify riprel_{pre,post}_xol() and make them similar Ignoring the "correction" logic riprel_pre_xol() and riprel_post_xol() are very similar but look quite differently. 1. Add the "UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX" check at the start of riprel_pre_xol(), like the same check in riprel_post_xol(). 2. Add the trivial scratch_reg() helper which returns the address of scratch register pre_xol/post_xol need to change. 3. Change these functions to use the new helper and avoid copy-and-paste under if/else branches. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 5df1bca7c2bc..2ebadb252093 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -326,22 +326,24 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) } } +static inline unsigned long * +scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? ®s->ax : ®s->cx; +} + /* * If we're emulating a rip-relative instruction, save the contents * of the scratch register and store the target address in that register. */ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - struct uprobe_task *utask = current->utask; + if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { + struct uprobe_task *utask = current->utask; + unsigned long *sr = scratch_reg(auprobe, regs); - if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) { - utask->autask.saved_scratch_register = regs->ax; - regs->ax = utask->vaddr; - regs->ax += auprobe->def.riprel_target; - } else if (auprobe->def.fixups & UPROBE_FIX_RIP_CX) { - utask->autask.saved_scratch_register = regs->cx; - regs->cx = utask->vaddr; - regs->cx += auprobe->def.riprel_target; + utask->autask.saved_scratch_register = *sr; + *sr = utask->vaddr + auprobe->def.riprel_target; } } @@ -349,14 +351,10 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) { if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { - struct arch_uprobe_task *autask; - - autask = ¤t->utask->autask; - if (auprobe->def.fixups & UPROBE_FIX_RIP_AX) - regs->ax = autask->saved_scratch_register; - else - regs->cx = autask->saved_scratch_register; + struct uprobe_task *utask = current->utask; + unsigned long *sr = scratch_reg(auprobe, regs); + *sr = utask->autask.saved_scratch_register; /* * The original instruction includes a displacement, and so * is 4 bytes longer than what we've just single-stepped. From ce5f36a58fd1d92cb945cf2568751d40e8598508 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 24 Apr 2014 13:26:01 +0200 Subject: [PATCH 19/21] uprobes/tracing: Make uprobe_perf_close() visible to uprobe_perf_open() Preparation. Move uprobe_perf_close() up before uprobe_perf_open() to avoid the forward declaration in the next patch and make it readable. Signed-off-by: Oleg Nesterov Acked-by: Steven Rostedt Acked-by: Srikar Dronamraju --- kernel/trace/trace_uprobe.c | 44 ++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c082a7441345..51bd071eaca0 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1009,6 +1009,28 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); } +static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) +{ + bool done; + + write_lock(&tu->filter.rwlock); + if (event->hw.tp_target) { + list_del(&event->hw.tp_list); + done = tu->filter.nr_systemwide || + (event->hw.tp_target->flags & PF_EXITING) || + uprobe_filter_event(tu, event); + } else { + tu->filter.nr_systemwide--; + done = tu->filter.nr_systemwide; + } + write_unlock(&tu->filter.rwlock); + + if (!done) + uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); + + return 0; +} + static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) { bool done; @@ -1039,28 +1061,6 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) return 0; } -static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) -{ - bool done; - - write_lock(&tu->filter.rwlock); - if (event->hw.tp_target) { - list_del(&event->hw.tp_list); - done = tu->filter.nr_systemwide || - (event->hw.tp_target->flags & PF_EXITING) || - uprobe_filter_event(tu, event); - } else { - tu->filter.nr_systemwide--; - done = tu->filter.nr_systemwide; - } - write_unlock(&tu->filter.rwlock); - - if (!done) - uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); - - return 0; -} - static bool uprobe_perf_filter(struct uprobe_consumer *uc, enum uprobe_filter_ctx ctx, struct mm_struct *mm) { From 927d687480ab7e43d73a003bab58803fc67717d9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 24 Apr 2014 13:33:31 +0200 Subject: [PATCH 20/21] uprobes/tracing: Fix uprobe_perf_open() on uprobe_apply() failure uprobe_perf_open()->uprobe_apply() can fail, but this error is wrongly ignored. Change uprobe_perf_open() to do uprobe_perf_close() and return the error code in this case. Change uprobe_perf_close() to propogate the error from uprobe_apply() as well, although it should not fail. Signed-off-by: Oleg Nesterov Acked-by: Steven Rostedt Acked-by: Srikar Dronamraju --- kernel/trace/trace_uprobe.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 51bd071eaca0..5a7f1a6b3b8b 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1026,7 +1026,7 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) write_unlock(&tu->filter.rwlock); if (!done) - uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); + return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); return 0; } @@ -1034,6 +1034,7 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) { bool done; + int err; write_lock(&tu->filter.rwlock); if (event->hw.tp_target) { @@ -1055,10 +1056,13 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) } write_unlock(&tu->filter.rwlock); - if (!done) - uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); - - return 0; + err = 0; + if (!done) { + err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); + if (err) + uprobe_perf_close(tu, event); + } + return err; } static bool uprobe_perf_filter(struct uprobe_consumer *uc, From 13f59c5e45be59665c11ddde19799b6295543b7d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 28 Apr 2014 20:15:43 +0200 Subject: [PATCH 21/21] uprobes: Refuse to insert a probe into MAP_SHARED vma valid_vma() rejects the VM_SHARED vmas, but this still allows to insert a probe into the MAP_SHARED but not VM_MAYWRITE vma. Currently this is fine, such a mapping doesn't really differ from the private read-only mmap except mprotect(PROT_WRITE) won't work. However, get_user_pages(FOLL_WRITE | FOLL_FORCE) doesn't allow to COW in this case, and it would be safer to follow the same conventions as mm even if currently this happens to work. After the recent cda540ace6a1 "mm: get_user_pages(write,force) refuse to COW in shared areas" only uprobes can insert an anon page into the shared file-backed area, lets stop this and change valid_vma() to check VM_MAYSHARE instead. Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d1edc5e6fd03..7716c40f2c50 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -127,7 +127,7 @@ struct xol_area { */ static bool valid_vma(struct vm_area_struct *vma, bool is_register) { - vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED; + vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE; if (is_register) flags |= VM_WRITE;