From 1ca3683cc6d2c2ce4204df519c4e4730d037905a Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 4 Oct 2023 16:49:42 +0200 Subject: [PATCH 01/52] x86/percpu: Enable named address spaces with known compiler version Enable named address spaces with known compiler versions (GCC 12.1 and later) in order to avoid possible issues with named address spaces with older compilers. Set CC_HAS_NAMED_AS when the compiler satisfies version requirements and set USE_X86_SEG_SUPPORT to signal when segment qualifiers could be used. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20231004145137.86537-3-ubizjak@gmail.com --- arch/x86/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 982b777eadc7..ecb256954351 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2388,6 +2388,13 @@ source "kernel/livepatch/Kconfig" endmenu +config CC_HAS_NAMED_AS + def_bool CC_IS_GCC && GCC_VERSION >= 120100 + +config USE_X86_SEG_SUPPORT + def_bool y + depends on CC_HAS_NAMED_AS && SMP + config CC_HAS_SLS def_bool $(cc-option,-mharden-sls=all) From 9a462b9eafa6dda16ea8429b151edb1fb535d744 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 4 Oct 2023 16:49:43 +0200 Subject: [PATCH 02/52] x86/percpu: Use compiler segment prefix qualifier Using a segment prefix qualifier is cleaner than using a segment prefix in the inline assembly, and provides the compiler with more information, telling it that __seg_gs:[addr] is different than [addr] when it analyzes data dependencies. It also enables various optimizations that will be implemented in the next patches. Use segment prefix qualifiers when they are supported. Unfortunately, gcc does not provide a way to remove segment qualifiers, which is needed to use typeof() to create local instances of the per-CPU variable. For this reason, do not use the segment qualifier for per-CPU variables, and do casting using the segment qualifier instead. Uros: Improve compiler support detection and update the patch to the current mainline. Signed-off-by: Nadav Amit Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20231004145137.86537-4-ubizjak@gmail.com --- arch/x86/include/asm/percpu.h | 68 +++++++++++++++++++++++----------- arch/x86/include/asm/preempt.h | 2 +- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 20624b80f890..da451202a1b9 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -28,26 +28,50 @@ #include #ifdef CONFIG_SMP + +#ifdef CONFIG_CC_HAS_NAMED_AS + +#ifdef CONFIG_X86_64 +#define __percpu_seg_override __seg_gs +#else +#define __percpu_seg_override __seg_fs +#endif + +#define __percpu_prefix "" + +#else /* CONFIG_CC_HAS_NAMED_AS */ + +#define __percpu_seg_override #define __percpu_prefix "%%"__stringify(__percpu_seg)":" + +#endif /* CONFIG_CC_HAS_NAMED_AS */ + +#define __force_percpu_prefix "%%"__stringify(__percpu_seg)":" #define __my_cpu_offset this_cpu_read(this_cpu_off) /* * Compared to the generic __my_cpu_offset version, the following * saves one instruction and avoids clobbering a temp register. 
*/ -#define arch_raw_cpu_ptr(ptr) \ -({ \ - unsigned long tcp_ptr__; \ - asm ("add " __percpu_arg(1) ", %0" \ - : "=r" (tcp_ptr__) \ - : "m" (this_cpu_off), "0" (ptr)); \ - (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ +#define arch_raw_cpu_ptr(ptr) \ +({ \ + unsigned long tcp_ptr__; \ + asm ("add " __percpu_arg(1) ", %0" \ + : "=r" (tcp_ptr__) \ + : "m" (__my_cpu_var(this_cpu_off)), "0" (ptr)); \ + (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ }) -#else +#else /* CONFIG_SMP */ +#define __percpu_seg_override #define __percpu_prefix "" -#endif +#define __force_percpu_prefix "" +#endif /* CONFIG_SMP */ +#define __my_cpu_type(var) typeof(var) __percpu_seg_override +#define __my_cpu_ptr(ptr) (__my_cpu_type(*ptr) *)(uintptr_t)(ptr) +#define __my_cpu_var(var) (*__my_cpu_ptr(&var)) #define __percpu_arg(x) __percpu_prefix "%" #x +#define __force_percpu_arg(x) __force_percpu_prefix "%" #x /* * Initialized pointers to per-cpu variables needed for the boot @@ -107,14 +131,14 @@ do { \ (void)pto_tmp__; \ } \ asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \ - : [var] "+m" (_var) \ + : [var] "+m" (__my_cpu_var(_var)) \ : [val] __pcpu_reg_imm_##size(pto_val__)); \ } while (0) #define percpu_unary_op(size, qual, op, _var) \ ({ \ asm qual (__pcpu_op1_##size(op, __percpu_arg([var])) \ - : [var] "+m" (_var)); \ + : [var] "+m" (__my_cpu_var(_var))); \ }) /* @@ -144,14 +168,14 @@ do { \ __pcpu_type_##size pfo_val__; \ asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]") \ : [val] __pcpu_reg_##size("=", pfo_val__) \ - : [var] "m" (_var)); \ + : [var] "m" (__my_cpu_var(_var))); \ (typeof(_var))(unsigned long) pfo_val__; \ }) #define percpu_stable_op(size, op, _var) \ ({ \ __pcpu_type_##size pfo_val__; \ - asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]") \ + asm(__pcpu_op2_##size(op, __force_percpu_arg(P[var]), "%[val]") \ : [val] __pcpu_reg_##size("=", pfo_val__) \ : [var] "p" (&(_var))); \ (typeof(_var))(unsigned long) pfo_val__; \ @@ -166,7 +190,7 @@ do { \ asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \ __percpu_arg([var])) \ : [tmp] __pcpu_reg_##size("+", paro_tmp__), \ - [var] "+m" (_var) \ + [var] "+m" (__my_cpu_var(_var)) \ : : "memory"); \ (typeof(_var))(unsigned long) (paro_tmp__ + _val); \ }) @@ -187,7 +211,7 @@ do { \ __percpu_arg([var])) \ "\n\tjnz 1b" \ : [oval] "=&a" (pxo_old__), \ - [var] "+m" (_var) \ + [var] "+m" (__my_cpu_var(_var)) \ : [nval] __pcpu_reg_##size(, pxo_new__) \ : "memory"); \ (typeof(_var))(unsigned long) pxo_old__; \ @@ -204,7 +228,7 @@ do { \ asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \ __percpu_arg([var])) \ : [oval] "+a" (pco_old__), \ - [var] "+m" (_var) \ + [var] "+m" (__my_cpu_var(_var)) \ : [nval] __pcpu_reg_##size(, pco_new__) \ : "memory"); \ (typeof(_var))(unsigned long) pco_old__; \ @@ -221,7 +245,7 @@ do { \ CC_SET(z) \ : CC_OUT(z) (success), \ [oval] "+a" (pco_old__), \ - [var] "+m" (_var) \ + [var] "+m" (__my_cpu_var(_var)) \ : [nval] __pcpu_reg_##size(, pco_new__) \ : "memory"); \ if (unlikely(!success)) \ @@ -244,7 +268,7 @@ do { \ \ asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ - : [var] "+m" (_var), \ + : [var] "+m" (__my_cpu_var(_var)), \ "+a" (old__.low), \ "+d" (old__.high) \ : "b" (new__.low), \ @@ -276,7 +300,7 @@ do { \ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ CC_SET(z) \ : CC_OUT(z) (success), \ - [var] "+m" (_var), \ + [var] "+m" (__my_cpu_var(_var)), \ "+a" (old__.low), \ "+d" (old__.high) \ : "b" (new__.low), \ @@ -313,7 +337,7 @@ do 
{ \ \ asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ - : [var] "+m" (_var), \ + : [var] "+m" (__my_cpu_var(_var)), \ "+a" (old__.low), \ "+d" (old__.high) \ : "b" (new__.low), \ @@ -345,7 +369,7 @@ do { \ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ CC_SET(z) \ : CC_OUT(z) (success), \ - [var] "+m" (_var), \ + [var] "+m" (__my_cpu_var(_var)), \ "+a" (old__.low), \ "+d" (old__.high) \ : "b" (new__.low), \ @@ -494,7 +518,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, asm volatile("btl "__percpu_arg(2)",%1" CC_SET(c) : CC_OUT(c) (oldbit) - : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); + : "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))), "Ir" (nr)); return oldbit; } diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 4527e1430c6d..4b2a35d8d56a 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -92,7 +92,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e, + return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e, __percpu_arg([var])); } From ca4256348660cb2162668ec3d13d1f921d05374a Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 4 Oct 2023 21:23:08 +0200 Subject: [PATCH 03/52] x86/percpu: Use C for percpu read/write accessors The percpu code mostly uses inline assembly. Using segment qualifiers allows to use C code instead, which enables the compiler to perform various optimizations (e.g. propagation of memory arguments). Convert percpu read and write accessors to C code, so the memory argument can be propagated to the instruction that uses this argument. Some examples of propagations: a) into sign/zero extensions: the code improves from: 65 8a 05 00 00 00 00 mov %gs:0x0(%rip),%al 0f b6 c0 movzbl %al,%eax to: 65 0f b6 05 00 00 00 movzbl %gs:0x0(%rip),%eax 00 and in a similar way for: movzbl %gs:0x0(%rip),%edx movzwl %gs:0x0(%rip),%esi movzbl %gs:0x78(%rbx),%eax movslq %gs:0x0(%rip),%rdx movslq %gs:(%rdi),%rbx b) into compares: the code improves from: 65 8b 05 00 00 00 00 mov %gs:0x0(%rip),%eax a9 00 00 0f 00 test $0xf0000,%eax to: 65 f7 05 00 00 00 00 testl $0xf0000,%gs:0x0(%rip) 00 00 0f 00 and in a similar way for: testl $0xf0000,%gs:0x0(%rip) testb $0x1,%gs:0x0(%rip) testl $0xff00,%gs:0x0(%rip) cmpb $0x0,%gs:0x0(%rip) cmp %gs:0x0(%rip),%r14d cmpw $0x8,%gs:0x0(%rip) cmpb $0x0,%gs:(%rax) c) into other insns: the code improves from: 1a355: 83 fa ff cmp $0xffffffff,%edx 1a358: 75 07 jne 1a361 <...> 1a35a: 65 8b 15 00 00 00 00 mov %gs:0x0(%rip),%edx 1a361: to: 1a35a: 83 fa ff cmp $0xffffffff,%edx 1a35d: 65 0f 44 15 00 00 00 cmove %gs:0x0(%rip),%edx 1a364: 00 The above propagations result in the following code size improvements for current mainline kernel (with the default config), compiled with: # gcc (GCC) 12.3.1 20230508 (Red Hat 12.3.1-1) text data bss dec filename 25508862 4386540 808388 30703790 vmlinux-vanilla.o 25500922 4386532 808388 30695842 vmlinux-new.o Co-developed-by: Nadav Amit Signed-off-by: Nadav Amit Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20231004192404.31733-1-ubizjak@gmail.com --- arch/x86/include/asm/percpu.h | 65 +++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index da451202a1b9..60ea7755c0fe 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -400,13 +400,66 @@ do { \ #define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp) #define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp) +#ifdef CONFIG_USE_X86_SEG_SUPPORT + +#define __raw_cpu_read(qual, pcp) \ +({ \ + *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)); \ +}) + +#define __raw_cpu_write(qual, pcp, val) \ +do { \ + *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)) = (val); \ +} while (0) + +#define raw_cpu_read_1(pcp) __raw_cpu_read(, pcp) +#define raw_cpu_read_2(pcp) __raw_cpu_read(, pcp) +#define raw_cpu_read_4(pcp) __raw_cpu_read(, pcp) +#define raw_cpu_write_1(pcp, val) __raw_cpu_write(, pcp, val) +#define raw_cpu_write_2(pcp, val) __raw_cpu_write(, pcp, val) +#define raw_cpu_write_4(pcp, val) __raw_cpu_write(, pcp, val) + +#define this_cpu_read_1(pcp) __raw_cpu_read(volatile, pcp) +#define this_cpu_read_2(pcp) __raw_cpu_read(volatile, pcp) +#define this_cpu_read_4(pcp) __raw_cpu_read(volatile, pcp) +#define this_cpu_write_1(pcp, val) __raw_cpu_write(volatile, pcp, val) +#define this_cpu_write_2(pcp, val) __raw_cpu_write(volatile, pcp, val) +#define this_cpu_write_4(pcp, val) __raw_cpu_write(volatile, pcp, val) + +#ifdef CONFIG_X86_64 +#define raw_cpu_read_8(pcp) __raw_cpu_read(, pcp) +#define raw_cpu_write_8(pcp, val) __raw_cpu_write(, pcp, val) + +#define this_cpu_read_8(pcp) __raw_cpu_read(volatile, pcp) +#define this_cpu_write_8(pcp, val) __raw_cpu_write(volatile, pcp, val) +#endif + +#else /* CONFIG_USE_X86_SEG_SUPPORT */ + #define raw_cpu_read_1(pcp) percpu_from_op(1, , "mov", pcp) #define raw_cpu_read_2(pcp) percpu_from_op(2, , "mov", pcp) #define raw_cpu_read_4(pcp) percpu_from_op(4, , "mov", pcp) - #define raw_cpu_write_1(pcp, val) percpu_to_op(1, , "mov", (pcp), val) #define raw_cpu_write_2(pcp, val) percpu_to_op(2, , "mov", (pcp), val) #define raw_cpu_write_4(pcp, val) percpu_to_op(4, , "mov", (pcp), val) + +#define this_cpu_read_1(pcp) percpu_from_op(1, volatile, "mov", pcp) +#define this_cpu_read_2(pcp) percpu_from_op(2, volatile, "mov", pcp) +#define this_cpu_read_4(pcp) percpu_from_op(4, volatile, "mov", pcp) +#define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), val) +#define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), val) +#define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, "mov", (pcp), val) + +#ifdef CONFIG_X86_64 +#define raw_cpu_read_8(pcp) percpu_from_op(8, , "mov", pcp) +#define raw_cpu_write_8(pcp, val) percpu_to_op(8, , "mov", (pcp), val) + +#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp) +#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val) +#endif + +#endif /* CONFIG_USE_X86_SEG_SUPPORT */ + #define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val) #define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val) #define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val) @@ -432,12 +485,6 @@ do { \ #define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val) #define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val) -#define 
this_cpu_read_1(pcp) percpu_from_op(1, volatile, "mov", pcp) -#define this_cpu_read_2(pcp) percpu_from_op(2, volatile, "mov", pcp) -#define this_cpu_read_4(pcp) percpu_from_op(4, volatile, "mov", pcp) -#define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), val) -#define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), val) -#define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, "mov", (pcp), val) #define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val) #define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val) #define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val) @@ -476,8 +523,6 @@ do { \ * 32 bit must fall back to generic operations. */ #ifdef CONFIG_X86_64 -#define raw_cpu_read_8(pcp) percpu_from_op(8, , "mov", pcp) -#define raw_cpu_write_8(pcp, val) percpu_to_op(8, , "mov", (pcp), val) #define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val) #define raw_cpu_and_8(pcp, val) percpu_to_op(8, , "and", (pcp), val) #define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val) @@ -486,8 +531,6 @@ do { \ #define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval) #define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval) -#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp) -#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val) #define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val) #define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val) #define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val) From e29aad08b1da7772b362537be32335c0394e65fe Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 9 Oct 2023 17:13:48 +0200 Subject: [PATCH 04/52] x86/percpu: Disable named address spaces for KASAN -fsanitize=kernel-address (KASAN) is at the moment incompatible with named address spaces - see GCC PR sanitizer/111736: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111736 GCC is doing a KASAN check on a percpu address which it shouldn't do, and didn't used to do because we did the access using inline asm. But now that GCC does the accesses as normal (albeit special address space) memory accesses, the KASAN code triggers on them too, and it all goes to hell in a handbasket very quickly. Those percpu accessor functions need to disable any KASAN checking or other sanitizer checking. Not on the percpu address, because that's not a "real" address, it's obviously just the offset from the segment register. And GCC should probably not have generated such code in the first place, so arguably this is a bug with -fsanitize=kernel-address. The patch also removes a stale dependency on CONFIG_SMP. Reported-by: kernel test robot Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20231009151409.53656-1-ubizjak@gmail.com Closes: https://lore.kernel.org/oe-lkp/202310071301.a5113890-oliver.sang@intel.com --- arch/x86/Kconfig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ecb256954351..54e79d3061f9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2393,7 +2393,12 @@ config CC_HAS_NAMED_AS config USE_X86_SEG_SUPPORT def_bool y - depends on CC_HAS_NAMED_AS && SMP + depends on CC_HAS_NAMED_AS + # + # -fsanitize=kernel-address (KASAN) is at the moment incompatible + # with named address spaces - see GCC PR sanitizer/111736. + # + depends on !KASAN config CC_HAS_SLS def_bool $(cc-option,-mharden-sls=all) From a048d3abae7c33f0a3f4575fab15ac5504d443f7 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 15 Oct 2023 22:24:39 +0200 Subject: [PATCH 05/52] x86/percpu: Rewrite arch_raw_cpu_ptr() to be easier for compilers to optimize Implement arch_raw_cpu_ptr() as a load from this_cpu_off and then add the ptr value to the base. This way, the compiler can propagate the addend to the following instruction and simplify the address calculation. E.g.: address calculation in amd_pmu_enable_virt() improves from: 48 c7 c0 00 00 00 00 mov $0x0,%rax 87b7: R_X86_64_32S cpu_hw_events 65 48 03 05 00 00 00 add %gs:0x0(%rip),%rax 00 87bf: R_X86_64_PC32 this_cpu_off-0x4 48 c7 80 28 13 00 00 movq $0x0,0x1328(%rax) 00 00 00 00 to: 65 48 8b 05 00 00 00 mov %gs:0x0(%rip),%rax 00 8798: R_X86_64_PC32 this_cpu_off-0x4 48 c7 80 00 00 00 00 movq $0x0,0x0(%rax) 00 00 00 00 87a6: R_X86_64_32S cpu_hw_events+0x1328 The compiler also eliminates additional redundant loads from this_cpu_off, reducing the number of percpu offset reads from 1668 to 1646 on a test build, a -1.3% reduction. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: Uros Bizjak Cc: Sean Christopherson Link: https://lore.kernel.org/r/20231015202523.189168-1-ubizjak@gmail.com --- arch/x86/include/asm/percpu.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 60ea7755c0fe..915675f3ad60 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -56,9 +56,11 @@ #define arch_raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ - asm ("add " __percpu_arg(1) ", %0" \ + asm ("mov " __percpu_arg(1) ", %0" \ : "=r" (tcp_ptr__) \ - : "m" (__my_cpu_var(this_cpu_off)), "0" (ptr)); \ + : "m" (__my_cpu_var(this_cpu_off))); \ + \ + tcp_ptr__ += (unsigned long)(ptr); \ (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ }) #else /* CONFIG_SMP */ From 1d10f3aec2bb734b4b594afe8c1bd0aa656a7e4d Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 15 Oct 2023 22:24:40 +0200 Subject: [PATCH 06/52] x86/percpu: Use C for arch_raw_cpu_ptr(), to improve code generation Implement arch_raw_cpu_ptr() in C to allow the compiler to perform better optimizations, such as setting an appropriate base to compute the address. The compiler is free to choose either MOV or ADD from the this_cpu_off address to construct the optimal final address. There are some other issues when memory access to the percpu area is implemented with an asm. Compilers cannot eliminate asm common subexpressions over basic block boundaries, but are extremely good at optimizing memory access.
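For illustration, a minimal sketch of what the C form of this accessor can look like (assuming a GCC 12.1+ toolchain with the __seg_gs named address space; my_arch_raw_cpu_ptr() and the open-coded casts are simplified stand-ins for the kernel's __my_cpu_ptr()/__raw_cpu_read() machinery, not the actual implementation):

    extern unsigned long this_cpu_off;

    static inline void *my_arch_raw_cpu_ptr(void *ptr)
    {
            /*
             * Read the per-CPU offset through a __seg_gs qualified
             * pointer: an ordinary typed load that the compiler can
             * CSE and fold into the following address arithmetic.
             */
            unsigned long off = *(unsigned long __seg_gs *)(unsigned long)&this_cpu_off;

            return (void *)((unsigned long)ptr + off);
    }

Because the load is visible to the optimizer, repeated uses within a function can share a single read of this_cpu_off.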
By implementing arch_raw_cpu_ptr() in C, the compiler can eliminate additional redundant loads from this_cpu_off, further reducing the number of percpu offset reads from 1646 to 1631 on a test build, a -0.9% reduction. Co-developed-by: Nadav Amit Signed-off-by: Nadav Amit Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: Uros Bizjak Cc: Sean Christopherson Link: https://lore.kernel.org/r/20231015202523.189168-2-ubizjak@gmail.com --- arch/x86/include/asm/percpu.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 915675f3ad60..54746903b8c3 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -49,6 +49,21 @@ #define __force_percpu_prefix "%%"__stringify(__percpu_seg)":" #define __my_cpu_offset this_cpu_read(this_cpu_off) +#ifdef CONFIG_USE_X86_SEG_SUPPORT +/* + * Efficient implementation for cases in which the compiler supports + * named address spaces. Allows the compiler to perform additional + * optimizations that can save more instructions. + */ +#define arch_raw_cpu_ptr(ptr) \ +({ \ + unsigned long tcp_ptr__; \ + tcp_ptr__ = __raw_cpu_read(, this_cpu_off); \ + \ + tcp_ptr__ += (unsigned long)(ptr); \ + (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ +}) +#else /* CONFIG_USE_X86_SEG_SUPPORT */ /* * Compared to the generic __my_cpu_offset version, the following * saves one instruction and avoids clobbering a temp register. @@ -63,6 +78,8 @@ tcp_ptr__ += (unsigned long)(ptr); \ (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ }) +#endif /* CONFIG_USE_X86_SEG_SUPPORT */ + #else /* CONFIG_SMP */ #define __percpu_seg_override #define __percpu_prefix "" From e39828d2c1c0781ccfcf742791daf88fdfa481ea Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 16 Oct 2023 22:07:30 +0200 Subject: [PATCH 07/52] x86/percpu: Use the correct asm operand modifier in percpu_stable_op() The "P" asm operand modifier is a x86 target-specific modifier. When used for a constant, it drops all syntax-specific prefixes and issues the bare constant. This modifier is not correct for address handling, in this case a generic "a" operand modifier should be used. The "a" asm operand modifier substitutes a memory reference, with the actual operand treated as address. For x86_64, when a symbol is provided, the "a" modifier emits "sym(%rip)" instead of "sym", enabling shorter %rip-relative addressing. Clang allows only "i" and "r" operand constraints with an "a" modifier, so the patch normalizes the modifier/constraint pair to "a"/"i" which is consistent between both compilers. The patch reduces code size of a test build by 4072 bytes: text data bss dec hex filename 25523268 4388300 808452 30720020 1d4c014 vmlinux-old.o 25519196 4388300 808452 30715948 1d4b02c vmlinux-new.o [ mingo: Changelog clarity. ] Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: Uros Bizjak Cc: Sean Christopherson Link: https://lore.kernel.org/r/20231016200755.287403-1-ubizjak@gmail.com --- arch/x86/include/asm/percpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 54746903b8c3..ac3220aeb779 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -194,9 +194,9 @@ do { \ #define percpu_stable_op(size, op, _var) \ ({ \ __pcpu_type_##size pfo_val__; \ - asm(__pcpu_op2_##size(op, __force_percpu_arg(P[var]), "%[val]") \ + asm(__pcpu_op2_##size(op, __force_percpu_arg(a[var]), "%[val]") \ : [val] __pcpu_reg_##size("=", pfo_val__) \ - : [var] "p" (&(_var))); \ + : [var] "i" (&(_var))); \ (typeof(_var))(unsigned long) pfo_val__; \ }) From 24b8a23638cbf92449c353f828b1d309548c78f4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 18 Oct 2023 20:41:58 +0200 Subject: [PATCH 08/52] x86/fpu: Clean up FPU switching in the middle of task switching It happens to work, but it's very very wrong, because our 'current' macro is magic that is supposedly loading a stable value. It just happens to be not quite stable enough and the compilers re-load the value enough for this code to work. But it's wrong. The whole struct fpu *prev_fpu = &prev->fpu; thing in __switch_to() is pretty ugly. There's no reason why we should look at that 'prev_fpu' pointer there, or pass it down. And it only generates worse code, in how it loads 'current' when __switch_to() has the right task pointers. The attached patch not only cleans this up, it actually generates better code too: (a) it removes one push/pop pair at entry/exit because there's one less register used (no 'current') (b) it removes that pointless load of 'current' because it just uses the right argument: - movq %gs:pcpu_hot(%rip), %r12 - testq $16384, (%r12) + testq $16384, (%rdi) Signed-off-by: Linus Torvalds Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231018184227.446318-1-ubizjak@gmail.com --- arch/x86/include/asm/fpu/sched.h | 10 ++++++---- arch/x86/kernel/process_32.c | 7 +++---- arch/x86/kernel/process_64.c | 7 +++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h index ca6e5e5f16b2..c485f1944c5f 100644 --- a/arch/x86/include/asm/fpu/sched.h +++ b/arch/x86/include/asm/fpu/sched.h @@ -37,10 +37,12 @@ extern void fpu_flush_thread(void); * The FPU context is only stored/restored for a user task and * PF_KTHREAD is used to distinguish between kernel and user threads. */ -static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) +static inline void switch_fpu_prepare(struct task_struct *old, int cpu) { if (cpu_feature_enabled(X86_FEATURE_FPU) && - !(current->flags & (PF_KTHREAD | PF_USER_WORKER))) { + !(old->flags & (PF_KTHREAD | PF_USER_WORKER))) { + struct fpu *old_fpu = &old->thread.fpu; + save_fpregs_to_fpstate(old_fpu); /* * The save operation preserved register state, so the @@ -60,10 +62,10 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) * Delay loading of the complete FPU state until the return to userland. * PKRU is handled separately. 
*/ -static inline void switch_fpu_finish(void) +static inline void switch_fpu_finish(struct task_struct *new) { if (cpu_feature_enabled(X86_FEATURE_FPU)) - set_thread_flag(TIF_NEED_FPU_LOAD); + set_tsk_thread_flag(new, TIF_NEED_FPU_LOAD); } #endif /* _ASM_X86_FPU_SCHED_H */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 708c87b88cc1..0917c7f25720 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -156,13 +156,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; - struct fpu *prev_fpu = &prev->fpu; int cpu = smp_processor_id(); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_fpu, cpu); + if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD)) + switch_fpu_prepare(prev_p, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -209,7 +208,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) raw_cpu_write(pcpu_hot.current_task, next_p); - switch_fpu_finish(); + switch_fpu_finish(next_p); /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(next_p); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 33b268747bb7..1553e19904e0 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -562,14 +562,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; - struct fpu *prev_fpu = &prev->fpu; int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(pcpu_hot.hardirq_stack_inuse)); - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_fpu, cpu); + if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD)) + switch_fpu_prepare(prev_p, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -623,7 +622,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) raw_cpu_write(pcpu_hot.current_task, next_p); raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p)); - switch_fpu_finish(); + switch_fpu_finish(next_p); /* Reload sp0. */ update_task_stack(next_p); From 39d64ee59ceee0fb61243eab3c4b7b4492f80df2 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 17 Oct 2023 18:27:32 +0200 Subject: [PATCH 09/52] x86/percpu: Correct PER_CPU_VAR() usage to include symbol and its addend The PER_CPU_VAR() macro should be applied to a symbol and its addend. Inconsistent usage is currently harmless, but needs to be corrected before %rip-relative addressing is introduced to the PER_CPU_VAR() macro. No functional changes intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: linux-kernel@vger.kernel.org Cc: Brian Gerst Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Sean Christopherson --- arch/x86/entry/calling.h | 2 +- arch/x86/entry/entry_32.S | 2 +- arch/x86/entry/entry_64.S | 2 +- arch/x86/kernel/head_64.S | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index f6907627172b..47368ab0bda0 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -173,7 +173,7 @@ For 32-bit we have the following conventions - kernel is built with .endm #define THIS_CPU_user_pcid_flush_mask \ - PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask + PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask) .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 6e6af42e044a..d4e094b2c877 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -305,7 +305,7 @@ .macro CHECK_AND_APPLY_ESPFIX #ifdef CONFIG_X86_ESPFIX32 #define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8) -#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET +#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page + GDT_ESPFIX_OFFSET) ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b940e928c808..6d236652fceb 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -252,7 +252,7 @@ SYM_FUNC_START(__switch_to_asm) #ifdef CONFIG_STACKPROTECTOR movq TASK_stack_canary(%rsi), %rbx - movq %rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary + movq %rbx, PER_CPU_VAR(fixed_percpu_data + FIXED_stack_canary) #endif /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ea6995920b7a..bfe5ec2f4f83 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -449,7 +449,7 @@ SYM_CODE_START(soft_restart_cpu) UNWIND_HINT_END_OF_STACK /* Find the idle task stack */ - movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx + movq PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx movq TASK_threadsp(%rcx), %rsp jmp .Ljump_to_C_code From aa47f90cd4331e1809d56c72fcbdbbe0a85e5992 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 17 Oct 2023 18:27:33 +0200 Subject: [PATCH 10/52] x86/percpu, xen: Correct PER_CPU_VAR() usage to include symbol and its addend The PER_CPU_VAR() macro should be applied to a symbol and its addend. Inconsistent usage is currently harmless, but needs to be corrected before %rip-relative addressing is introduced to the PER_CPU_VAR() macro. No functional changes intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Reviewed-by: Boris Ostrovsky Cc: linux-kernel@vger.kernel.org Cc: Brian Gerst Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Sean Christopherson --- arch/x86/xen/xen-asm.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 9e5e68008785..448958ddbaf8 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -28,7 +28,7 @@ * non-zero. 
*/ SYM_FUNC_START(xen_irq_disable_direct) - movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask) RET SYM_FUNC_END(xen_irq_disable_direct) @@ -69,7 +69,7 @@ SYM_FUNC_END(check_events) SYM_FUNC_START(xen_irq_enable_direct) FRAME_BEGIN /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask) /* * Preempt here doesn't matter because that will deal with any @@ -78,7 +78,7 @@ SYM_FUNC_START(xen_irq_enable_direct) */ /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending + testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_pending) jz 1f call check_events @@ -97,7 +97,7 @@ SYM_FUNC_END(xen_irq_enable_direct) * x86 use opposite senses (mask vs enable). */ SYM_FUNC_START(xen_save_fl_direct) - testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask) setz %ah addb %ah, %ah RET @@ -113,7 +113,7 @@ SYM_FUNC_END(xen_read_cr2); SYM_FUNC_START(xen_read_cr2_direct) FRAME_BEGIN - _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX + _ASM_MOV PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_arch_cr2), %_ASM_AX FRAME_END RET SYM_FUNC_END(xen_read_cr2_direct); From 59bec00ace28d565ae0a68b23063ef3b961d82d5 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 17 Oct 2023 18:27:34 +0200 Subject: [PATCH 11/52] x86/percpu: Introduce %rip-relative addressing to PER_CPU_VAR() Introduce x86_64 %rip-relative addressing to the PER_CPU_VAR() macro. Instructions using %rip-relative address operand are one byte shorter than their absolute address counterparts and are also compatible with position independent executable (-fpie) builds. The patch reduces code size of a test kernel build by 150 bytes. The PER_CPU_VAR() macro is intended to be applied to a symbol and should not be used with register operands. Introduce the new __percpu macro and use it in cmpxchg{8,16}b_emu.S instead. Also add a missing function comment to this_cpu_cmpxchg8b_emu(). No functional changes intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: linux-kernel@vger.kernel.org Cc: Brian Gerst Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Sean Christopherson --- arch/x86/include/asm/percpu.h | 12 ++++++++---- arch/x86/lib/cmpxchg16b_emu.S | 12 ++++++------ arch/x86/lib/cmpxchg8b_emu.S | 30 +++++++++++++++++++++--------- 3 files changed, 35 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ac3220aeb779..bbcc1ca737f0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -4,17 +4,21 @@ #ifdef CONFIG_X86_64 #define __percpu_seg gs +#define __percpu_rel (%rip) #else #define __percpu_seg fs +#define __percpu_rel #endif #ifdef __ASSEMBLY__ #ifdef CONFIG_SMP -#define PER_CPU_VAR(var) %__percpu_seg:var -#else /* ! 
SMP */ -#define PER_CPU_VAR(var) var -#endif /* SMP */ +#define __percpu %__percpu_seg: +#else +#define __percpu +#endif + +#define PER_CPU_VAR(var) __percpu(var)__percpu_rel #ifdef CONFIG_X86_64_SMP #define INIT_PER_CPU_VAR(var) init_per_cpu__##var diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 6962df315793..4fb44894ad87 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -23,14 +23,14 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) cli /* if (*ptr == old) */ - cmpq PER_CPU_VAR(0(%rsi)), %rax + cmpq __percpu (%rsi), %rax jne .Lnot_same - cmpq PER_CPU_VAR(8(%rsi)), %rdx + cmpq __percpu 8(%rsi), %rdx jne .Lnot_same /* *ptr = new */ - movq %rbx, PER_CPU_VAR(0(%rsi)) - movq %rcx, PER_CPU_VAR(8(%rsi)) + movq %rbx, __percpu (%rsi) + movq %rcx, __percpu 8(%rsi) /* set ZF in EFLAGS to indicate success */ orl $X86_EFLAGS_ZF, (%rsp) @@ -42,8 +42,8 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) /* *ptr != old */ /* old = *ptr */ - movq PER_CPU_VAR(0(%rsi)), %rax - movq PER_CPU_VAR(8(%rsi)), %rdx + movq __percpu (%rsi), %rax + movq __percpu 8(%rsi), %rdx /* clear ZF in EFLAGS to indicate failure */ andl $(~X86_EFLAGS_ZF), (%rsp) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index 49805257b125..8632d7dd1f00 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -24,12 +24,12 @@ SYM_FUNC_START(cmpxchg8b_emu) pushfl cli - cmpl 0(%esi), %eax + cmpl (%esi), %eax jne .Lnot_same cmpl 4(%esi), %edx jne .Lnot_same - movl %ebx, 0(%esi) + movl %ebx, (%esi) movl %ecx, 4(%esi) orl $X86_EFLAGS_ZF, (%esp) @@ -38,7 +38,7 @@ SYM_FUNC_START(cmpxchg8b_emu) RET .Lnot_same: - movl 0(%esi), %eax + movl (%esi), %eax movl 4(%esi), %edx andl $(~X86_EFLAGS_ZF), (%esp) @@ -53,18 +53,30 @@ EXPORT_SYMBOL(cmpxchg8b_emu) #ifndef CONFIG_UML +/* + * Emulate 'cmpxchg8b %fs:(%rsi)' + * + * Inputs: + * %esi : memory location to compare + * %eax : low 32 bits of old value + * %edx : high 32 bits of old value + * %ebx : low 32 bits of new value + * %ecx : high 32 bits of new value + * + * Notably this is not LOCK prefixed and is not safe against NMIs + */ SYM_FUNC_START(this_cpu_cmpxchg8b_emu) pushfl cli - cmpl PER_CPU_VAR(0(%esi)), %eax + cmpl __percpu (%esi), %eax jne .Lnot_same2 - cmpl PER_CPU_VAR(4(%esi)), %edx + cmpl __percpu 4(%esi), %edx jne .Lnot_same2 - movl %ebx, PER_CPU_VAR(0(%esi)) - movl %ecx, PER_CPU_VAR(4(%esi)) + movl %ebx, __percpu (%esi) + movl %ecx, __percpu 4(%esi) orl $X86_EFLAGS_ZF, (%esp) @@ -72,8 +84,8 @@ SYM_FUNC_START(this_cpu_cmpxchg8b_emu) RET .Lnot_same2: - movl PER_CPU_VAR(0(%esi)), %eax - movl PER_CPU_VAR(4(%esi)), %edx + movl __percpu (%esi), %eax + movl __percpu 4(%esi), %edx andl $(~X86_EFLAGS_ZF), (%esp) From ed2f752e0e0a21d941ca0ee539ef3d4cd576bc5e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 20 Oct 2023 18:19:20 +0200 Subject: [PATCH 12/52] x86/percpu: Introduce const-qualified const_pcpu_hot to micro-optimize code generation Some variables in pcpu_hot, currently current_task and top_of_stack are actually per-thread variables implemented as per-CPU variables and thus stable for the duration of the respective task. There is already an attempt to eliminate redundant reads from these variables using this_cpu_read_stable() asm macro, which hides the dependency on the read memory address. However, the compiler has limited ability to eliminate asm common subexpressions, so this approach results in a limited success. 
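As an illustrative sketch of the mechanism described below (assuming GCC's __seg_gs named address space; struct hot, pcpu and const_pcpu are simplified, hypothetical stand-ins for pcpu_hot and const_pcpu_hot):

    struct hot { void *current_task; };

    extern struct hot __seg_gs pcpu;                /* writable per-CPU view */
    extern const struct hot __seg_gs const_pcpu;    /* linker alias of pcpu  */

    void *a, *b;

    void read_twice(void)
    {
            /*
             * Both statements read the same const location in the named
             * address space, so the compiler may emit one %gs-prefixed
             * load and reuse the register - something it cannot do with
             * opaque asm()-based reads.
             */
            a = const_pcpu.current_task;
            b = const_pcpu.current_task;
    }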
The solution is to allow more aggressive elimination by aliasing pcpu_hot into a const-qualified const_pcpu_hot, and to read stable per-CPU variables from this constant copy. The current per-CPU infrastructure does not support reads from const-qualified variables. However, when the compiler supports segment qualifiers, it is possible to declare the const-aliased variable in the relevant named address space. The compiler considers access to the variable, declared in this way, as a read from a constant location, and will optimize reads from the variable accordingly. By implementing constant-qualified const_pcpu_hot, the compiler can eliminate redundant reads from the constant variables, reducing the number of loads from current_task from 3766 to 3217 on a test build, a -14.6% reduction. The reduction of loads translates to the following code savings: text data bss dec hex filename 25,477,353 4389456 808452 30675261 1d4113d vmlinux-old.o 25,476,074 4389440 808452 30673966 1d40c2e vmlinux-new.o representing a code size reduction of -1279 bytes. [ mingo: Updated the changelog, EXPORT(const_pcpu_hot). ] Co-developed-by: Nadav Amit Signed-off-by: Nadav Amit Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231020162004.135244-1-ubizjak@gmail.com --- arch/x86/include/asm/current.h | 7 +++++++ arch/x86/include/asm/percpu.h | 6 +++--- arch/x86/include/asm/processor.h | 3 +++ arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/vmlinux.lds.S | 1 + include/linux/compiler.h | 2 +- 6 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index a1168e7b69e5..0538d2436673 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -36,8 +36,15 @@ static_assert(sizeof(struct pcpu_hot) == 64); DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot); +/* const-qualified alias to pcpu_hot, aliased by linker. */ +DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override, + const_pcpu_hot); + static __always_inline struct task_struct *get_current(void) { + if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) + return const_pcpu_hot.current_task; + return this_cpu_read_stable(pcpu_hot.current_task); } diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index bbcc1ca737f0..b86b27d15e52 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -413,9 +413,9 @@ do { \ * accessed while this_cpu_read_stable() allows the value to be cached. * this_cpu_read_stable() is more efficient and can be used if its value * is guaranteed to be valid across cpus. The current users include - * get_current() and get_thread_info() both of which are actually - * per-thread variables implemented as per-cpu variables and thus - * stable for the duration of the respective task. + * pcpu_hot.current_task and pcpu_hot.top_of_stack, both of which are + * actually per-thread variables implemented as per-CPU variables and + * thus stable for the duration of the respective task. 
*/ #define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp) #define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0086920cda06..b47a9975233f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -518,6 +518,9 @@ static __always_inline unsigned long current_top_of_stack(void) * and around vm86 mode and sp0 on x86_64 is special because of the * entry trampoline. */ + if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) + return pcpu_hot.top_of_stack; + return this_cpu_read_stable(pcpu_hot.top_of_stack); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 382d4e6b848d..4cc0ab0dfbb5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2051,6 +2051,7 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { .top_of_stack = TOP_OF_INIT_STACK, }; EXPORT_PER_CPU_SYMBOL(pcpu_hot); +EXPORT_PER_CPU_SYMBOL(const_pcpu_hot); #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 83d41c2601d7..e701f2dcea29 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -46,6 +46,7 @@ ENTRY(phys_startup_64) #endif jiffies = jiffies_64; +const_pcpu_hot = pcpu_hot; #if defined(CONFIG_X86_64) /* diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7779a18b24f..bf9815eaf4aa 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -212,7 +212,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define ___ADDRESSABLE(sym, __attrs) \ static void * __used __attrs \ - __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; + __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym; #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) From 0548eb067ed664b93043e033295ca71e3e706245 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 24 Oct 2023 16:28:14 +0200 Subject: [PATCH 13/52] x86/percpu: Return correct variable from current_top_of_stack() current_top_of_stack() should return the variable from the __seg_gs qualified named address space when CONFIG_USE_X86_SEG_SUPPORT=y is enabled. Fixes: ed2f752e0e0a ("x86/percpu: Introduce const-qualified const_pcpu_hot to micro-optimize code generation") Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231024142830.3226-1-ubizjak@gmail.com Cc: Andy Lutomirski Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: Sean Christopherson --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b47a9975233f..f20e87632669 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -519,7 +519,7 @@ static __always_inline unsigned long current_top_of_stack(void) * entry trampoline.
*/ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) - return pcpu_hot.top_of_stack; + return const_pcpu_hot.top_of_stack; return this_cpu_read_stable(pcpu_hot.top_of_stack); } From 43bda69ed9e3b86d0ba5ff9256e437d50074d7d5 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 5 Nov 2023 22:34:35 +0100 Subject: [PATCH 14/52] x86/percpu: Define PER_CPU_VAR macro also for !__ASSEMBLY__ Some C source files define 'asm' statements that use PER_CPU_VAR, so make the PER_CPU_VAR macro available also without __ASSEMBLY__. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231105213731.1878100-2-ubizjak@gmail.com --- arch/x86/include/asm/percpu.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index b86b27d15e52..0f12b2004b94 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -84,10 +84,15 @@ }) #endif /* CONFIG_USE_X86_SEG_SUPPORT */ +#define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel + #else /* CONFIG_SMP */ #define __percpu_seg_override #define __percpu_prefix "" #define __force_percpu_prefix "" + +#define PER_CPU_VAR(var) (var)__percpu_rel + #endif /* CONFIG_SMP */ #define __my_cpu_type(var) typeof(var) __percpu_seg_override From 17bce3b2ae2d52e8c5c12274ce4c3a631ce9e66b Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 5 Nov 2023 22:34:36 +0100 Subject: [PATCH 15/52] x86/callthunks: Handle %rip-relative relocations in call thunk template Contrary to alternatives, relocations are currently not supported in call thunk templates. Re-use the existing infrastructure from alternative.c to allow %rip-relative relocations when copying the call thunk template from its storage location. The patch allows unification of ASM_INCREMENT_CALL_DEPTH, which already uses the PER_CPU_VAR macro, with INCREMENT_CALL_DEPTH, used in the call thunk template, which is currently limited to using an absolute address. Reuse the existing relocation infrastructure from alternative.c, as suggested by Peter Zijlstra. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231105213731.1878100-3-ubizjak@gmail.com --- arch/x86/include/asm/text-patching.h | 2 ++ arch/x86/kernel/alternative.c | 3 +-- arch/x86/kernel/callthunks.c | 32 ++++++++++++++++++++++------ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 29832c338cdc..ba8d900f3ebe 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -18,6 +18,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif +void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len); + /* * Currently, the max observed size in the kernel code is * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a5ead6a6d233..aa864157f57e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -325,8 +325,7 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len) return (target < src || target > src + src_len); } -static void __init_or_module noinline -apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) +void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) { int prev, target = 0; diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index c06bfc086565..f56fa303d643 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -24,6 +24,8 @@ static int __initdata_or_module debug_callthunks; +#define MAX_PATCH_LEN (255-1) + #define prdbg(fmt, args...) \ do { \ if (debug_callthunks) \ @@ -184,10 +186,15 @@ static const u8 nops[] = { static void *patch_dest(void *dest, bool direct) { unsigned int tsize = SKL_TMPL_SIZE; + u8 insn_buff[MAX_PATCH_LEN]; u8 *pad = dest - tsize; + memcpy(insn_buff, skl_call_thunk_template, tsize); + apply_relocation(insn_buff, tsize, pad, + skl_call_thunk_template, tsize); + /* Already patched? */ - if (!bcmp(pad, skl_call_thunk_template, tsize)) + if (!bcmp(pad, insn_buff, tsize)) return pad; /* Ensure there are nops */ @@ -197,9 +204,9 @@ static void *patch_dest(void *dest, bool direct) } if (direct) - memcpy(pad, skl_call_thunk_template, tsize); + memcpy(pad, insn_buff, tsize); else - text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true); + text_poke_copy_locked(pad, insn_buff, tsize, true); return pad; } @@ -297,20 +304,27 @@ void *callthunks_translate_call_dest(void *dest) static bool is_callthunk(void *addr) { unsigned int tmpl_size = SKL_TMPL_SIZE; - void *tmpl = skl_call_thunk_template; + u8 insn_buff[MAX_PATCH_LEN]; unsigned long dest; + u8 *pad; dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT); if (!thunks_initialized || skip_addr((void *)dest)) return false; - return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size); + *pad = dest - tmpl_size; + + memcpy(insn_buff, skl_call_thunk_template, tmpl_size); + apply_relocation(insn_buff, tmpl_size, pad, + skl_call_thunk_template, tmpl_size); + + return !bcmp(pad, insn_buff, tmpl_size); } int x86_call_depth_emit_accounting(u8 **pprog, void *func) { unsigned int tmpl_size = SKL_TMPL_SIZE; - void *tmpl = skl_call_thunk_template; + u8 insn_buff[MAX_PATCH_LEN]; if (!thunks_initialized) return 0; @@ -319,7 +333,11 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func) if (func && is_callthunk(func)) return 0; - memcpy(*pprog, tmpl, tmpl_size); + memcpy(insn_buff, skl_call_thunk_template, tmpl_size); + apply_relocation(insn_buff, tmpl_size, *pprog, + skl_call_thunk_template, tmpl_size); + + memcpy(*pprog, insn_buff, tmpl_size); *pprog += tmpl_size; return tmpl_size; } From 2adeed985a42ff3334e6898c8c0827e7626c1336 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 5 Nov 2023 22:34:37 +0100 Subject: [PATCH 16/52] x86/callthunks: Fix and unify call thunks assembly snippets Currently thunk debug macros explicitly define %gs: segment register prefix for their percpu variables. This is not compatible with !CONFIG_SMP, which requires non-prefixed percpu variables. Fix call thunks debug macros to use PER_CPU_VAR macro from percpu.h to conditionally use %gs: segment register prefix, depending on CONFIG_SMP. Finally, unify ASM_ prefixed assembly macros with their non-prefixed variants. 
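For reference, the conditional expansion that the unified snippet relies on (condensed from the percpu.h changes earlier in this series):

    #ifdef CONFIG_X86_64
    # define __percpu_seg gs
    # define __percpu_rel (%rip)
    #else
    # define __percpu_seg fs
    # define __percpu_rel
    #endif

    #ifdef CONFIG_SMP
    # define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel
    #else
    # define PER_CPU_VAR(var) (var)__percpu_rel
    #endif

so that, for example, incq PER_CPU_VAR(__x86_call_count) expands to incq %gs:(__x86_call_count)(%rip) on an x86_64 SMP build and to incq (__x86_call_count)(%rip) on a !SMP build.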
With support of %rip-relative relocations in place, call thunk templates allow %rip-relative addressing, so unified assembly snippet can be used everywhere. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231105213731.1878100-4-ubizjak@gmail.com --- arch/x86/include/asm/nospec-branch.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c55cc243592e..65fbf6b853af 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -59,13 +59,13 @@ #ifdef CONFIG_CALL_THUNKS_DEBUG # define CALL_THUNKS_DEBUG_INC_CALLS \ - incq %gs:__x86_call_count; + incq PER_CPU_VAR(__x86_call_count); # define CALL_THUNKS_DEBUG_INC_RETS \ - incq %gs:__x86_ret_count; + incq PER_CPU_VAR(__x86_ret_count); # define CALL_THUNKS_DEBUG_INC_STUFFS \ - incq %gs:__x86_stuffs_count; + incq PER_CPU_VAR(__x86_stuffs_count); # define CALL_THUNKS_DEBUG_INC_CTXSW \ - incq %gs:__x86_ctxsw_count; + incq PER_CPU_VAR(__x86_ctxsw_count); #else # define CALL_THUNKS_DEBUG_INC_CALLS # define CALL_THUNKS_DEBUG_INC_RETS @@ -80,9 +80,6 @@ #define CREDIT_CALL_DEPTH \ movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); -#define ASM_CREDIT_CALL_DEPTH \ - movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); - #define RESET_CALL_DEPTH \ xor %eax, %eax; \ bts $63, %rax; \ @@ -95,20 +92,14 @@ CALL_THUNKS_DEBUG_INC_CALLS #define INCREMENT_CALL_DEPTH \ - sarq $5, %gs:pcpu_hot + X86_call_depth; \ - CALL_THUNKS_DEBUG_INC_CALLS - -#define ASM_INCREMENT_CALL_DEPTH \ sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS #else #define CREDIT_CALL_DEPTH -#define ASM_CREDIT_CALL_DEPTH #define RESET_CALL_DEPTH -#define INCREMENT_CALL_DEPTH -#define ASM_INCREMENT_CALL_DEPTH #define RESET_CALL_DEPTH_FROM_CALL +#define INCREMENT_CALL_DEPTH #endif /* @@ -158,7 +149,7 @@ jnz 771b; \ /* barrier for jnz misprediction */ \ lfence; \ - ASM_CREDIT_CALL_DEPTH \ + CREDIT_CALL_DEPTH \ CALL_THUNKS_DEBUG_INC_CTXSW #else /* @@ -325,7 +316,7 @@ .macro CALL_DEPTH_ACCOUNT #ifdef CONFIG_CALL_DEPTH_TRACKING ALTERNATIVE "", \ - __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH + __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH #endif .endm From 0978d64f9406122c369d5f46e1eb855646f6c32c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 3 Nov 2023 11:48:22 +0100 Subject: [PATCH 17/52] x86/acpi: Use %rip-relative addressing in wakeup_64.S This is a "nice-to-have" change with minor code generation benefits: - Instruction with %rip-relative address operand is one byte shorter than its absolute address counterpart, - it is also compatible with position independent executable (-fpie) builds, - it is also consistent with what the compiler emits by default when a symbol is accessed. No functional changes intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Acked-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20231103104900.409470-1-ubizjak@gmail.com --- arch/x86/kernel/acpi/wakeup_64.S | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index d5d8a352eafa..94ff83f3d3fe 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -17,7 +17,7 @@ * Hooray, we are in Long 64-bit mode (but still running in low memory) */ SYM_FUNC_START(wakeup_long64) - movq saved_magic, %rax + movq saved_magic(%rip), %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax je 2f @@ -33,14 +33,14 @@ SYM_FUNC_START(wakeup_long64) movw %ax, %es movw %ax, %fs movw %ax, %gs - movq saved_rsp, %rsp + movq saved_rsp(%rip), %rsp - movq saved_rbx, %rbx - movq saved_rdi, %rdi - movq saved_rsi, %rsi - movq saved_rbp, %rbp + movq saved_rbx(%rip), %rbx + movq saved_rdi(%rip), %rdi + movq saved_rsi(%rip), %rsi + movq saved_rbp(%rip), %rbp - movq saved_rip, %rax + movq saved_rip(%rip), %rax ANNOTATE_RETPOLINE_SAFE jmp *%rax SYM_FUNC_END(wakeup_long64) @@ -72,11 +72,11 @@ SYM_FUNC_START(do_suspend_lowlevel) movq $.Lresume_point, saved_rip(%rip) - movq %rsp, saved_rsp - movq %rbp, saved_rbp - movq %rbx, saved_rbx - movq %rdi, saved_rdi - movq %rsi, saved_rsi + movq %rsp, saved_rsp(%rip) + movq %rbp, saved_rbp(%rip) + movq %rbx, saved_rbx(%rip) + movq %rdi, saved_rdi(%rip) + movq %rsi, saved_rsi(%rip) addq $8, %rsp movl $3, %edi From 6724ba89e0b03667d56616614f55e1f772d38fdb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Nov 2023 20:15:51 +0100 Subject: [PATCH 18/52] x86/callthunks: Mark apply_relocation() as __init_or_module Do it like the rest of the methods using it. Signed-off-by: Ingo Molnar Cc: Uros Bizjak Link: https://lore.kernel.org/r/20231105213731.1878100-3-ubizjak@gmail.com --- arch/x86/include/asm/text-patching.h | 2 +- arch/x86/kernel/alternative.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index ba8d900f3ebe..fb338f00d383 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -18,7 +18,7 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif -void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len); +extern void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len); /* * Currently, the max observed size in the kernel code is diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aa864157f57e..50523713989b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -325,7 +325,7 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len) return (target < src || target > src + src_len); } -void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) +void __init_or_module apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) { int prev, target = 0; From 4604c052b84d66407f5e68045a1939685eac401e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 30 Nov 2023 17:27:35 +0100 Subject: [PATCH 19/52] x86/percpu: Declare const_pcpu_hot as extern const variable const_pcpu_hot is aliased by linker to pcpu_hot, so there is no need to use the DECLARE_PER_CPU_ALIGNED() macro. Also, declare const_pcpu_hot as extern to avoid allocating storage space for the aliased structure. 
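For reference, the resulting pattern, condensed from this and earlier patches in the series (definition in cpu/common.c, alias in vmlinux.lds.S, extern declaration in current.h):

    /* arch/x86/kernel/cpu/common.c: the only definition with storage. */
    DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);

    /* arch/x86/kernel/vmlinux.lds.S: const_pcpu_hot = pcpu_hot; */

    /*
     * arch/x86/include/asm/current.h: extern only, so no second object
     * is emitted; reads go through the const-qualified __seg_gs view.
     */
    extern const struct pcpu_hot __percpu_seg_override const_pcpu_hot;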
Fixes: ed2f752e0e0a ("x86/percpu: Introduce const-qualified const_pcpu_hot to micro-optimize code generation") Reported-by: kernel test robot Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231130162949.83518-1-ubizjak@gmail.com Closes: https://lore.kernel.org/oe-kbuild-all/202311302257.tSFtZnly-lkp@intel.com/ --- arch/x86/include/asm/current.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0538d2436673..9fbd7cb2dc86 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -37,8 +37,7 @@ static_assert(sizeof(struct pcpu_hot) == 64); DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot); /* const-qualified alias to pcpu_hot, aliased by linker. */ -DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override, - const_pcpu_hot); +extern const struct pcpu_hot __percpu_seg_override const_pcpu_hot; static __always_inline struct task_struct *get_current(void) { From 9d1c8f21533729b6ead531b676fa7d327cf00819 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 23 Nov 2023 21:34:22 +0100 Subject: [PATCH 20/52] x86/smp: Move the call to smp_processor_id() after the early exit in native_stop_other_cpus() Improve code generation in native_stop_other_cpus() a tiny bit: smp_processor_id() accesses a per-CPU variable, so the compiler is not able to move the call after the early exit on its own. Also rename the "cpu" variable to a more descriptive "this_cpu", and use 'cpu' as a separate iterator variable later in the function. No functional change intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231123203605.3474745-1-ubizjak@gmail.com --- arch/x86/kernel/smp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 6eb06d001bcc..65dd44e3fc1c 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -148,14 +148,15 @@ static int register_stop_handler(void) static void native_stop_other_cpus(int wait) { - unsigned int cpu = smp_processor_id(); + unsigned int this_cpu; unsigned long flags, timeout; if (reboot_force) return; /* Only proceed if this is the first CPU to reach this code */ - if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1) + this_cpu = smp_processor_id(); + if (atomic_cmpxchg(&stopping_cpu, -1, this_cpu) != -1) return; /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */ @@ -190,7 +191,7 @@ static void native_stop_other_cpus(int wait) * NMIs. */ cpumask_copy(&cpus_stop_mask, cpu_online_mask); - cpumask_clear_cpu(cpu, &cpus_stop_mask); + cpumask_clear_cpu(this_cpu, &cpus_stop_mask); if (!cpumask_empty(&cpus_stop_mask)) { apic_send_IPI_allbutself(REBOOT_VECTOR); @@ -234,6 +235,8 @@ static void native_stop_other_cpus(int wait) * CPUs to stop. */ if (!smp_no_nmi_ipi && !register_stop_handler()) { + unsigned int cpu; + pr_emerg("Shutting down cpus with NMI\n"); for_each_cpu(cpu, &cpus_stop_mask) From 9e9d673b2c84719937db5d6ab1d8cbcd7d45e974 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 23 Nov 2023 21:34:23 +0100 Subject: [PATCH 21/52] x86/smp: Use atomic_try_cmpxchg in native_stop_other_cpus() Use atomic_try_cmpxchg() instead of atomic_cmpxchg(*ptr, old, new) == old. X86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after the CMPXCHG. Tested by building a native Fedora-38 kernel and rebooting a 12-way SMP system using "shutdown -r" command some 100 times. 
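[ Editor's illustration, not from this series: the same try_cmpxchg()
  idiom in its common loop form. On failure the helper writes the
  freshly observed value back through its second argument, so no extra
  re-read is needed and, on x86, the branch tests the ZF flag that
  CMPXCHG already set. atomic_inc_below() is a hypothetical example
  name:

	static bool atomic_inc_below(atomic_t *v, int limit)
	{
		int old = atomic_read(v);

		do {
			if (old >= limit)
				return false;
			/* on failure, 'old' now holds the current value */
		} while (!atomic_try_cmpxchg(v, &old, old + 1));

		return true;
	}
]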
No functional change intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231123203605.3474745-2-ubizjak@gmail.com --- arch/x86/kernel/smp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 65dd44e3fc1c..1bb79526c217 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -148,15 +148,16 @@ static int register_stop_handler(void) static void native_stop_other_cpus(int wait) { - unsigned int this_cpu; + unsigned int old_cpu, this_cpu; unsigned long flags, timeout; if (reboot_force) return; /* Only proceed if this is the first CPU to reach this code */ + old_cpu = -1; this_cpu = smp_processor_id(); - if (atomic_cmpxchg(&stopping_cpu, -1, this_cpu) != -1) + if (!atomic_try_cmpxchg(&stopping_cpu, &old_cpu, this_cpu)) return; /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */ From fc50065325f8b88d6986f089ae103b5db858ab96 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 1 Dec 2023 09:57:27 +0100 Subject: [PATCH 22/52] x86/callthunks: Correct calculation of dest address in is_callthunk() GCC didn't warn on the invalid use of relocation destination pointer, so the calculated destination value was applied to the uninitialized pointer location in error. Fixes: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template") Reported-by: Nathan Chancellor Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Closes: https://lore.kernel.org/lkml/20231201035457.GA321497@dev-arch.thelio-3990X/ Link: https://lore.kernel.org/r/20231201085727.3647051-1-ubizjak@gmail.com --- arch/x86/kernel/callthunks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index f56fa303d643..2324c7f9a841 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -312,7 +312,7 @@ static bool is_callthunk(void *addr) if (!thunks_initialized || skip_addr((void *)dest)) return false; - *pad = dest - tmpl_size; + pad = (void *)(dest - tmpl_size); memcpy(insn_buff, skl_call_thunk_template, tmpl_size); apply_relocation(insn_buff, tmpl_size, pad, From 0e3703630bd3fead041a6bfb3a3f2a9891af6c34 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 4 Dec 2023 22:02:29 +0100 Subject: [PATCH 23/52] x86/percpu: Fix "const_pcpu_hot" version generation failure Version generation for "const_pcpu_hot" symbol failed because genksyms doesn't know the __seg_gs keyword. Effectively revert commit 4604c052b84d ("x86/percpu: Declare const_pcpu_hot as extern const variable") and use this_cpu_read_const() instead to avoid "sparse: dereference of noderef expression" warning when reading const_pcpu_hot. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231204210320.114429-1-ubizjak@gmail.com --- arch/x86/include/asm/current.h | 5 +++-- arch/x86/include/asm/percpu.h | 7 +++++++ arch/x86/include/asm/processor.h | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 9fbd7cb2dc86..c8c5674d69f6 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -37,12 +37,13 @@ static_assert(sizeof(struct pcpu_hot) == 64); DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot); /* const-qualified alias to pcpu_hot, aliased by linker. 
 */
-extern const struct pcpu_hot __percpu_seg_override const_pcpu_hot;
+DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
+			const_pcpu_hot);
 
 static __always_inline struct task_struct *get_current(void)
 {
 	if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
-		return const_pcpu_hot.current_task;
+		return this_cpu_read_const(const_pcpu_hot.current_task);
 
 	return this_cpu_read_stable(pcpu_hot.current_task);
 }
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0f12b2004b94..3859abad19ec 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -28,6 +28,7 @@
 
 #else /* ...!ASSEMBLY */
 
+#include
 #include
 #include
 
@@ -462,6 +463,7 @@ do { \
 #define this_cpu_write_8(pcp, val)	__raw_cpu_write(volatile, pcp, val)
 #endif
 
+#define this_cpu_read_const(pcp)	__raw_cpu_read(, pcp)
 #else /* CONFIG_USE_X86_SEG_SUPPORT */
 
 #define raw_cpu_read_1(pcp)		percpu_from_op(1, , "mov", pcp)
@@ -486,6 +488,11 @@ do { \
 #define this_cpu_write_8(pcp, val)	percpu_to_op(8, volatile, "mov", (pcp), val)
 #endif
 
+/*
+ * The generic per-cpu infrastructure is not suitable for
+ * reading const-qualified variables.
+ */
+#define this_cpu_read_const(pcp)	({ BUILD_BUG(); (typeof(pcp))0; })
 #endif /* CONFIG_USE_X86_SEG_SUPPORT */
 
 #define raw_cpu_add_1(pcp, val)		percpu_add_op(1, , (pcp), val)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f20e87632669..a94a857152c4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -519,7 +519,7 @@ static __always_inline unsigned long current_top_of_stack(void)
 	 * entry trampoline.
 	 */
 	if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
-		return const_pcpu_hot.top_of_stack;
+		return this_cpu_read_const(const_pcpu_hot.top_of_stack);
 
 	return this_cpu_read_stable(pcpu_hot.top_of_stack);
 }

From 13408c6ae684181d53c870cceddbd3a62ae34c3e Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Mon, 4 Dec 2023 22:02:30 +0100
Subject: [PATCH 24/52] x86/traps: Use current_top_of_stack() helper in traps.c

Use the current_top_of_stack() helper in sync_regs() and
vc_switch_off_ist() instead of open-coding the read of the
top_of_stack percpu variable.
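[ Editor's illustration: why such helpers compose safely with the
  BUILD_BUG() stub introduced in the previous patch. Both arms of an
  IS_ENABLED() dispatch must parse, but only the taken arm survives
  dead-code elimination, so the stub fires only on an unguarded use of
  this_cpu_read_const() in a !CONFIG_USE_X86_SEG_SUPPORT build. The
  hypothetical helper below mirrors current_top_of_stack() in
  simplified form:

	static __always_inline unsigned long demo_top_of_stack(void)
	{
		if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
			/* discarded, BUILD_BUG() and all, when the
			 * config option is off: */
			return this_cpu_read_const(const_pcpu_hot.top_of_stack);

		return this_cpu_read_stable(pcpu_hot.top_of_stack);
	}
]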
Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20231204210320.114429-2-ubizjak@gmail.com
---
 arch/x86/kernel/traps.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c876f1d36a81..78b1d1a6ed2c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -772,7 +772,7 @@ DEFINE_IDTENTRY_RAW(exc_int3)
  */
 asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
-	struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1;
+	struct pt_regs *regs = (struct pt_regs *)current_top_of_stack() - 1;
 	if (regs != eregs)
 		*regs = *eregs;
 	return regs;
@@ -790,7 +790,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
 	 * trust it and switch to the current kernel stack
 	 */
 	if (ip_within_syscall_gap(regs)) {
-		sp = this_cpu_read(pcpu_hot.top_of_stack);
+		sp = current_top_of_stack();
 		goto sync;
 	}

From 3a1d3829e193c091475ceab481c5f8deab385023 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Mon, 4 Dec 2023 22:02:31 +0100
Subject: [PATCH 25/52] x86/percpu: Avoid sparse warning with cast to named address space

Teach sparse about the __seg_fs and __seg_gs named address space
qualifiers to avoid warnings about an unexpected keyword at the end of
a cast operator.

Reported-by: kernel test robot
Acked-by: Luc Van Oostenryck
Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Link: https://lore.kernel.org/r/20231204210320.114429-3-ubizjak@gmail.com
Closes: https://lore.kernel.org/oe-kbuild-all/202310080853.UhMe5iWa-lkp@intel.com/
---
 arch/x86/include/asm/percpu.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3859abad19ec..e56a37886143 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -36,6 +36,11 @@
 
 #ifdef CONFIG_CC_HAS_NAMED_AS
 
+#ifdef __CHECKER__
+#define __seg_gs		__attribute__((address_space(__seg_gs)))
+#define __seg_fs		__attribute__((address_space(__seg_fs)))
+#endif
+
 #ifdef CONFIG_X86_64
 #define __percpu_seg_override	__seg_gs
 #else

From 86ed430cf5296ca97a66f1f37e30b7dfe47cd36f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 4 Dec 2023 08:28:41 +0100
Subject: [PATCH 26/52] x86/alternatives: Move apply_relocation() out of init section

This function is now called from a few places that are not
__init_or_module, resulting in a link-time warning:

WARNING: modpost: vmlinux: section mismatch in reference: patch_dest+0x8a (section: .text) -> apply_relocation (section: .init.text)

Remove the annotation here.

[ mingo: Also sync up add_nop() with these changes.
] Fixes: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template") Signed-off-by: Arnd Bergmann Signed-off-by: Ingo Molnar Cc: Uros Bizjak Link: https://lore.kernel.org/r/20231204072856.1033621-1-arnd@kernel.org --- arch/x86/kernel/alternative.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 50523713989b..1781e020f393 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -44,7 +44,7 @@ EXPORT_SYMBOL_GPL(alternatives_patched); #define DA_ENDBR 0x08 #define DA_SMP 0x10 -static unsigned int __initdata_or_module debug_alternative; +static unsigned int debug_alternative; static int __init debug_alt(char *str) { @@ -132,7 +132,7 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] = * each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and * *jump* over instead of executing long and daft NOPs. */ -static void __init_or_module add_nop(u8 *instr, unsigned int len) +static void add_nop(u8 *instr, unsigned int len) { u8 *target = instr + len; @@ -206,7 +206,7 @@ static int skip_nops(u8 *instr, int offset, int len) * Optimize a sequence of NOPs, possibly preceded by an unconditional jump * to the end of the NOP sequence into a single NOP. */ -static bool __init_or_module +static bool __optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target) { int i = *next - insn->length; @@ -325,7 +325,7 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len) return (target < src || target > src + src_len); } -void __init_or_module apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) +void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) { int prev, target = 0; From de8c6a352131f642b82474abe0cbb5dd26a7e081 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 13 Dec 2023 16:03:15 +0100 Subject: [PATCH 27/52] x86/percpu: Use %RIP-relative address in untagged_addr() %RIP-relative addresses are nowadays correctly handled in alternative instructions, so remove misleading comment and improve assembly to use %RIP-relative address. Also, explicitly using %gs: prefix will segfault for non-SMP builds. Use macros from percpu.h which will DTRT with segment prefix register as far as SMP/non-SMP builds are concerned. Signed-off-by: Uros Bizjak Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Acked-by: Kirill A. Shutemov Cc: Linus Torvalds Link: https://lore.kernel.org/all/20231213150357.5942-1-ubizjak%40gmail.com --- arch/x86/include/asm/uaccess_64.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f2c02e4469cc..01455c0b070c 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_ADDRESS_MASKING /* @@ -18,14 +19,10 @@ */ static inline unsigned long __untagged_addr(unsigned long addr) { - /* - * Refer tlbstate_untag_mask directly to avoid RIP-relative relocation - * in alternative instructions. The relocation gets wrong when gets - * copied to the target place. 
-	 */
 	asm (ALTERNATIVE("",
-			 "and %%gs:tlbstate_untag_mask, %[addr]\n\t", X86_FEATURE_LAM)
-	     : [addr] "+r" (addr) : "m" (tlbstate_untag_mask));
+			 "and " __percpu_arg([mask]) ", %[addr]", X86_FEATURE_LAM)
+	     : [addr] "+r" (addr)
+	     : [mask] "m" (__my_cpu_var(tlbstate_untag_mask)));
 
 	return addr;
 }

From be83e809ca67bca98fde97ad6b9344237963220b Mon Sep 17 00:00:00 2001
From: Breno Leitao
Date: Tue, 21 Nov 2023 08:07:28 -0800
Subject: [PATCH 28/52] x86/bugs: Rename CONFIG_GDS_FORCE_MITIGATION => CONFIG_MITIGATION_GDS_FORCE

So the CPU mitigations Kconfig entries - there are 10 of them by now -
are named in a historically idiosyncratic and hence rather inconsistent
fashion, and have become hard to relate to each other over the years:

  https://lore.kernel.org/lkml/20231011044252.42bplzjsam3qsasz@treble/

When they were introduced we never expected that we'd eventually have
about a dozen of them, and that more organization would be useful,
especially for Linux distributions that want to enable them in an
informed fashion, and want to make sure all mitigations are configured
as expected.

For example, the current CONFIG_SPECULATION_MITIGATIONS namespace is
only halfway populated: some mitigations have Kconfig entries and can
be modified, while other mitigations have no Kconfig entries and cannot
be controlled at build time.

Fine-grained control over these Kconfig entries can help in a number
of ways:

 1) Users can pick and choose only the mitigations that are important
    for their workloads.

 2) Users and developers can choose to disable mitigations that mangle
    the assembly code generation, making it hard to read.

 3) Separate Kconfigs for just source code readability, so that we see
    *which* butt-ugly piece of crap code is for what reason...

In most cases, if a mitigation is disabled at compilation time, it can
still be enabled at runtime using kernel command line arguments.

This is the first patch of an initial series that renames various
mitigation related Kconfig options, unifying them under a single
CONFIG_MITIGATION_* namespace:

   CONFIG_GDS_FORCE_MITIGATION => CONFIG_MITIGATION_GDS_FORCE
   CONFIG_CPU_IBPB_ENTRY => CONFIG_MITIGATION_IBPB_ENTRY
   CONFIG_CALL_DEPTH_TRACKING => CONFIG_MITIGATION_CALL_DEPTH_TRACKING
   CONFIG_PAGE_TABLE_ISOLATION => CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
   CONFIG_RETPOLINE => CONFIG_MITIGATION_RETPOLINE
   CONFIG_SLS => CONFIG_MITIGATION_SLS
   CONFIG_CPU_UNRET_ENTRY => CONFIG_MITIGATION_UNRET_ENTRY
   CONFIG_CPU_IBRS_ENTRY => CONFIG_MITIGATION_IBRS_ENTRY
   CONFIG_CPU_SRSO => CONFIG_MITIGATION_SRSO
   CONFIG_RETHUNK => CONFIG_MITIGATION_RETHUNK

Implement step 1/10 of the namespace unification of CPU mitigations
related Kconfig options and rename CONFIG_GDS_FORCE_MITIGATION to
CONFIG_MITIGATION_GDS_FORCE.

[ mingo: Rewrote changelog for clarity. ]

Suggested-by: Josh Poimboeuf
Signed-off-by: Breno Leitao
Signed-off-by: Ingo Molnar
Acked-by: Josh Poimboeuf
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20231121160740.1249350-2-leitao@debian.org
---
 arch/x86/Kconfig           | 2 +-
 arch/x86/kernel/cpu/bugs.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 53f2e7797b1d..1dba33ad06b3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2587,7 +2587,7 @@ config SLS
 	  against straight line speculation. The kernel image might be slightly larger.
-config GDS_FORCE_MITIGATION +config MITIGATION_GDS_FORCE bool "Force GDS Mitigation" depends on CPU_SUP_INTEL default n diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bb0ab8466b91..3c7e27b58f0e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -671,7 +671,7 @@ enum gds_mitigations { GDS_MITIGATION_HYPERVISOR, }; -#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION) +#if IS_ENABLED(CONFIG_MITIGATION_GDS_FORCE) static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE; #else static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL; From e0b8fcfa3cfac171d589ad6085a00c584d571f08 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 08:07:29 -0800 Subject: [PATCH 29/52] x86/bugs: Rename CONFIG_CPU_IBPB_ENTRY => CONFIG_MITIGATION_IBPB_ENTRY Step 2/10 of the namespace unification of CPU mitigations related Kconfig options. Suggested-by: Josh Poimboeuf Signed-off-by: Breno Leitao Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231121160740.1249350-3-leitao@debian.org --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 11 ++++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1dba33ad06b3..c4e04b603759 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2554,7 +2554,7 @@ config CALL_THUNKS_DEBUG Only enable this when you are debugging call thunks as this creates a noticeable runtime overhead. If unsure say N. -config CPU_IBPB_ENTRY +config MITIGATION_IBPB_ENTRY bool "Enable IBPB on kernel entry" depends on CPU_SUP_AMD && X86_64 default y diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 262e65539f83..7341fd945698 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -289,7 +289,7 @@ * where we have a stack but before any RET instruction. 
*/ .macro __UNTRAIN_RET ibpb_feature, call_depth_insns -#if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY) +#if defined(CONFIG_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) VALIDATE_UNRET_END ALTERNATIVE_3 "", \ CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3c7e27b58f0e..1de4791091ca 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -994,10 +994,10 @@ static void __init retbleed_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_IBPB)) { pr_err("WARNING: CPU does not support IBPB.\n"); goto do_cmd_auto; - } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + } else if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) { retbleed_mitigation = RETBLEED_MITIGATION_IBPB; } else { - pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); goto do_cmd_auto; } break; @@ -1023,7 +1023,8 @@ do_cmd_auto: boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) retbleed_mitigation = RETBLEED_MITIGATION_UNRET; - else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) + else if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY) && + boot_cpu_has(X86_FEATURE_IBPB)) retbleed_mitigation = RETBLEED_MITIGATION_IBPB; } @@ -2482,13 +2483,13 @@ static void __init srso_select_mitigation(void) break; case SRSO_CMD_IBPB: - if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) { if (has_microcode) { setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); srso_mitigation = SRSO_MITIGATION_IBPB; } } else { - pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); } break; From 5fa31af31e726c7f5a8f84800153054ca499338a Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 08:07:30 -0800 Subject: [PATCH 30/52] x86/bugs: Rename CONFIG_CALL_DEPTH_TRACKING => CONFIG_MITIGATION_CALL_DEPTH_TRACKING Step 3/10 of the namespace unification of CPU mitigations related Kconfig options. Suggested-by: Josh Poimboeuf Signed-off-by: Breno Leitao Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231121160740.1249350-4-leitao@debian.org --- arch/x86/Kconfig | 4 ++-- arch/x86/include/asm/current.h | 2 +- arch/x86/include/asm/disabled-features.h | 2 +- arch/x86/include/asm/nospec-branch.h | 10 +++++----- arch/x86/kernel/asm-offsets.c | 2 +- arch/x86/kernel/cpu/bugs.c | 6 +++--- arch/x86/lib/retpoline.S | 6 +++--- scripts/Makefile.lib | 2 +- tools/arch/x86/include/asm/disabled-features.h | 2 +- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c4e04b603759..b4a703d45700 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2523,7 +2523,7 @@ config CPU_UNRET_ENTRY help Compile the kernel with support for the retbleed=unret mitigation. 
-config CALL_DEPTH_TRACKING +config MITIGATION_CALL_DEPTH_TRACKING bool "Mitigate RSB underflow with call depth tracking" depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE @@ -2543,7 +2543,7 @@ config CALL_DEPTH_TRACKING config CALL_THUNKS_DEBUG bool "Enable call thunks and call depth tracking debugging" - depends on CALL_DEPTH_TRACKING + depends on MITIGATION_CALL_DEPTH_TRACKING select FUNCTION_ALIGNMENT_32B default n help diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index a1168e7b69e5..d4ff517cfbd1 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -17,7 +17,7 @@ struct pcpu_hot { struct task_struct *current_task; int preempt_count; int cpu_number; -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING u64 call_depth; #endif unsigned long top_of_stack; diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 702d93fdd10e..c1d3a5795618 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -69,7 +69,7 @@ # define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) #endif -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING # define DISABLE_CALL_DEPTH_TRACKING 0 #else # define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7341fd945698..59810237eb42 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -73,7 +73,7 @@ # define CALL_THUNKS_DEBUG_INC_CTXSW #endif -#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) +#if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) #include @@ -309,7 +309,7 @@ .macro CALL_DEPTH_ACCOUNT -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING ALTERNATIVE "", \ __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH #endif @@ -357,7 +357,7 @@ extern void entry_ibpb(void); extern void (*x86_return_thunk)(void); -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING extern void call_depth_return_thunk(void); #define CALL_DEPTH_ACCOUNT \ @@ -371,12 +371,12 @@ DECLARE_PER_CPU(u64, __x86_ret_count); DECLARE_PER_CPU(u64, __x86_stuffs_count); DECLARE_PER_CPU(u64, __x86_ctxsw_count); #endif -#else /* !CONFIG_CALL_DEPTH_TRACKING */ +#else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ static inline void call_depth_return_thunk(void) {} #define CALL_DEPTH_ACCOUNT "" -#endif /* CONFIG_CALL_DEPTH_TRACKING */ +#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 6913b372ccf7..a98020bf31bb 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -109,7 +109,7 @@ static void __used common(void) OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); OFFSET(X86_current_task, pcpu_hot, current_task); -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING OFFSET(X86_call_depth, pcpu_hot, call_depth); #endif #if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1de4791091ca..b906ed4f3091 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1003,15 +1003,15 @@ static void __init 
retbleed_select_mitigation(void) break; case RETBLEED_CMD_STUFF: - if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING) && + if (IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && spectre_v2_enabled == SPECTRE_V2_RETPOLINE) { retbleed_mitigation = RETBLEED_MITIGATION_STUFF; } else { - if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING)) + if (IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING)) pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n"); else - pr_err("WARNING: kernel not compiled with CALL_DEPTH_TRACKING.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_CALL_DEPTH_TRACKING.\n"); goto do_cmd_auto; } diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 7b2589877d06..ff46f48a0cc4 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -71,7 +71,7 @@ SYM_CODE_END(__x86_indirect_thunk_array) #include #undef GEN -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING .macro CALL_THUNK reg .align RETPOLINE_THUNK_SIZE @@ -327,7 +327,7 @@ __EXPORT_THUNK(entry_untrain_ret) #endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */ -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING .align 64 SYM_FUNC_START(call_depth_return_thunk) @@ -359,7 +359,7 @@ SYM_FUNC_START(call_depth_return_thunk) int3 SYM_FUNC_END(call_depth_return_thunk) -#endif /* CONFIG_CALL_DEPTH_TRACKING */ +#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ /* * This function name is magical and is used by -mfunction-return=thunk-extern diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 1a965fe68e01..ee233ef91696 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -254,7 +254,7 @@ objtool := $(objtree)/tools/objtool/objtool objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr -objtool-args-$(CONFIG_CALL_DEPTH_TRACKING) += --hacks=skylake +objtool-args-$(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) += --hacks=skylake objtool-args-$(CONFIG_X86_KERNEL_IBT) += --ibt objtool-args-$(CONFIG_FINEIBT) += --cfi objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL) += --mcount diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 702d93fdd10e..c1d3a5795618 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -69,7 +69,7 @@ # define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) #endif -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING # define DISABLE_CALL_DEPTH_TRACKING 0 #else # define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) From ea4654e0885348f0faa47f6d7b44a08d75ad16e9 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 08:07:31 -0800 Subject: [PATCH 31/52] x86/bugs: Rename CONFIG_PAGE_TABLE_ISOLATION => CONFIG_MITIGATION_PAGE_TABLE_ISOLATION Step 4/10 of the namespace unification of CPU mitigations related Kconfig options. [ mingo: Converted new uses that got added since the series was posted. 
] Suggested-by: Josh Poimboeuf Signed-off-by: Breno Leitao Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231121160740.1249350-5-leitao@debian.org --- Documentation/arch/x86/pti.rst | 6 +++--- arch/x86/Kconfig | 2 +- arch/x86/boot/compressed/ident_map_64.c | 4 ++-- arch/x86/entry/calling.h | 8 ++++---- arch/x86/entry/entry_64.S | 6 +++--- arch/x86/include/asm/disabled-features.h | 2 +- arch/x86/include/asm/pgalloc.h | 2 +- arch/x86/include/asm/pgtable-3level.h | 2 +- arch/x86/include/asm/pgtable.h | 18 +++++++++--------- arch/x86/include/asm/pgtable_64.h | 3 ++- arch/x86/include/asm/processor-flags.h | 2 +- arch/x86/include/asm/pti.h | 2 +- arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/head_32.S | 4 ++-- arch/x86/kernel/head_64.S | 2 +- arch/x86/kernel/ldt.c | 8 ++++---- arch/x86/mm/Makefile | 2 +- arch/x86/mm/debug_pagetables.c | 4 ++-- arch/x86/mm/dump_pagetables.c | 4 ++-- arch/x86/mm/pgtable.c | 4 ++-- arch/x86/mm/tlb.c | 10 +++++----- include/linux/pti.h | 2 +- tools/arch/x86/include/asm/disabled-features.h | 2 +- 23 files changed, 51 insertions(+), 50 deletions(-) diff --git a/Documentation/arch/x86/pti.rst b/Documentation/arch/x86/pti.rst index e08d35177bc0..57e8392f61d3 100644 --- a/Documentation/arch/x86/pti.rst +++ b/Documentation/arch/x86/pti.rst @@ -26,9 +26,9 @@ comments in pti.c). This approach helps to ensure that side-channel attacks leveraging the paging structures do not function when PTI is enabled. It can be -enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time. -Once enabled at compile-time, it can be disabled at boot with the -'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt). +enabled by setting CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=y at compile +time. Once enabled at compile-time, it can be disabled at boot with +the 'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt). Page Table Management ===================== diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b4a703d45700..a1c047004390 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2484,7 +2484,7 @@ menuconfig SPECULATION_MITIGATIONS if SPECULATION_MITIGATIONS -config PAGE_TABLE_ISOLATION +config MITIGATION_PAGE_TABLE_ISOLATION bool "Remove the kernel mapping in user mode" default y depends on (X86_64 || X86_PAE) diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index d040080d7edb..ff09ca6dbb87 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -8,8 +8,8 @@ * Copyright (C) 2016 Kees Cook */ -/* No PAGE_TABLE_ISOLATION support needed either: */ -#undef CONFIG_PAGE_TABLE_ISOLATION +/* No MITIGATION_PAGE_TABLE_ISOLATION support needed either: */ +#undef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #include "error.h" #include "misc.h" diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 9f1d94790a54..7ac2d6f946ed 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -142,10 +142,10 @@ For 32-bit we have the following conventions - kernel is built with .endif .endm -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* - * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two + * MITIGATION_PAGE_TABLE_ISOLATION PGDs are 8k. 
Flip bit 12 to switch between the two * halves: */ #define PTI_USER_PGTABLE_BIT PAGE_SHIFT @@ -160,7 +160,7 @@ For 32-bit we have the following conventions - kernel is built with .macro ADJUST_KERNEL_CR3 reg:req ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID - /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ + /* Clear PCID and "MITIGATION_PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg .endm @@ -275,7 +275,7 @@ For 32-bit we have the following conventions - kernel is built with .Lend_\@: .endm -#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */ +#else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=n: */ .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req .endm diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c40f89ab1b4c..d08cb3865c8a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -562,7 +562,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) #ifdef CONFIG_XEN_PV ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV #endif -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION ALTERNATIVE "", "jmp .Lpti_restore_regs_and_return_to_usermode", X86_FEATURE_PTI #endif @@ -578,7 +578,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) jnz .Lnative_iret ud2 -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION .Lpti_restore_regs_and_return_to_usermode: POP_REGS pop_rdi=0 @@ -1096,7 +1096,7 @@ SYM_CODE_END(error_return) * * Registers: * %r14: Used to save/restore the CR3 of the interrupted context - * when PAGE_TABLE_ISOLATION is in use. Do not clobber. + * when MITIGATION_PAGE_TABLE_ISOLATION is in use. Do not clobber. */ SYM_CODE_START(asm_exc_nmi) UNWIND_HINT_IRET_ENTRY diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index c1d3a5795618..fb604ec95a5f 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -44,7 +44,7 @@ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) #endif -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION # define DISABLE_PTI 0 #else # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index c7ec5bb88334..dcd836b59beb 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -34,7 +34,7 @@ static inline void paravirt_release_p4d(unsigned long pfn) {} */ extern gfp_t __userpte_alloc_gfp; -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * Instead of one PGD, we acquire two PGDs. Being order-1, it is * both 8k in size and 8k-aligned. 
That lets us just flip bit 12 diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 9e7c0b719c3c..dabafba957ea 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -52,7 +52,7 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) static inline void native_set_pud(pud_t *pudp, pud_t pud) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd); #endif pxx_xchg64(pud, pudp, native_pud_val(pud)); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 9d077bca6a10..df0f7d4a96f3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -909,7 +909,7 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, pmd_t *populate_extra_pmd(unsigned long vaddr); pte_t *populate_extra_pte(unsigned long vaddr); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd); /* @@ -923,12 +923,12 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) return pgd; return __pti_set_user_pgtbl(pgdp, pgd); } -#else /* CONFIG_PAGE_TABLE_ISOLATION */ +#else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { return pgd; } -#endif /* CONFIG_PAGE_TABLE_ISOLATION */ +#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ #endif /* __ASSEMBLY__ */ @@ -1131,7 +1131,7 @@ static inline int p4d_bad(p4d_t p4d) { unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; - if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (p4d_flags(p4d) & ~ignore_flags) != 0; @@ -1177,7 +1177,7 @@ static inline int pgd_bad(pgd_t pgd) if (!pgtable_l5_enabled()) return 0; - if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; @@ -1422,9 +1422,9 @@ static inline bool pgdp_maps_userspace(void *__ptr) #define pgd_leaf pgd_large static inline int pgd_large(pgd_t pgd) { return 0; } -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* - * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages + * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and * the user one is in the last 4k. To switch between them, you * just need to flip the 12th bit in their addresses. 
@@ -1469,7 +1469,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) { return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } -#endif /* CONFIG_PAGE_TABLE_ISOLATION */ +#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); @@ -1484,7 +1484,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { memcpy(dst, src, count * sizeof(pgd_t)); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION if (!static_cpu_has(X86_FEATURE_PTI)) return; /* Clone the user space pgd as well */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 24af25b1551a..7e9db77231ac 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -143,7 +143,8 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; - if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + if (pgtable_l5_enabled() || + !IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) { WRITE_ONCE(*p4dp, p4d); return; } diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index d8cccadc83a6..e5f204b9b33d 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -51,7 +51,7 @@ #define CR3_NOFLUSH 0 #endif -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION # define X86_CR3_PTI_PCID_USER_BIT 11 #endif diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h index 07375b476c4f..ab167c96b9ab 100644 --- a/arch/x86/include/asm/pti.h +++ b/arch/x86/include/asm/pti.h @@ -3,7 +3,7 @@ #define _ASM_X86_PTI_H #ifndef __ASSEMBLY__ -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION extern void pti_init(void); extern void pti_check_boottime_disable(void); extern void pti_finalize(void); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index f18ca44c904b..44a91ef5a23b 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -410,7 +410,7 @@ static void __die_header(const char *str, struct pt_regs *regs, long err) IS_ENABLED(CONFIG_SMP) ? " SMP" : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", - IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? + IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION) ? (boot_cpu_has(X86_FEATURE_PTI) ? 
" PTI" : " NOPTI") : ""); } NOKPROBE_SYMBOL(__die_header); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 487ac57e2c81..b50f3641c4d6 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -414,7 +414,7 @@ __REFDATA .align 4 SYM_DATA(initial_code, .long i386_start_kernel) -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #define PGD_ALIGN (2 * PAGE_SIZE) #define PTI_USER_PGD_FILL 1024 #else @@ -474,7 +474,7 @@ SYM_DATA_START(initial_page_table) # endif .align PAGE_SIZE /* needs to be page-sized too */ -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * PTI needs another page so sync_initial_pagetable() works correctly * and does not scribble over the data which is placed behind the diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d4918d03efb4..cd54313706a2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -622,7 +622,7 @@ SYM_CODE_END(vc_no_ghcb) #define SYM_DATA_START_PAGE_ALIGNED(name) \ SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * Each PGD needs to be 8k long and 8k aligned. We do not * ever go out to userspace with these, so we do not diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 7a814b41402d..0f19ef355f5f 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -184,7 +184,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) return new_ldt; } -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION static void do_sanity_check(struct mm_struct *mm, bool had_kernel_mapping, @@ -377,7 +377,7 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false); } -#else /* !CONFIG_PAGE_TABLE_ISOLATION */ +#else /* !CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static int map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) @@ -388,11 +388,11 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) { } -#endif /* CONFIG_PAGE_TABLE_ISOLATION */ +#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static void free_ldt_pgtables(struct mm_struct *mm) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION struct mmu_gather tlb; unsigned long start = LDT_BASE_ADDR; unsigned long end = LDT_END_ADDR; diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index c80febc44cd2..031cd10ed17f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -60,7 +60,7 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o -obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o +obj-$(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION) += pti.o obj-$(CONFIG_X86_MEM_ENCRYPT) += mem_encrypt.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_amd.o diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index b43301cb2a80..ae5c213a1cb0 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -22,7 +22,7 @@ static int ptdump_curknl_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump_curknl); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION static int ptdump_curusr_show(struct seq_file *m, void *v) { if 
(current->mm->pgd) @@ -54,7 +54,7 @@ static int __init pt_dump_debug_init(void) debugfs_create_file("current_kernel", 0400, dir, NULL, &ptdump_curknl_fops); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION debugfs_create_file("current_user", 0400, dir, NULL, &ptdump_curusr_fops); #endif diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index e1b599ecbbc2..b7b88c1d91ec 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -408,7 +408,7 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, bool user) { pgd_t *pgd = mm->pgd; -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION if (user && boot_cpu_has(X86_FEATURE_PTI)) pgd = kernel_to_user_pgdp(pgd); #endif @@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); void ptdump_walk_user_pgd_level_checkwx(void) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION pgd_t *pgd = INIT_PGD; if (!(__supported_pte_mask & _PAGE_NX) || diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 0cbc1b8e8e3d..cceb779d882d 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -293,7 +293,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) for (i = 0; i < PREALLOCATED_PMDS; i++) mop_up_one_pmd(mm, &pgdp[i]); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION if (!boot_cpu_has(X86_FEATURE_PTI)) return; @@ -325,7 +325,7 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) } } -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION static void pgd_prepopulate_user_pmd(struct mm_struct *mm, pgd_t *k_pgd, pmd_t *pmds[]) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5768d386efab..4af930947380 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -89,10 +89,10 @@ #define CR3_HW_ASID_BITS 12 /* - * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for + * When enabled, MITIGATION_PAGE_TABLE_ISOLATION consumes a single bit for * user/kernel switches */ -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION # define PTI_CONSUMED_PCID_BITS 1 #else # define PTI_CONSUMED_PCID_BITS 0 @@ -114,7 +114,7 @@ static inline u16 kern_pcid(u16 asid) { VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * Make sure that the dynamic ASID space does not conflict with the * bit we are using to switch between user and kernel ASIDs. 
@@ -149,7 +149,7 @@ static inline u16 kern_pcid(u16 asid) static inline u16 user_pcid(u16 asid) { u16 ret = kern_pcid(asid); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION ret |= 1 << X86_CR3_PTI_PCID_USER_BIT; #endif return ret; @@ -262,7 +262,7 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, static inline void invalidate_user_asid(u16 asid) { /* There is no user ASID if address space separation is off */ - if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + if (!IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) return; /* diff --git a/include/linux/pti.h b/include/linux/pti.h index 1a941efcaa62..1fbf9d6c20ef 100644 --- a/include/linux/pti.h +++ b/include/linux/pti.h @@ -2,7 +2,7 @@ #ifndef _INCLUDE_PTI_H #define _INCLUDE_PTI_H -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #include #else static inline void pti_init(void) { } diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index c1d3a5795618..fb604ec95a5f 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -44,7 +44,7 @@ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) #endif -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION # define DISABLE_PTI 0 #else # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) From aefb2f2e619b6c334bcb31de830aa00ba0b11129 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 08:07:32 -0800 Subject: [PATCH 32/52] x86/bugs: Rename CONFIG_RETPOLINE => CONFIG_MITIGATION_RETPOLINE Step 5/10 of the namespace unification of CPU mitigations related Kconfig options. [ mingo: Converted a few more uses in comments/messages as well. 
] Suggested-by: Josh Poimboeuf Signed-off-by: Breno Leitao Signed-off-by: Ingo Molnar Reviewed-by: Ariel Miculas Acked-by: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231121160740.1249350-6-leitao@debian.org --- Documentation/admin-guide/hw-vuln/spectre.rst | 8 ++++---- Documentation/admin-guide/kernel-parameters.txt | 4 ++-- arch/x86/Kconfig | 6 +++--- arch/x86/Makefile | 4 ++-- arch/x86/entry/vdso/Makefile | 4 ++-- arch/x86/include/asm/disabled-features.h | 2 +- arch/x86/include/asm/linkage.h | 8 ++++---- arch/x86/include/asm/nospec-branch.h | 8 ++++---- arch/x86/kernel/alternative.c | 6 +++--- arch/x86/kernel/cpu/bugs.c | 6 +++--- arch/x86/kernel/ftrace.c | 2 +- arch/x86/kernel/kprobes/opt.c | 2 +- arch/x86/kernel/vmlinux.lds.S | 4 ++-- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/mmu/mmu_internal.h | 2 +- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/svm/vmenter.S | 4 ++-- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/lib/Makefile | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp32.c | 2 +- arch/x86/purgatory/Makefile | 2 +- include/linux/compiler-gcc.h | 2 +- include/linux/indirect_call_wrapper.h | 2 +- include/linux/module.h | 2 +- include/net/netfilter/nf_tables_core.h | 2 +- include/net/tc_wrapper.h | 2 +- kernel/trace/ring_buffer.c | 2 +- net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_core.c | 6 +++--- net/netfilter/nft_ct.c | 4 ++-- net/netfilter/nft_lookup.c | 2 +- net/sched/sch_api.c | 2 +- scripts/Makefile.lib | 2 +- scripts/generate_rust_target.rs | 2 +- scripts/mod/modpost.c | 2 +- tools/arch/x86/include/asm/disabled-features.h | 2 +- tools/objtool/arch/x86/special.c | 2 +- tools/objtool/check.c | 2 +- 39 files changed, 62 insertions(+), 62 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 32a8893e5617..cce768afec6b 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -473,8 +473,8 @@ Spectre variant 2 -mindirect-branch=thunk-extern -mindirect-branch-register options. If the kernel is compiled with a Clang compiler, the compiler needs to support -mretpoline-external-thunk option. The kernel config - CONFIG_RETPOLINE needs to be turned on, and the CPU needs to run with - the latest updated microcode. + CONFIG_MITIGATION_RETPOLINE needs to be turned on, and the CPU needs + to run with the latest updated microcode. On Intel Skylake-era systems the mitigation covers most, but not all, cases. See :ref:`[3] ` for more details. @@ -609,8 +609,8 @@ kernel command line. Selecting 'on' will, and 'auto' may, choose a mitigation method at run time according to the CPU, the available microcode, the setting of the - CONFIG_RETPOLINE configuration option, and the - compiler with which the kernel was built. + CONFIG_MITIGATION_RETPOLINE configuration option, + and the compiler with which the kernel was built. Selecting 'on' will also enable the mitigation against user space to user space task attacks. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e0891ac76ab3..d93f403777f2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6007,8 +6007,8 @@ Selecting 'on' will, and 'auto' may, choose a mitigation method at run time according to the CPU, the available microcode, the setting of the - CONFIG_RETPOLINE configuration option, and the - compiler with which the kernel was built. 
+ CONFIG_MITIGATION_RETPOLINE configuration option, + and the compiler with which the kernel was built. Selecting 'on' will also enable the mitigation against user space to user space task attacks. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a1c047004390..2a3ebd679b0e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2457,7 +2457,7 @@ config CALL_PADDING config FINEIBT def_bool y - depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE + depends on X86_KERNEL_IBT && CFI_CLANG && MITIGATION_RETPOLINE select CALL_PADDING config HAVE_CALL_THUNKS @@ -2495,7 +2495,7 @@ config MITIGATION_PAGE_TABLE_ISOLATION See Documentation/arch/x86/pti.rst for more details. -config RETPOLINE +config MITIGATION_RETPOLINE bool "Avoid speculative indirect branches in kernel" select OBJTOOL if HAVE_OBJTOOL default y @@ -2507,7 +2507,7 @@ config RETPOLINE config RETHUNK bool "Enable return-thunks" - depends on RETPOLINE && CC_HAS_RETURN_THUNK + depends on MITIGATION_RETPOLINE && CC_HAS_RETURN_THUNK select OBJTOOL if HAVE_OBJTOOL default y if X86_64 help diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1a068de12a56..b8d23ed059fb 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -192,7 +192,7 @@ KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre -ifdef CONFIG_RETPOLINE +ifdef CONFIG_MITIGATION_RETPOLINE KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) # Additionally, avoid generating expensive indirect jumps which # are subject to retpolines for small number of switch cases. @@ -301,7 +301,7 @@ vdso-install-$(CONFIG_IA32_EMULATION) += arch/x86/entry/vdso/vdso32.so.dbg archprepare: checkbin checkbin: -ifdef CONFIG_RETPOLINE +ifdef CONFIG_MITIGATION_RETPOLINE ifeq ($(RETPOLINE_CFLAGS),) @echo "You are building kernel with non-retpoline compiler." >&2 @echo "Please update your compiler." 
>&2 diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index b1b8dd1608f7..c4df99aa1615 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -87,7 +87,7 @@ CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ -fno-omit-frame-pointer -foptimize-sibling-calls \ -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO -ifdef CONFIG_RETPOLINE +ifdef CONFIG_MITIGATION_RETPOLINE ifneq ($(RETPOLINE_VDSO_CFLAGS),) CFL += $(RETPOLINE_VDSO_CFLAGS) endif @@ -164,7 +164,7 @@ KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -ifdef CONFIG_RETPOLINE +ifdef CONFIG_MITIGATION_RETPOLINE ifneq ($(RETPOLINE_VDSO_CFLAGS),) KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) endif diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index fb604ec95a5f..24e4010c33b6 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -50,7 +50,7 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE # define DISABLE_RETPOLINE 0 #else # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 571fe4d2d232..c5165204c66f 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -42,25 +42,25 @@ #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk -#else /* CONFIG_RETPOLINE */ +#else /* CONFIG_MITIGATION_RETPOLINE */ #ifdef CONFIG_SLS #define RET ret; int3 #else #define RET ret #endif -#endif /* CONFIG_RETPOLINE */ +#endif /* CONFIG_MITIGATION_RETPOLINE */ #else /* __ASSEMBLY__ */ #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define ASM_RET "jmp __x86_return_thunk\n\t" -#else /* CONFIG_RETPOLINE */ +#else /* CONFIG_MITIGATION_RETPOLINE */ #ifdef CONFIG_SLS #define ASM_RET "ret; int3\n\t" #else #define ASM_RET "ret\n\t" #endif -#endif /* CONFIG_RETPOLINE */ +#endif /* CONFIG_MITIGATION_RETPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 59810237eb42..32680cb72003 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -241,7 +241,7 @@ * instruction irrespective of kCFI. */ .macro JMP_NOSPEC reg:req -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE __CS_PREFIX \reg jmp __x86_indirect_thunk_\reg #else @@ -251,7 +251,7 @@ .endm .macro CALL_NOSPEC reg:req -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE __CS_PREFIX \reg call __x86_indirect_thunk_\reg #else @@ -378,7 +378,7 @@ static inline void call_depth_return_thunk(void) {} #endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; @@ -399,7 +399,7 @@ static inline void call_depth_return_thunk(void) {} /* * Inline asm uses the %V modifier which is only in newer GCC - * which is ensured when CONFIG_RETPOLINE is defined. + * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. 
*/ # define CALL_NOSPEC \ ALTERNATIVE_2( \ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 55e205b21271..08c182f5a0f4 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -544,7 +544,7 @@ static inline bool is_jcc32(struct insn *insn) return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80; } -#if defined(CONFIG_RETPOLINE) && defined(CONFIG_OBJTOOL) +#if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_OBJTOOL) /* * CALL/JMP *%\reg @@ -844,12 +844,12 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_RETHUNK */ -#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */ +#else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } -#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */ +#endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */ #ifdef CONFIG_X86_KERNEL_IBT diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b906ed4f3091..fc46fd6447f9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1103,7 +1103,7 @@ static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = SPECTRE_V2_USER_NONE; -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE static bool spectre_v2_bad_module; bool retpoline_module_ok(bool has_retpoline) @@ -1416,7 +1416,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC || cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && - !IS_ENABLED(CONFIG_RETPOLINE)) { + !IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)) { pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; @@ -1470,7 +1470,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) { - if (!IS_ENABLED(CONFIG_RETPOLINE)) { + if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)) { pr_err("Kernel not compiled with retpoline; no mitigation available!"); return SPECTRE_V2_NONE; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 12df54ff0e81..93bc52d4a472 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -307,7 +307,7 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) +#define RET_SIZE (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 517821b48391..36d6809c6c9e 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -324,7 +324,7 @@ static int can_optimize(unsigned long paddr) * However, the kernel built with retpolines or IBT has jump * tables disabled so the check can be skipped altogether. 
*/ - if (!IS_ENABLED(CONFIG_RETPOLINE) && + if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && !IS_ENABLED(CONFIG_X86_KERNEL_IBT) && insn_is_indirect_jump(&insn)) return 0; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a349dbfc6d5a..bb2ec03f046d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -132,7 +132,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT SOFTIRQENTRY_TEXT -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE *(.text..__x86.indirect_thunk) *(.text..__x86.return_thunk) #endif @@ -267,7 +267,7 @@ SECTIONS } #endif -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE /* * List of instructions that call/jmp/jcc to retpoline thunks * __x86_indirect_thunk_*(). These instructions can be patched along diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 0b1f991b9a31..6fdc1cf5c33d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -263,7 +263,7 @@ static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu) static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { - if (IS_ENABLED(CONFIG_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3) + if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3) return kvm_read_cr3(vcpu); return mmu->get_guest_pgd(vcpu); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index decc1f153669..bf73a121c5ef 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -312,7 +312,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (!prefetch) vcpu->stat.pf_taken++; - if (IS_ENABLED(CONFIG_RETPOLINE) && fault.is_tdp) + if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp) r = kvm_tdp_page_fault(vcpu, &fault); else r = vcpu->arch.mmu->page_fault(vcpu, &fault); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7fb51424fc74..b2751b9acf03 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3455,7 +3455,7 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) if (!svm_check_exit_valid(exit_code)) return svm_handle_invalid_exit(vcpu, exit_code); -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE if (exit_code == SVM_EXIT_MSR) return msr_interception(vcpu); else if (exit_code == SVM_EXIT_VINTR) diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index ef2ebabb059c..b9e08837ab96 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -207,7 +207,7 @@ SYM_FUNC_START(__svm_vcpu_run) 7: vmload %_ASM_AX 8: -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif @@ -344,7 +344,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) /* Pop @svm to RDI, guest registers have been saved already. */ pop %_ASM_DI -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! 
*/ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e0f86f11c345..4e1003ba380a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6544,7 +6544,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (exit_reason.basic >= kvm_vmx_max_exit_handlers) goto unexpected_vmexit; -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE if (exit_reason.basic == EXIT_REASON_MSR_WRITE) return kvm_emulate_wrmsr(vcpu); else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER) diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index ea3a28e7b613..72cc9c90e9f3 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -49,7 +49,7 @@ lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o -lib-$(CONFIG_RETPOLINE) += retpoline.o +lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o obj-y += iomem.o diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 144a5839acfe..ad1396b1df25 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -469,7 +469,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); } else { EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */ - if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS)) + if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || IS_ENABLED(CONFIG_SLS)) EMIT1(0xCC); /* int3 */ } diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index b18ce19981ec..c10083a8e68e 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1273,7 +1273,7 @@ static int emit_jmp_edx(u8 **pprog, u8 *ip) u8 *prog = *pprog; int cnt = 0; -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5)); #else EMIT2(0xFF, 0xE2); diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 08aa0f25f12a..bc31863c5ee6 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -61,7 +61,7 @@ ifdef CONFIG_STACKPROTECTOR_STRONG PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong endif -ifdef CONFIG_RETPOLINE +ifdef CONFIG_MITIGATION_RETPOLINE PURGATORY_CFLAGS_REMOVE += $(RETPOLINE_CFLAGS) endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2ceba3fe4ec1..d24f29091f4b 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -35,7 +35,7 @@ (typeof(ptr)) (__ptr + (off)); \ }) -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index c1c76a70a6ce..fe050dab55a3 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -2,7 +2,7 @@ #ifndef _LINUX_INDIRECT_CALL_WRAPPER_H #define _LINUX_INDIRECT_CALL_WRAPPER_H -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE /* * INDIRECT_CALL_$NR - wrapper for indirect calls with $NR known builtin diff --git a/include/linux/module.h b/include/linux/module.h index 9cd0009bd050..087b369e8f17 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -885,7 +885,7 @@ static inline void 
module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE extern bool retpoline_module_ok(bool has_retpoline); #else static inline bool retpoline_module_ok(bool has_retpoline) diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 780a5f6ad4a6..ff27cb2e1662 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -93,7 +93,7 @@ extern const struct nft_set_type nft_set_bitmap_type; extern const struct nft_set_type nft_set_pipapo_type; extern const struct nft_set_type nft_set_pipapo_avx2_type; -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h index a6d481b5bcbc..a13ba0326d5e 100644 --- a/include/net/tc_wrapper.h +++ b/include/net/tc_wrapper.h @@ -4,7 +4,7 @@ #include -#if IS_ENABLED(CONFIG_RETPOLINE) +#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9286f88fcd32..9cb69332921d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1156,7 +1156,7 @@ static inline u64 rb_time_stamp(struct trace_buffer *buffer) u64 ts; /* Skip retpolines :-( */ - if (IS_ENABLED(CONFIG_RETPOLINE) && likely(buffer->clock == trace_clock_local)) + if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && likely(buffer->clock == trace_clock_local)) ts = trace_clock_local(); else ts = buffer->clock(); diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index d4958e7e7631..614815a3ed73 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -101,7 +101,7 @@ endif endif ifdef CONFIG_NFT_CT -ifdef CONFIG_RETPOLINE +ifdef CONFIG_MITIGATION_RETPOLINE nf_tables-objs += nft_ct_fast.o endif endif diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index c3e635364701..a48d5f0e2f3e 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -21,7 +21,7 @@ #include #include -#if defined(CONFIG_RETPOLINE) && defined(CONFIG_X86) +#if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_X86) static struct static_key_false nf_tables_skip_direct_calls; @@ -207,7 +207,7 @@ static void expr_call_ops_eval(const struct nft_expr *expr, struct nft_regs *regs, struct nft_pktinfo *pkt) { -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE unsigned long e; if (nf_skip_indirect_calls()) @@ -236,7 +236,7 @@ static void expr_call_ops_eval(const struct nft_expr *expr, X(e, nft_objref_map_eval); #undef X indirect_call: -#endif /* CONFIG_RETPOLINE */ +#endif /* CONFIG_MITIGATION_RETPOLINE */ expr->ops->eval(expr, regs, pkt); } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 86bb9d7797d9..d3e66bcb2a91 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -751,7 +751,7 @@ static bool nft_ct_set_reduce(struct nft_regs_track *track, return false; } -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE static const struct nft_expr_ops nft_ct_get_fast_ops = { .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), @@ -796,7 +796,7 @@ nft_ct_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-EINVAL); if (tb[NFTA_CT_DREG]) { -#ifdef 
CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE u32 k = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); switch (k) { diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 870e5b113d13..a0055f510e31 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -24,7 +24,7 @@ struct nft_lookup { struct nft_set_binding binding; }; -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE bool nft_set_do_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e9eaf637220e..d577c9e1cb42 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2353,7 +2353,7 @@ static struct pernet_operations psched_net_ops = { .exit = psched_net_exit, }; -#if IS_ENABLED(CONFIG_RETPOLINE) +#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); #endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index ee233ef91696..615f2612a7ef 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -262,7 +262,7 @@ ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL objtool-args-$(CONFIG_HAVE_OBJTOOL_NOP_MCOUNT) += --mnop endif objtool-args-$(CONFIG_UNWINDER_ORC) += --orc -objtool-args-$(CONFIG_RETPOLINE) += --retpoline +objtool-args-$(CONFIG_MITIGATION_RETPOLINE) += --retpoline objtool-args-$(CONFIG_RETHUNK) += --rethunk objtool-args-$(CONFIG_SLS) += --sls objtool-args-$(CONFIG_STACK_VALIDATION) += --stackval diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 3c6cbe2b278d..eaf524603796 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -155,7 +155,7 @@ fn main() { "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", ); let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); - if cfg.has("RETPOLINE") { + if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } ts.push("features", features); diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index cb6406f485a9..72fead5f973b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1843,7 +1843,7 @@ static void add_header(struct buffer *b, struct module *mod) buf_printf(b, "\n" - "#ifdef CONFIG_RETPOLINE\n" + "#ifdef CONFIG_MITIGATION_RETPOLINE\n" "MODULE_INFO(retpoline, \"Y\");\n" "#endif\n"); diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index fb604ec95a5f..24e4010c33b6 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -50,7 +50,7 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE # define DISABLE_RETPOLINE 0 #else # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 29e949579ede..4134d27c696b 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -83,7 +83,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, * TODO: Once we have DWARF CFI and smarter instruction decoding logic, * ensure the same register is used in the mov and jump instructions. * - * NOTE: RETPOLINE made it harder still to decode dynamic jumps. + * NOTE: MITIGATION_RETPOLINE made it harder still to decode dynamic jumps. 
*/ struct reloc *arch_find_switch_table(struct objtool_file *file, struct instruction *insn) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 548ec3cd7c00..84067f018f7e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3984,7 +3984,7 @@ static int validate_retpoline(struct objtool_file *file) } else continue; } else { - WARN_INSN(insn, "indirect %s found in RETPOLINE build", + WARN_INSN(insn, "indirect %s found in MITIGATION_RETPOLINE build", insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); } From 7b75782ffd8243288d0661750b2dcc2596d676cb Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 08:07:33 -0800 Subject: [PATCH 33/52] x86/bugs: Rename CONFIG_SLS => CONFIG_MITIGATION_SLS Step 6/10 of the namespace unification of CPU mitigations related Kconfig options. Suggested-by: Josh Poimboeuf Signed-off-by: Breno Leitao Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231121160740.1249350-7-leitao@debian.org --- arch/x86/Kconfig | 2 +- arch/x86/Makefile | 2 +- arch/x86/include/asm/linkage.h | 4 ++-- arch/x86/kernel/alternative.c | 4 ++-- arch/x86/kernel/ftrace.c | 3 ++- arch/x86/net/bpf_jit_comp.c | 4 ++-- scripts/Makefile.lib | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2a3ebd679b0e..ba4546556fc5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2577,7 +2577,7 @@ config CPU_SRSO help Enable the SRSO mitigation needed on AMD Zen1-4 machines. -config SLS +config MITIGATION_SLS bool "Mitigate Straight-Line-Speculation" depends on CC_HAS_SLS && X86_64 select OBJTOOL if HAVE_OBJTOOL diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b8d23ed059fb..5ce8c30e7701 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -205,7 +205,7 @@ ifdef CONFIG_MITIGATION_RETPOLINE endif endif -ifdef CONFIG_SLS +ifdef CONFIG_MITIGATION_SLS KBUILD_CFLAGS += -mharden-sls=all endif diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index c5165204c66f..09e2d026df33 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -43,7 +43,7 @@ #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk #else /* CONFIG_MITIGATION_RETPOLINE */ -#ifdef CONFIG_SLS +#ifdef CONFIG_MITIGATION_SLS #define RET ret; int3 #else #define RET ret @@ -55,7 +55,7 @@ #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define ASM_RET "jmp __x86_return_thunk\n\t" #else /* CONFIG_MITIGATION_RETPOLINE */ -#ifdef CONFIG_SLS +#ifdef CONFIG_MITIGATION_SLS #define ASM_RET "ret; int3\n\t" #else #define ASM_RET "ret\n\t" diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 08c182f5a0f4..f5442d0d2ee9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -708,8 +708,8 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) /* * The compiler is supposed to EMIT an INT3 after every unconditional * JMP instruction due to AMD BTC. However, if the compiler is too old - * or SLS isn't enabled, we still need an INT3 after indirect JMPs - * even on Intel. + * or MITIGATION_SLS isn't enabled, we still need an INT3 after + * indirect JMPs even on Intel. 
*/ if (op == JMP32_INSN_OPCODE && i < insn->length) bytes[i++] = INT3_INSN_OPCODE; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 93bc52d4a472..70139d9d2e01 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -307,7 +307,8 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -#define RET_SIZE (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) +#define RET_SIZE \ + (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_MITIGATION_SLS)) static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index ad1396b1df25..63b7aa48793e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -469,7 +469,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); } else { EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */ - if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || IS_ENABLED(CONFIG_SLS)) + if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || IS_ENABLED(CONFIG_MITIGATION_SLS)) EMIT1(0xCC); /* int3 */ } @@ -484,7 +484,7 @@ static void emit_return(u8 **pprog, u8 *ip) emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ - if (IS_ENABLED(CONFIG_SLS)) + if (IS_ENABLED(CONFIG_MITIGATION_SLS)) EMIT1(0xCC); /* int3 */ } diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 615f2612a7ef..b272ca64cd75 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -264,7 +264,7 @@ endif objtool-args-$(CONFIG_UNWINDER_ORC) += --orc objtool-args-$(CONFIG_MITIGATION_RETPOLINE) += --retpoline objtool-args-$(CONFIG_RETHUNK) += --rethunk -objtool-args-$(CONFIG_SLS) += --sls +objtool-args-$(CONFIG_MITIGATION_SLS) += --sls objtool-args-$(CONFIG_STACK_VALIDATION) += --stackval objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE) += --static-call objtool-args-$(CONFIG_HAVE_UACCESS_VALIDATION) += --uaccess From ac61d43983a4fe8e3ee600eee44c40868c14340a Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 08:07:34 -0800 Subject: [PATCH 34/52] x86/bugs: Rename CONFIG_CPU_UNRET_ENTRY => CONFIG_MITIGATION_UNRET_ENTRY Step 7/10 of the namespace unification of CPU mitigations related Kconfig options. Suggested-by: Josh Poimboeuf Signed-off-by: Breno Leitao Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231121160740.1249350-8-leitao@debian.org --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/disabled-features.h | 2 +- arch/x86/include/asm/nospec-branch.h | 6 +++--- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/bugs.c | 6 +++--- arch/x86/kernel/vmlinux.lds.S | 2 +- arch/x86/lib/retpoline.S | 10 +++++----- include/linux/objtool.h | 2 +- scripts/Makefile.vmlinux_o | 2 +- tools/arch/x86/include/asm/disabled-features.h | 2 +- 10 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ba4546556fc5..77b8769f3026 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2516,7 +2516,7 @@ config RETHUNK Requires a compiler with -mfunction-return=thunk-extern support for full protection. The kernel may run slower. 
-config CPU_UNRET_ENTRY +config MITIGATION_UNRET_ENTRY bool "Enable UNRET on kernel entry" depends on CPU_SUP_AMD && RETHUNK && X86_64 default y diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 24e4010c33b6..151f0d50e7e0 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -63,7 +63,7 @@ # define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) #endif -#ifdef CONFIG_CPU_UNRET_ENTRY +#ifdef CONFIG_MITIGATION_UNRET_ENTRY # define DISABLE_UNRET 0 #else # define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 32680cb72003..97478690a579 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -212,7 +212,7 @@ */ .macro VALIDATE_UNRET_END #if defined(CONFIG_NOINSTR_VALIDATION) && \ - (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) ANNOTATE_RETPOLINE_SAFE nop #endif @@ -271,7 +271,7 @@ .Lskip_rsb_\@: .endm -#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) #define CALL_UNTRAIN_RET "call entry_untrain_ret" #else #define CALL_UNTRAIN_RET "" @@ -334,7 +334,7 @@ extern void __x86_return_thunk(void); static inline void __x86_return_thunk(void) {} #endif -#ifdef CONFIG_CPU_UNRET_ENTRY +#ifdef CONFIG_MITIGATION_UNRET_ENTRY extern void retbleed_return_thunk(void); #else static inline void retbleed_return_thunk(void) {} diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9f42d1c59e09..6c7db2ec2c5e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -928,7 +928,7 @@ static void fix_erratum_1386(struct cpuinfo_x86 *c) void init_spectral_chicken(struct cpuinfo_x86 *c) { -#ifdef CONFIG_CPU_UNRET_ENTRY +#ifdef CONFIG_MITIGATION_UNRET_ENTRY u64 value; /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fc46fd6447f9..2580368c32d1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -982,10 +982,10 @@ static void __init retbleed_select_mitigation(void) return; case RETBLEED_CMD_UNRET: - if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { + if (IS_ENABLED(CONFIG_MITIGATION_UNRET_ENTRY)) { retbleed_mitigation = RETBLEED_MITIGATION_UNRET; } else { - pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_UNRET_ENTRY.\n"); goto do_cmd_auto; } break; @@ -1021,7 +1021,7 @@ do_cmd_auto: case RETBLEED_CMD_AUTO: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + if (IS_ENABLED(CONFIG_MITIGATION_UNRET_ENTRY)) retbleed_mitigation = RETBLEED_MITIGATION_UNRET; else if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bb2ec03f046d..9c5cca50a36f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -504,7 +504,7 @@ INIT_PER_CPU(irq_stack_backing_store); "fixed_percpu_data is not at start of per-cpu area"); #endif -#ifdef CONFIG_CPU_UNRET_ENTRY +#ifdef CONFIG_MITIGATION_UNRET_ENTRY . 
= ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index ff46f48a0cc4..0ad67ccadd4c 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -230,7 +230,7 @@ SYM_CODE_END(srso_return_thunk) #define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2" #endif /* CONFIG_CPU_SRSO */ -#ifdef CONFIG_CPU_UNRET_ENTRY +#ifdef CONFIG_MITIGATION_UNRET_ENTRY /* * Some generic notes on the untraining sequences: @@ -312,11 +312,11 @@ SYM_CODE_END(retbleed_return_thunk) SYM_FUNC_END(retbleed_untrain_ret) #define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret" -#else /* !CONFIG_CPU_UNRET_ENTRY */ +#else /* !CONFIG_MITIGATION_UNRET_ENTRY */ #define JMP_RETBLEED_UNTRAIN_RET "ud2" -#endif /* CONFIG_CPU_UNRET_ENTRY */ +#endif /* CONFIG_MITIGATION_UNRET_ENTRY */ -#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) SYM_FUNC_START(entry_untrain_ret) ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \ @@ -325,7 +325,7 @@ SYM_FUNC_START(entry_untrain_ret) SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) -#endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */ +#endif /* CONFIG_MITIGATION_UNRET_ENTRY || CONFIG_CPU_SRSO */ #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 33212e93f4a6..d030671a4c49 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -131,7 +131,7 @@ */ .macro VALIDATE_UNRET_BEGIN #if defined(CONFIG_NOINSTR_VALIDATION) && \ - (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) .Lhere_\@: .pushsection .discard.validate_unret .long .Lhere_\@ - . diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 25b3b587d37c..6277dbd730bb 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -38,7 +38,7 @@ objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION)) vmlinux-objtool-args-$(delay-objtool) += $(objtool-args-y) vmlinux-objtool-args-$(CONFIG_GCOV_KERNEL) += --no-unreachable vmlinux-objtool-args-$(CONFIG_NOINSTR_VALIDATION) += --noinstr \ - $(if $(or $(CONFIG_CPU_UNRET_ENTRY),$(CONFIG_CPU_SRSO)), --unret) + $(if $(or $(CONFIG_MITIGATION_UNRET_ENTRY),$(CONFIG_CPU_SRSO)), --unret) objtool-args = $(vmlinux-objtool-args-y) --link diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 24e4010c33b6..151f0d50e7e0 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -63,7 +63,7 @@ # define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) #endif -#ifdef CONFIG_CPU_UNRET_ENTRY +#ifdef CONFIG_MITIGATION_UNRET_ENTRY # define DISABLE_UNRET 0 #else # define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) From 1da8d2172ce5175118929363fe568e41f24ad3d6 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 08:07:35 -0800 Subject: [PATCH 35/52] x86/bugs: Rename CONFIG_CPU_IBRS_ENTRY => CONFIG_MITIGATION_IBRS_ENTRY Step 8/10 of the namespace unification of CPU mitigations related Kconfig options. 
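Most call sites touched by these renames test the option through IS_ENABLED() rather than a raw #ifdef, which is what makes each step a mechanical, tree-wide substitution. A minimal sketch of that pattern follows; the function name is hypothetical and not code from this series:

    #include <linux/kconfig.h>
    #include <linux/printk.h>

    /* IS_ENABLED() expands to a constant 0 or 1, so the compiler drops
     * the dead branch while both branches stay visible to grep and to
     * type checking, which also makes a rename like this easy to audit. */
    static void example_report_ibrs_entry(void)
    {
            if (IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY))
                    pr_info("IBRS-on-entry mitigation compiled in\n");
            else
                    pr_info("IBRS-on-entry mitigation not compiled in\n");
    }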
Suggested-by: Josh Poimboeuf Signed-off-by: Breno Leitao Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231121160740.1249350-9-leitao@debian.org --- arch/x86/Kconfig | 2 +- arch/x86/entry/calling.h | 4 ++-- arch/x86/kernel/cpu/bugs.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 77b8769f3026..60d38df26f00 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2561,7 +2561,7 @@ config MITIGATION_IBPB_ENTRY help Compile the kernel with support for the retbleed=ibpb mitigation. -config CPU_IBRS_ENTRY +config MITIGATION_IBRS_ENTRY bool "Enable IBRS on kernel entry" depends on CPU_SUP_INTEL && X86_64 default y diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 7ac2d6f946ed..39e069b68c6e 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -303,7 +303,7 @@ For 32-bit we have the following conventions - kernel is built with * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. */ .macro IBRS_ENTER save_reg -#ifdef CONFIG_CPU_IBRS_ENTRY +#ifdef CONFIG_MITIGATION_IBRS_ENTRY ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS movl $MSR_IA32_SPEC_CTRL, %ecx @@ -332,7 +332,7 @@ For 32-bit we have the following conventions - kernel is built with * regs. Must be called after the last RET. */ .macro IBRS_EXIT save_reg -#ifdef CONFIG_CPU_IBRS_ENTRY +#ifdef CONFIG_MITIGATION_IBRS_ENTRY ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS movl $MSR_IA32_SPEC_CTRL, %ecx diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2580368c32d1..e11bacbd8f39 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1439,7 +1439,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { + if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY)) { pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; @@ -1565,7 +1565,7 @@ static void __init spectre_v2_select_mitigation(void) break; } - if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && + if (IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY) && boot_cpu_has_bug(X86_BUG_RETBLEED) && retbleed_cmd != RETBLEED_CMD_OFF && retbleed_cmd != RETBLEED_CMD_STUFF && From a033eec9a06ce25388e71fa1e888792a718b9c17 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 08:07:36 -0800 Subject: [PATCH 36/52] x86/bugs: Rename CONFIG_CPU_SRSO => CONFIG_MITIGATION_SRSO Step 9/10 of the namespace unification of CPU mitigations related Kconfig options. Suggested-by: Josh Poimboeuf Signed-off-by: Breno Leitao Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231121160740.1249350-10-leitao@debian.org --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/nospec-branch.h | 6 +++--- arch/x86/kernel/cpu/bugs.c | 8 ++++---- arch/x86/kernel/vmlinux.lds.S | 4 ++-- arch/x86/lib/retpoline.S | 10 +++++----- include/linux/objtool.h | 2 +- scripts/Makefile.vmlinux_o | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 60d38df26f00..a2743b7f2a18 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2570,7 +2570,7 @@ config MITIGATION_IBRS_ENTRY This mitigates both spectre_v2 and retbleed at great cost to performance. 
-config CPU_SRSO +config MITIGATION_SRSO bool "Mitigate speculative RAS overflow on AMD" depends on CPU_SUP_AMD && X86_64 && RETHUNK default y diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 97478690a579..94c70832994f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -212,7 +212,7 @@ */ .macro VALIDATE_UNRET_END #if defined(CONFIG_NOINSTR_VALIDATION) && \ - (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) ANNOTATE_RETPOLINE_SAFE nop #endif @@ -271,7 +271,7 @@ .Lskip_rsb_\@: .endm -#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) #define CALL_UNTRAIN_RET "call entry_untrain_ret" #else #define CALL_UNTRAIN_RET "" @@ -340,7 +340,7 @@ extern void retbleed_return_thunk(void); static inline void retbleed_return_thunk(void) {} #endif -#ifdef CONFIG_CPU_SRSO +#ifdef CONFIG_MITIGATION_SRSO extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); #else diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e11bacbd8f39..f2775417bda2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2458,7 +2458,7 @@ static void __init srso_select_mitigation(void) break; case SRSO_CMD_SAFE_RET: - if (IS_ENABLED(CONFIG_CPU_SRSO)) { + if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) { /* * Enable the return thunk for generated code * like ftrace, static_call, etc. @@ -2478,7 +2478,7 @@ static void __init srso_select_mitigation(void) else srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED; } else { - pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n"); } break; @@ -2494,13 +2494,13 @@ static void __init srso_select_mitigation(void) break; case SRSO_CMD_IBPB_ON_VMEXIT: - if (IS_ENABLED(CONFIG_CPU_SRSO)) { + if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) { if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; } } else { - pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n"); } break; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9c5cca50a36f..6716fccd59ce 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -142,7 +142,7 @@ SECTIONS *(.text..__x86.rethunk_untrain) ENTRY_TEXT -#ifdef CONFIG_CPU_SRSO +#ifdef CONFIG_MITIGATION_SRSO /* * See the comment above srso_alias_untrain_ret()'s * definition. @@ -508,7 +508,7 @@ INIT_PER_CPU(irq_stack_backing_store); . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); #endif -#ifdef CONFIG_CPU_SRSO +#ifdef CONFIG_MITIGATION_SRSO . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); /* * GNU ld cannot do XOR until 2.41. 
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 0ad67ccadd4c..67b52cbec648 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -138,7 +138,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) */ .section .text..__x86.return_thunk -#ifdef CONFIG_CPU_SRSO +#ifdef CONFIG_MITIGATION_SRSO /* * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at @@ -225,10 +225,10 @@ SYM_CODE_END(srso_return_thunk) #define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" #define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret" -#else /* !CONFIG_CPU_SRSO */ +#else /* !CONFIG_MITIGATION_SRSO */ #define JMP_SRSO_UNTRAIN_RET "ud2" #define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2" -#endif /* CONFIG_CPU_SRSO */ +#endif /* CONFIG_MITIGATION_SRSO */ #ifdef CONFIG_MITIGATION_UNRET_ENTRY @@ -316,7 +316,7 @@ SYM_FUNC_END(retbleed_untrain_ret) #define JMP_RETBLEED_UNTRAIN_RET "ud2" #endif /* CONFIG_MITIGATION_UNRET_ENTRY */ -#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) SYM_FUNC_START(entry_untrain_ret) ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \ @@ -325,7 +325,7 @@ SYM_FUNC_START(entry_untrain_ret) SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) -#endif /* CONFIG_MITIGATION_UNRET_ENTRY || CONFIG_CPU_SRSO */ +#endif /* CONFIG_MITIGATION_UNRET_ENTRY || CONFIG_MITIGATION_SRSO */ #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING diff --git a/include/linux/objtool.h b/include/linux/objtool.h index d030671a4c49..b3b8d3dab52d 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -131,7 +131,7 @@ */ .macro VALIDATE_UNRET_BEGIN #if defined(CONFIG_NOINSTR_VALIDATION) && \ - (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) .Lhere_\@: .pushsection .discard.validate_unret .long .Lhere_\@ - . diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 6277dbd730bb..6de297916ce6 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -38,7 +38,7 @@ objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION)) vmlinux-objtool-args-$(delay-objtool) += $(objtool-args-y) vmlinux-objtool-args-$(CONFIG_GCOV_KERNEL) += --no-unreachable vmlinux-objtool-args-$(CONFIG_NOINSTR_VALIDATION) += --noinstr \ - $(if $(or $(CONFIG_MITIGATION_UNRET_ENTRY),$(CONFIG_CPU_SRSO)), --unret) + $(if $(or $(CONFIG_MITIGATION_UNRET_ENTRY),$(CONFIG_MITIGATION_SRSO)), --unret) objtool-args = $(vmlinux-objtool-args-y) --link From 0911b8c52c4d68c57d02f172daa55a42bce703f0 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 08:07:37 -0800 Subject: [PATCH 37/52] x86/bugs: Rename CONFIG_RETHUNK => CONFIG_MITIGATION_RETHUNK Step 10/10 of the namespace unification of CPU mitigations related Kconfig options. [ mingo: Added one more case. 
] Suggested-by: Josh Poimboeuf Signed-off-by: Breno Leitao Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231121160740.1249350-11-leitao@debian.org --- arch/x86/Kconfig | 8 ++++---- arch/x86/Makefile | 2 +- arch/x86/configs/i386_defconfig | 2 +- arch/x86/include/asm/disabled-features.h | 2 +- arch/x86/include/asm/linkage.h | 4 ++-- arch/x86/include/asm/nospec-branch.h | 4 ++-- arch/x86/include/asm/static_call.h | 2 +- arch/x86/kernel/alternative.c | 4 ++-- arch/x86/kernel/static_call.c | 2 +- arch/x86/lib/retpoline.S | 4 ++-- scripts/Makefile.lib | 2 +- tools/arch/x86/include/asm/disabled-features.h | 2 +- tools/objtool/check.c | 2 +- 13 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a2743b7f2a18..0a9fea390ef3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2462,7 +2462,7 @@ config FINEIBT config HAVE_CALL_THUNKS def_bool y - depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL + depends on CC_HAS_ENTRY_PADDING && MITIGATION_RETHUNK && OBJTOOL config CALL_THUNKS def_bool n @@ -2505,7 +2505,7 @@ config MITIGATION_RETPOLINE branches. Requires a compiler with -mindirect-branch=thunk-extern support for full protection. The kernel may run slower. -config RETHUNK +config MITIGATION_RETHUNK bool "Enable return-thunks" depends on MITIGATION_RETPOLINE && CC_HAS_RETURN_THUNK select OBJTOOL if HAVE_OBJTOOL @@ -2518,7 +2518,7 @@ config RETHUNK config MITIGATION_UNRET_ENTRY bool "Enable UNRET on kernel entry" - depends on CPU_SUP_AMD && RETHUNK && X86_64 + depends on CPU_SUP_AMD && MITIGATION_RETHUNK && X86_64 default y help Compile the kernel with support for the retbleed=unret mitigation. @@ -2572,7 +2572,7 @@ config MITIGATION_IBRS_ENTRY config MITIGATION_SRSO bool "Mitigate speculative RAS overflow on AMD" - depends on CPU_SUP_AMD && X86_64 && RETHUNK + depends on CPU_SUP_AMD && X86_64 && MITIGATION_RETHUNK default y help Enable the SRSO mitigation needed on AMD Zen1-4 machines. 
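For orientation: the disabled-features.h hunks in this series all follow one pattern. When a mitigation's Kconfig option is off, the corresponding X86_FEATURE bit is folded into a compile-time "disabled" mask, so static feature tests compile away entirely. A stripped-down sketch of the mechanism, with an illustrative bit value and mask name rather than the real header's contents:

    /* Illustrative bit number; the real one lives in cpufeatures.h. */
    #define X86_FEATURE_RETHUNK     (11*32 + 14)

    #ifdef CONFIG_MITIGATION_RETHUNK
    # define DISABLE_RETHUNK        0
    #else
    # define DISABLE_RETHUNK        (1 << (X86_FEATURE_RETHUNK & 31))
    #endif

    /* Hypothetical per-word mask; a static feature check consults this
     * first and becomes a compile-time constant when the bit is set. */
    #define DISABLED_MASK_WORD11    (DISABLE_RETHUNK)

    static inline int feature_statically_disabled(int feature)
    {
            return DISABLED_MASK_WORD11 & (1 << (feature & 31));
    }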
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5ce8c30e7701..ba046afb850e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -22,7 +22,7 @@ RETPOLINE_VDSO_CFLAGS := -mretpoline endif RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) -ifdef CONFIG_RETHUNK +ifdef CONFIG_MITIGATION_RETHUNK RETHUNK_CFLAGS := -mfunction-return=thunk-extern RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) endif diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 73abbbdd26f8..91801138b10b 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -42,7 +42,7 @@ CONFIG_EFI_STUB=y CONFIG_HZ_1000=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y -# CONFIG_RETHUNK is not set +# CONFIG_MITIGATION_RETHUNK is not set CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y CONFIG_PM_TRACE_RTC=y diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 151f0d50e7e0..36d0c1e05e60 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -57,7 +57,7 @@ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) #endif -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_MITIGATION_RETHUNK # define DISABLE_RETHUNK 0 #else # define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 09e2d026df33..dc31b13b87a0 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -40,7 +40,7 @@ #ifdef __ASSEMBLY__ -#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk #else /* CONFIG_MITIGATION_RETPOLINE */ #ifdef CONFIG_MITIGATION_SLS @@ -52,7 +52,7 @@ #else /* __ASSEMBLY__ */ -#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define ASM_RET "jmp __x86_return_thunk\n\t" #else /* CONFIG_MITIGATION_RETPOLINE */ #ifdef CONFIG_MITIGATION_SLS diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 94c70832994f..2c0679ebe914 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -289,7 +289,7 @@ * where we have a stack but before any RET instruction. */ .macro __UNTRAIN_RET ibpb_feature, call_depth_insns -#if defined(CONFIG_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) +#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) VALIDATE_UNRET_END ALTERNATIVE_3 "", \ CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ @@ -328,7 +328,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_MITIGATION_RETHUNK extern void __x86_return_thunk(void); #else static inline void __x86_return_thunk(void) {} diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 343b722ccaf2..125c407e2abe 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -46,7 +46,7 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. 
+ 4)") -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_MITIGATION_RETHUNK #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") #else diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f5442d0d2ee9..df91abea3420 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -769,7 +769,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } } -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_MITIGATION_RETHUNK /* * Rewrite the compiler generated return thunk tail-calls. @@ -842,7 +842,7 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) } #else void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } -#endif /* CONFIG_RETHUNK */ +#endif /* CONFIG_MITIGATION_RETHUNK */ #else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 77a9316da435..4eefaac64c6c 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -172,7 +172,7 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) } EXPORT_SYMBOL_GPL(arch_static_call_transform); -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_MITIGATION_RETHUNK /* * This is called by apply_returns() to fix up static call trampolines, * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 67b52cbec648..0045153ba222 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -127,7 +127,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) #undef GEN #endif -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_MITIGATION_RETHUNK /* * Be careful here: that label cannot really be removed because in @@ -386,4 +386,4 @@ SYM_CODE_START(__x86_return_thunk) SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) -#endif /* CONFIG_RETHUNK */ +#endif /* CONFIG_MITIGATION_RETHUNK */ diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index b272ca64cd75..c3f4cacad7b0 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -263,7 +263,7 @@ objtool-args-$(CONFIG_HAVE_OBJTOOL_NOP_MCOUNT) += --mnop endif objtool-args-$(CONFIG_UNWINDER_ORC) += --orc objtool-args-$(CONFIG_MITIGATION_RETPOLINE) += --retpoline -objtool-args-$(CONFIG_RETHUNK) += --rethunk +objtool-args-$(CONFIG_MITIGATION_RETHUNK) += --rethunk objtool-args-$(CONFIG_MITIGATION_SLS) += --sls objtool-args-$(CONFIG_STACK_VALIDATION) += --stackval objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE) += --static-call diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 151f0d50e7e0..36d0c1e05e60 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -57,7 +57,7 @@ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) #endif -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_MITIGATION_RETHUNK # define DISABLE_RETHUNK 0 #else # define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 84067f018f7e..8440b7bb343c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3980,7 +3980,7 @@ static int validate_retpoline(struct objtool_file *file) if (insn->type == INSN_RETURN) { if (opts.rethunk) { - WARN_INSN(insn, "'naked' return found in RETHUNK build"); + WARN_INSN(insn, "'naked' return found in MITIGATION_RETHUNK build"); } else continue; } else { From 31a4ebee0d16a141b18730977963d0e7290b9bd2 
Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 22 Nov 2023 08:56:58 +0900 Subject: [PATCH 38/52] x86/vdso: Consolidate targets and clean-files 'targets' and 'clean-files' do not need to list the same files because the files listed in 'targets' are cleaned up. Refactor the code. Signed-off-by: Masahiro Yamada Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20231121235701.239606-2-masahiroy@kernel.org --- arch/x86/entry/vdso/Makefile | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index b1b8dd1608f7..2038d9c8e527 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -55,11 +55,8 @@ targets += vdso32/vdso32.lds $(vobjs32-y) # Build the vDSO image C files and link them in. vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o) -vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c) -vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg) obj-y += $(vdso_img_objs) -targets += $(vdso_img_cfiles) -targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) +targets += $(foreach x, 64 x32 32, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg) CPPFLAGS_vdso.lds += -P -C @@ -190,5 +187,3 @@ GCOV_PROFILE := n quiet_cmd_vdso_and_check = VDSO $@ cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) - -clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so* From 329b77b59f83440e98d792800501e5a398806860 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 22 Nov 2023 08:56:59 +0900 Subject: [PATCH 39/52] x86/vdso: Simplify obj-y addition Add objects to obj-y in a more straightforward way. CONFIG_X86_32 and CONFIG_IA32_EMULATION are not enabled simultaneously, but even if they are, Kbuild graciously deduplicates obj-y entries. Signed-off-by: Masahiro Yamada Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20231121235701.239606-3-masahiroy@kernel.org --- arch/x86/entry/vdso/Makefile | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 2038d9c8e527..cbfb5aab5e9c 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -18,11 +18,6 @@ OBJECT_FILES_NON_STANDARD := y # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n -VDSO64-$(CONFIG_X86_64) := y -VDSOX32-$(CONFIG_X86_X32_ABI) := y -VDSO32-$(CONFIG_X86_32) := y -VDSO32-$(CONFIG_IA32_EMULATION) := y - # files to link into the vdso vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o @@ -38,11 +33,11 @@ OBJECT_FILES_NON_STANDARD_vma.o := n OBJECT_FILES_NON_STANDARD_extable.o := n # vDSO images to build -vdso_img-$(VDSO64-y) += 64 -vdso_img-$(VDSOX32-y) += x32 -vdso_img-$(VDSO32-y) += 32 +obj-$(CONFIG_X86_64) += vdso-image-64.o +obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o +obj-$(CONFIG_X86_32) += vdso-image-32.o vdso32-setup.o +obj-$(CONFIG_IA32_EMULATION) += vdso-image-32.o vdso32-setup.o -obj-$(VDSO32-y) += vdso32-setup.o OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) @@ -53,9 +48,6 @@ $(obj)/vdso.o: $(obj)/vdso.so targets += vdso.lds $(vobjs-y) targets += vdso32/vdso32.lds $(vobjs32-y) -# Build the vDSO image C files and link them in. 
-vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o) -obj-y += $(vdso_img_objs) targets += $(foreach x, 64 x32 32, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg) CPPFLAGS_vdso.lds += -P -C From ac9275b3b4dd11a1c825071b9dbaf7614a399c89 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 22 Nov 2023 08:57:00 +0900 Subject: [PATCH 40/52] x86/vdso: Use $(addprefix ) instead of $(foreach ) $(addprefix ) is slightly shorter and more intuitive. Signed-off-by: Masahiro Yamada Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20231121235701.239606-4-masahiroy@kernel.org --- arch/x86/entry/vdso/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index cbfb5aab5e9c..439b52772e69 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -40,8 +40,8 @@ obj-$(CONFIG_IA32_EMULATION) += vdso-image-32.o vdso32-setup.o OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n -vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) -vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F) +vobjs := $(addprefix $(obj)/, $(vobjs-y)) +vobjs32 := $(addprefix $(obj)/, $(vobjs32-y)) $(obj)/vdso.o: $(obj)/vdso.so @@ -112,7 +112,7 @@ VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \ vobjx32s-y := $(vobjs-y:.o=-x32.o) # same thing, but in the output directory -vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F) +vobjx32s := $(addprefix $(obj)/, $(vobjx32s-y)) # Convert 64bit object file to x32 for x32 vDSO. quiet_cmd_x32 = X32 $@ From 289d0a475c3e5be42315376d08e0457350fb8e9c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 22 Nov 2023 08:57:01 +0900 Subject: [PATCH 41/52] x86/vdso: Use CONFIG_COMPAT_32 to specify vdso32 In arch/x86/Kconfig, COMPAT_32 is defined as (IA32_EMULATION || X86_32). Use it to eliminate redundancy in Makefile. 
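Because COMPAT_32 is defined as the disjunction of the two options it replaces, the substitution cannot change which objects are built. Purely as an illustration (this is not part of the patch), a hypothetical build-time assertion of that equivalence would look like:

    #include <linux/build_bug.h>
    #include <linux/kconfig.h>

    /* Hypothetical check: CONFIG_COMPAT_32 must track exactly the
     * expression it replaces in the Makefiles. */
    static_assert(IS_ENABLED(CONFIG_COMPAT_32) ==
                  (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)),
                  "COMPAT_32 must mirror IA32_EMULATION || X86_32");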
Signed-off-by: Masahiro Yamada Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20231121235701.239606-5-masahiroy@kernel.org --- arch/x86/Makefile | 3 +-- arch/x86/entry/vdso/Makefile | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2264db14a25d..f2260acfd639 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -296,8 +296,7 @@ install: vdso-install-$(CONFIG_X86_64) += arch/x86/entry/vdso/vdso64.so.dbg vdso-install-$(CONFIG_X86_X32_ABI) += arch/x86/entry/vdso/vdsox32.so.dbg -vdso-install-$(CONFIG_X86_32) += arch/x86/entry/vdso/vdso32.so.dbg -vdso-install-$(CONFIG_IA32_EMULATION) += arch/x86/entry/vdso/vdso32.so.dbg +vdso-install-$(CONFIG_COMPAT_32) += arch/x86/entry/vdso/vdso32.so.dbg archprepare: checkbin checkbin: diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 439b52772e69..7a97b17f28b7 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -35,8 +35,7 @@ OBJECT_FILES_NON_STANDARD_extable.o := n # vDSO images to build obj-$(CONFIG_X86_64) += vdso-image-64.o obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o -obj-$(CONFIG_X86_32) += vdso-image-32.o vdso32-setup.o -obj-$(CONFIG_IA32_EMULATION) += vdso-image-32.o vdso32-setup.o +obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n From 4461438a8405e800f90e0e40409e5f3d07eed381 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 3 Jan 2024 19:36:26 +0100 Subject: [PATCH 42/52] x86/retpoline: Ensure default return thunk isn't used at runtime Make sure the default return thunk is not used after all return instructions have been patched by the alternatives because the default return thunk is insufficient when it comes to mitigating Retbleed or SRSO. Fix based on an earlier version by David Kaplan . [ bp: Fix the compilation error of warn_thunk_thunk being an invisible symbol, hoist thunk macro into calling.h ] Signed-off-by: Josh Poimboeuf Co-developed-by: Borislav Petkov (AMD) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20231010171020.462211-4-david.kaplan@amd.com Link: https://lore.kernel.org/r/20240104132446.GEZZaxnrIgIyat0pqf@fat_crate.local --- arch/x86/entry/calling.h | 60 ++++++++++++++++++++++++++++ arch/x86/entry/entry.S | 4 ++ arch/x86/entry/thunk_32.S | 34 ++++------------ arch/x86/entry/thunk_64.S | 33 --------------- arch/x86/include/asm/nospec-branch.h | 2 + arch/x86/kernel/cpu/bugs.c | 5 +++ arch/x86/lib/retpoline.S | 15 +++---- 7 files changed, 85 insertions(+), 68 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 39e069b68c6e..bd31b2534053 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -426,3 +426,63 @@ For 32-bit we have the following conventions - kernel is built with .endm #endif /* CONFIG_SMP */ + +#ifdef CONFIG_X86_64 + +/* rdi: arg1 ... normal C conventions. rax is saved/restored. 
*/ +.macro THUNK name, func +SYM_FUNC_START(\name) + pushq %rbp + movq %rsp, %rbp + + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %rax + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + + call \func + + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi + popq %rbp + RET +SYM_FUNC_END(\name) + _ASM_NOKPROBE(\name) +.endm + +#else /* CONFIG_X86_32 */ + +/* put return address in eax (arg1) */ +.macro THUNK name, func, put_ret_addr_in_eax=0 +SYM_CODE_START_NOALIGN(\name) + pushl %eax + pushl %ecx + pushl %edx + + .if \put_ret_addr_in_eax + /* Place EIP in the arg1 */ + movl 3*4(%esp), %eax + .endif + + call \func + popl %edx + popl %ecx + popl %eax + RET + _ASM_NOKPROBE(\name) +SYM_CODE_END(\name) + .endm + +#endif diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 8c8d38f0cb1d..582731f74dc8 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -7,6 +7,8 @@ #include #include +#include "calling.h" + .pushsection .noinstr.text, "ax" SYM_FUNC_START(entry_ibpb) @@ -20,3 +22,5 @@ SYM_FUNC_END(entry_ibpb) EXPORT_SYMBOL_GPL(entry_ibpb); .popsection + +THUNK warn_thunk_thunk, __warn_thunk diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index 0103e103a657..da37f42f4549 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -4,33 +4,15 @@ * Copyright 2008 by Steven Rostedt, Red Hat, Inc * (inspired by Andi Kleen's thunk_64.S) */ - #include - #include - #include - /* put return address in eax (arg1) */ - .macro THUNK name, func, put_ret_addr_in_eax=0 -SYM_CODE_START_NOALIGN(\name) - pushl %eax - pushl %ecx - pushl %edx +#include +#include +#include - .if \put_ret_addr_in_eax - /* Place EIP in the arg1 */ - movl 3*4(%esp), %eax - .endif +#include "calling.h" - call \func - popl %edx - popl %ecx - popl %eax - RET - _ASM_NOKPROBE(\name) -SYM_CODE_END(\name) - .endm - - THUNK preempt_schedule_thunk, preempt_schedule - THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace - EXPORT_SYMBOL(preempt_schedule_thunk) - EXPORT_SYMBOL(preempt_schedule_notrace_thunk) +THUNK preempt_schedule_thunk, preempt_schedule +THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace +EXPORT_SYMBOL(preempt_schedule_thunk) +EXPORT_SYMBOL(preempt_schedule_notrace_thunk) diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 416b400f39db..119ebdc3d362 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -9,39 +9,6 @@ #include "calling.h" #include - /* rdi: arg1 ... normal C conventions. rax is saved/restored. 
*/ - .macro THUNK name, func -SYM_FUNC_START(\name) - pushq %rbp - movq %rsp, %rbp - - pushq %rdi - pushq %rsi - pushq %rdx - pushq %rcx - pushq %rax - pushq %r8 - pushq %r9 - pushq %r10 - pushq %r11 - - call \func - - popq %r11 - popq %r10 - popq %r9 - popq %r8 - popq %rax - popq %rcx - popq %rdx - popq %rsi - popq %rdi - popq %rbp - RET -SYM_FUNC_END(\name) - _ASM_NOKPROBE(\name) - .endm - THUNK preempt_schedule_thunk, preempt_schedule THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace EXPORT_SYMBOL(preempt_schedule_thunk) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2c0679ebe914..55754617eaee 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -357,6 +357,8 @@ extern void entry_ibpb(void); extern void (*x86_return_thunk)(void); +extern void __warn_thunk(void); + #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING extern void call_depth_return_thunk(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f2775417bda2..a78892b0f823 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2850,3 +2850,8 @@ ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *bu return cpu_show_common(dev, attr, buf, X86_BUG_GDS); } #endif + +void __warn_thunk(void) +{ + WARN_ONCE(1, "Unpatched return thunk in use. This should not happen!\n"); +} diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 0045153ba222..721b528da9ac 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -369,19 +369,16 @@ SYM_FUNC_END(call_depth_return_thunk) * 'JMP __x86_return_thunk' sites are changed to something else by * apply_returns(). * - * This should be converted eventually to call a warning function which - * should scream loudly when the default return thunk is called after - * alternatives have been applied. - * - * That warning function cannot BUG() because the bug splat cannot be - * displayed in all possible configurations, leading to users not really - * knowing why the machine froze. + * The ALTERNATIVE below adds a really loud warning to catch the case + * where the insufficient default return thunk ends up getting used for + * whatever reason like miscompilation or failure of + * objtool/alternatives/etc to patch all the return sites. */ SYM_CODE_START(__x86_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR - ANNOTATE_UNRET_SAFE - ret + ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \ + "jmp warn_thunk_thunk", X86_FEATURE_ALWAYS int3 SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) From b388e57d4628eb22782bdad4cd5b83ca87a1b7c9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 19 Feb 2024 21:57:18 -0800 Subject: [PATCH 43/52] x86/vdso: Fix rethunk patching for vdso-image-{32,64}.o For CONFIG_RETHUNK kernels, objtool annotates all the function return sites so they can be patched during boot. By design, after apply_returns() is called, all tail-calls to the compiler-generated default return thunk (__x86_return_thunk) should be patched out and replaced with whatever's needed for any mitigations (or lack thereof). The commit 4461438a8405 ("x86/retpoline: Ensure default return thunk isn't used at runtime") adds a runtime check and a WARN_ONCE() if the default return thunk ever gets executed after alternatives have been applied. This warning is a sanity check to make sure objtool and apply_returns() are doing their job. 
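To make the invariant concrete before the bug report below: a minimal user-space analog of what the check enforces. This is an illustrative sketch only; patching_done, default_thunk and return_thunk are hypothetical stand-ins for the alternatives machinery, __x86_return_thunk and the patched return sites, not kernel code:

  #include <stdio.h>
  #include <stdbool.h>

  static bool patching_done;

  /* Stand-in for __x86_return_thunk: must never run once patching is done. */
  static void default_thunk(void)
  {
          static bool warned;

          if (patching_done && !warned) {          /* analogous to WARN_ONCE() */
                  warned = true;
                  fprintf(stderr, "Unpatched return thunk in use. This should not happen!\n");
          }
  }

  static void (*return_thunk)(void) = default_thunk;

  int main(void)
  {
          return_thunk();         /* fine: "alternatives" not yet applied */
          patching_done = true;   /* apply_returns() retargets every site */
          return_thunk();         /* now fires the one-time warning */
          return 0;
  }

In the kernel the check is wired up differently, as the retpoline.S hunk above shows: __x86_return_thunk itself is rewritten via ALTERNATIVE to jump to the warning, since after boot nothing should reach the default thunk at all.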
As Nathan reported, that check found something: Unpatched return thunk in use. This should not happen! WARNING: CPU: 0 PID: 1 at arch/x86/kernel/cpu/bugs.c:2856 __warn_thunk+0x27/0x40 RIP: 0010:__warn_thunk+0x27/0x40 Call Trace: ? show_regs ? __warn ? __warn_thunk ? report_bug ? console_unlock ? handle_bug ? exc_invalid_op ? asm_exc_invalid_op ? ia32_binfmt_init ? __warn_thunk warn_thunk_thunk do_one_initcall kernel_init_freeable ? __pfx_kernel_init kernel_init ret_from_fork ? __pfx_kernel_init ret_from_fork_asm Boris debugged to find that the unpatched return site was in init_vdso_image_64(), and its translation unit wasn't being analyzed by objtool, so it never got annotated. So it got ignored by apply_returns(). This is only a minor issue, as this function is only called during boot. Still, objtool needs full visibility into the kernel. Fix it by enabling objtool on vdso-image-{32,64}.o. Note this problem can only be seen with !CONFIG_X86_KERNEL_IBT, as that requires objtool to run individually on all translation units rather than on vmlinux.o. [ bp: Massage commit message. ] Reported-by: Nathan Chancellor Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240215032049.GA3944823@dev-arch.thelio-3990X --- arch/x86/entry/vdso/Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index c4df99aa1615..b80f4bbe4f75 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -34,8 +34,12 @@ obj-y += vma.o extable.o KASAN_SANITIZE_vma.o := y UBSAN_SANITIZE_vma.o := y KCSAN_SANITIZE_vma.o := y -OBJECT_FILES_NON_STANDARD_vma.o := n -OBJECT_FILES_NON_STANDARD_extable.o := n + +OBJECT_FILES_NON_STANDARD_extable.o := n +OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n +OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n +OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n +OBJECT_FILES_NON_STANDARD_vma.o := n # vDSO images to build vdso_img-$(VDSO64-y) += 64 @@ -43,7 +47,6 @@ vdso_img-$(VDSOX32-y) += x32 vdso_img-$(VDSO32-y) += 32 obj-$(VDSO32-y) += vdso32-setup.o -OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F) From b7bcffe752957c6eac7c4cd77dd6f5d943478769 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 22 Feb 2024 10:20:58 +0100 Subject: [PATCH 44/52] x86/vdso/kbuild: Group non-standard build attributes and primary object file rules together The fresh changes to the vDSO Makefile in: 289d0a475c3e ("x86/vdso: Use CONFIG_COMPAT_32 to specify vdso32") 329b77b59f83 ("x86/vdso: Simplify obj-y addition") Conflicted with a pending change in: b388e57d4628e ("x86/vdso: Fix rethunk patching for vdso-image-{32,64}.o") Which was resolved in a simple fashion in this merge commit: f14df823a61e ("Merge branch 'x86/vdso' into x86/core, to resolve conflict and to prepare for dependent changes") ... but all these changes make me look and notice a bit of historic baggage left in the Makefile: - Disordered build rules, where non-standard build attributes were placed sometimes several lines after - and sometimes *before* - the .o build rules of the object files... Functional but inconsistent. - Inconsistent vertical spacing, stray whitespaces, inconsistent spelling of 'vDSO' over the years, a few spelling mistakes and inconsistent capitalization of comment blocks. Tidy it all up. No functional changes intended.
Cc: Masahiro Yamada Cc: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/Makefile | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 3ccab61ddeda..620f6257bbe9 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,7 +3,7 @@ # Building vDSO images for x86. # -# Include the generic Makefile to check the built vdso. +# Include the generic Makefile to check the built vDSO: include $(srctree)/lib/vdso/Makefile # Sanitizer runtimes are unavailable and cannot be linked here. @@ -18,29 +18,29 @@ OBJECT_FILES_NON_STANDARD := y # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n -# files to link into the vdso +# Files to link into the vDSO: vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o vobjs32-y += vdso32/vclock_gettime.o vdso32/vgetcpu.o vobjs-$(CONFIG_X86_SGX) += vsgx.o -# files to link into kernel -obj-y += vma.o extable.o -KASAN_SANITIZE_vma.o := y -UBSAN_SANITIZE_vma.o := y -KCSAN_SANITIZE_vma.o := y +# Files to link into the kernel: +obj-y += vma.o extable.o +KASAN_SANITIZE_vma.o := y +UBSAN_SANITIZE_vma.o := y +KCSAN_SANITIZE_vma.o := y -OBJECT_FILES_NON_STANDARD_extable.o := n -OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n -OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n OBJECT_FILES_NON_STANDARD_vma.o := n +OBJECT_FILES_NON_STANDARD_extable.o := n -# vDSO images to build -obj-$(CONFIG_X86_64) += vdso-image-64.o -obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o -obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o +# vDSO images to build: +obj-$(CONFIG_X86_64) += vdso-image-64.o +obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o +obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o -OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n +OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n +OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n +OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n vobjs := $(addprefix $(obj)/, $(vobjs-y)) vobjs32 := $(addprefix $(obj)/, $(vobjs32-y)) From 3c6539b4c177695aaa77893c4ce91d21dea7bb3d Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 10 Feb 2024 01:18:35 -0800 Subject: [PATCH 45/52] x86/vdso: Move vDSO to mmap region The vDSO (and its initial randomization) was introduced in commit 2aae950b21e4 ("x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu"), but had very low entropy. The entropy was improved in commit 394f56fe4801 ("x86_64, vdso: Fix the vdso address randomization algorithm"), but there is still improvement to be made. In principle there should not be executable code at a low entropy offset from the stack, since the stack and executable code having separate randomization is part of what makes ASLR stronger. Remove the only executable code near the stack region and give the vDSO the same randomized base as other mmap mappings including the linker and other shared objects. This results in higher entropy being provided and there's little to no advantage in separating this from the existing executable code there. This is already how other architectures like arm64 handle the vDSO. 
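The effect is easy to observe from user space. A small sketch, Linux-specific and assuming glibc's getauxval(); the printed addresses naturally vary per run:

  #include <elf.h>
  #include <stdio.h>
  #include <sys/auxv.h>

  int main(void)
  {
          /* Base of the vDSO mapping, as passed in the auxiliary vector */
          unsigned long vdso = getauxval(AT_SYSINFO_EHDR);
          /* A rough probe for the current stack location */
          unsigned long stack = (unsigned long)&vdso;

          printf("vDSO base: 0x%lx\n", vdso);
          printf("stack:     0x%lx\n", stack);
          return 0;
  }

Run repeatedly: before this change the two values track each other closely; with it, the vDSO base lands in the mmap region alongside the other shared objects.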
As an aside, while it's sensible for userspace to reserve the initial mmap base as a region for executable code with a random gap for other mmap allocations, along with providing randomization within that region, there isn't much the kernel can do to help due to how dynamic linkers load the shared objects. This was extracted from the PaX RANDMMAP feature. [kees: updated commit log with historical details and other tweaks] Signed-off-by: Daniel Micay Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Closes: https://github.com/KSPP/linux/issues/280 Link: https://lore.kernel.org/r/20240210091827.work.233-kees@kernel.org --- arch/x86/entry/vdso/vma.c | 57 ++---------------------------------- arch/x86/include/asm/elf.h | 1 - arch/x86/kernel/sys_x86_64.c | 7 ----- 3 files changed, 2 insertions(+), 63 deletions(-) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 7645730dc228..6d83ceb7f1ba 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -274,59 +274,6 @@ up_fail: return ret; } -#ifdef CONFIG_X86_64 -/* - * Put the vdso above the (randomized) stack with another randomized - * offset. This way there is no hole in the middle of address space. - * To save memory make sure it is still in the same PTE as the stack - * top. This doesn't give that many random bits. - * - * Note that this algorithm is imperfect: the distribution of the vdso - * start address within a PMD is biased toward the end. - * - * Only used for the 64-bit and x32 vdsos. - */ -static unsigned long vdso_addr(unsigned long start, unsigned len) -{ - unsigned long addr, end; - unsigned offset; - - /* - * Round up the start address. It can start out unaligned as a result - * of stack start randomization. - */ - start = PAGE_ALIGN(start); - - /* Round the lowest possible end address up to a PMD boundary. */ - end = (start + len + PMD_SIZE - 1) & PMD_MASK; - if (end >= DEFAULT_MAP_WINDOW) - end = DEFAULT_MAP_WINDOW; - end -= len; - - if (end > start) { - offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1); - addr = start + (offset << PAGE_SHIFT); - } else { - addr = start; - } - - /* - * Forcibly align the final address in case we have a hardware - * issue that requires alignment for performance reasons.
- */ - addr = align_vdso_addr(addr); - - return addr; -} - -static int map_vdso_randomized(const struct vdso_image *image) -{ - unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); - - return map_vdso(image, addr); -} -#endif - int map_vdso_once(const struct vdso_image *image, unsigned long addr) { struct mm_struct *mm = current->mm; @@ -369,7 +316,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!vdso64_enabled) return 0; - return map_vdso_randomized(&vdso_image_64); + return map_vdso(&vdso_image_64, 0); } #ifdef CONFIG_COMPAT @@ -380,7 +327,7 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm, if (x32) { if (!vdso64_enabled) return 0; - return map_vdso_randomized(&vdso_image_x32); + return map_vdso(&vdso_image_x32, 0); } #endif #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1e16bd5ac781..1fb83d47711f 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -392,5 +392,4 @@ struct va_alignment { } ____cacheline_aligned; extern struct va_alignment va_align; -extern unsigned long align_vdso_addr(unsigned long); #endif /* _ASM_X86_ELF_H */ diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index c783aeb37dce..cb9fa1d5c66f 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -52,13 +52,6 @@ static unsigned long get_align_bits(void) return va_align.bits & get_align_mask(); } -unsigned long align_vdso_addr(unsigned long addr) -{ - unsigned long align_mask = get_align_mask(); - addr = (addr + align_mask) & ~align_mask; - return addr | get_align_bits(); -} - static int __init control_va_addr_alignment(char *str) { /* guard against enabling this on other CPU families */ From 44c76825d6eefee9eb7ce06c38e1a6632ac7eb7d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Feb 2024 22:25:43 -0800 Subject: [PATCH 46/52] x86: Increase brk randomness entropy for 64-bit systems In commit c1d171a00294 ("x86: randomize brk"), arch_randomize_brk() was defined to use a 32MB range (13 bits of entropy), but was never increased when moving to 64-bit. The default arch_randomize_brk() uses 32MB for 32-bit tasks, and 1GB (18 bits of entropy) for 64-bit tasks. Update x86_64 to match the entropy used by arm64 and other 64-bit architectures. Reported-by: y0un9n132@gmail.com Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Acked-by: Jiri Kosina Closes: https://lore.kernel.org/linux-hardening/CA+2EKTVLvc8hDZc+2Yhwmus=dzOUG5E4gV7ayCbu0MPJTZzWkw@mail.gmail.com/ Link: https://lore.kernel.org/r/20240217062545.1631668-1-keescook@chromium.org --- arch/x86/kernel/process.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ab49ade31b0d..45a9d496fe2a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1030,7 +1030,10 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { - return randomize_page(mm->brk, 0x02000000); + if (mmap_is_ia32()) + return randomize_page(mm->brk, SZ_32M); + + return randomize_page(mm->brk, SZ_1G); } /* From 2be2a197ff6c3a659ab9285e1d88cbdc609ac6de Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Feb 2024 15:23:36 +0100 Subject: [PATCH 47/52] sched/idle: Conditionally handle tick broadcast in default_idle_call() The x86 architecture has an idle routine for AMD CPUs which are affected by erratum 400. 
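Before the erratum handling is described further, a quick sanity check of the entropy figures quoted in the brk change above. This is an illustrative user-space calculation assuming 4K pages, not kernel code; randomize_page() picks one page-aligned offset within the given range:

  #include <stdio.h>

  #define PAGE_SHIFT      12UL            /* assumed 4K pages */
  #define SZ_32M          (32UL << 20)
  #define SZ_1G           (1UL << 30)

  int main(void)
  {
          printf("32M range: %lu pages -> 13 bits of entropy\n", SZ_32M >> PAGE_SHIFT);
          printf("1G range:  %lu pages -> 18 bits of entropy\n", SZ_1G >> PAGE_SHIFT);
          return 0;
  }

That is 8192 and 262144 possible pages, i.e. 2^13 and 2^18, matching the 13 and 18 bits claimed above.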
On the affected CPUs the local APIC timer stops in the C1E halt state. It therefore requires tick broadcasting. The invocation of tick_broadcast_enter()/exit() from this function violates the RCU constraints because it can end up in lockdep or tracing, which rightfully triggers a warning. tick_broadcast_enter()/exit() must be invoked before ct_cpuidle_enter() and after ct_cpuidle_exit() in default_idle_call(). Add a static branch conditional invocation of tick_broadcast_enter()/exit() into this function to allow X86 to replace the AMD specific idle code. It's guarded by a config switch which will be selected by x86. Otherwise it's a NOOP. Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240229142248.266708822@linutronix.de --- include/linux/cpu.h | 2 ++ include/linux/tick.h | 3 +++ kernel/sched/idle.c | 21 +++++++++++++++++++++ kernel/time/Kconfig | 5 +++++ 4 files changed, 31 insertions(+) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index dcb89c987164..715017d4432b 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -196,6 +196,8 @@ void arch_cpu_idle(void); void arch_cpu_idle_prepare(void); void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); +void arch_tick_broadcast_enter(void); +void arch_tick_broadcast_exit(void); void __noreturn arch_cpu_idle_dead(void); #ifdef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT diff --git a/include/linux/tick.h b/include/linux/tick.h index 716d17f31c45..e134f286df8a 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void __init tick_init(void); @@ -63,6 +64,8 @@ enum tick_broadcast_state { TICK_BROADCAST_ENTER, }; +extern struct static_key_false arch_needs_tick_broadcast; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_control(enum tick_broadcast_mode mode); #else diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 31231925f1ec..31ad81153295 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -81,6 +81,25 @@ void __weak arch_cpu_idle(void) cpu_idle_force_poll = 1; } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST_IDLE +DEFINE_STATIC_KEY_FALSE(arch_needs_tick_broadcast); + +static inline void cond_tick_broadcast_enter(void) +{ + if (static_branch_unlikely(&arch_needs_tick_broadcast)) + tick_broadcast_enter(); +} + +static inline void cond_tick_broadcast_exit(void) +{ + if (static_branch_unlikely(&arch_needs_tick_broadcast)) + tick_broadcast_exit(); +} +#else +static inline void cond_tick_broadcast_enter(void) { } +static inline void cond_tick_broadcast_exit(void) { } +#endif + /** * default_idle_call - Default CPU idle routine. 
* @@ -90,6 +109,7 @@ void __cpuidle default_idle_call(void) { instrumentation_begin(); if (!current_clr_polling_and_test()) { + cond_tick_broadcast_enter(); trace_cpu_idle(1, smp_processor_id()); stop_critical_timings(); @@ -99,6 +119,7 @@ void __cpuidle default_idle_call(void) start_critical_timings(); trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); + cond_tick_broadcast_exit(); } local_irq_enable(); instrumentation_end(); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index bae8f11070be..fc3b1a06c981 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -39,6 +39,11 @@ config GENERIC_CLOCKEVENTS_BROADCAST bool depends on GENERIC_CLOCKEVENTS +# Handle broadcast in default_idle_call() +config GENERIC_CLOCKEVENTS_BROADCAST_IDLE + bool + depends on GENERIC_CLOCKEVENTS_BROADCAST + # Automatically adjust the min. reprogramming time for # clock event device config GENERIC_CLOCKEVENTS_MIN_ADJUST From cb81deefb59de01325ab822f900c13941bfaf67f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Feb 2024 23:13:00 +0100 Subject: [PATCH 48/52] x86/idle: Sanitize X86_BUG_AMD_E400 handling amd_e400_idle(), the idle routine for AMD CPUs which are affected by erratum 400, violates the RCU constraints by invoking tick_broadcast_enter() and tick_broadcast_exit() after the core code has marked RCU non-idle. The functions can end up in lockdep or tracing, which rightfully triggers an RCU warning. The core code now provides a static branch conditional invocation of the broadcast functions. Remove amd_e400_idle(), enforce default_idle() and enable the static branch on affected CPUs to cure this. [ bp: Fold in a fix for an IS_ENABLED() check fail missing a "CONFIG_" prefix which tglx spotted. ] Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/877cim6sis.ffs@tglx --- arch/x86/Kconfig | 1 + arch/x86/kernel/process.c | 42 +++++++++------------------------------ 2 files changed, 10 insertions(+), 33 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 502986237cb6..080265369f8e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -147,6 +147,7 @@ config X86 select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) + select GENERIC_CLOCKEVENTS_BROADCAST_IDLE if GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CLOCKEVENTS_MIN_ADJUST select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 45a9d496fe2a..b86ff0ffe59a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -845,31 +845,6 @@ void __noreturn stop_this_cpu(void *dummy) } } -/* - * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power - * states (local apic timer and TSC stop). - * - * XXX this function is completely buggered vs RCU and tracing. - */ -static void amd_e400_idle(void) -{ - /* - * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E - * gets set after static_cpu_has() places have been converted via - * alternatives.
- */ - if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) { - default_idle(); - return; - } - - tick_broadcast_enter(); - - default_idle(); - - tick_broadcast_exit(); -} - /* * Prefer MWAIT over HALT if MWAIT is supported, MWAIT_CPUID leaf * exists and whenever MONITOR/MWAIT extensions are present there is at @@ -890,8 +865,8 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_MWAIT)) return 0; - /* Monitor has a bug. Fallback to HALT */ - if (boot_cpu_has_bug(X86_BUG_MONITOR)) + /* Monitor has a bug or APIC stops in C1E. Fallback to HALT */ + if (boot_cpu_has_bug(X86_BUG_MONITOR) || boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) return 0; cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); @@ -942,17 +917,15 @@ void select_idle_routine(const struct cpuinfo_x86 *c) if (x86_idle_set() || boot_option_idle_override == IDLE_POLL) return; - if (boot_cpu_has_bug(X86_BUG_AMD_E400)) { - pr_info("using AMD E400 aware idle routine\n"); - static_call_update(x86_idle, amd_e400_idle); - } else if (prefer_mwait_c1_over_halt(c)) { + if (prefer_mwait_c1_over_halt(c)) { pr_info("using mwait in idle threads\n"); static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); static_call_update(x86_idle, tdx_safe_halt); - } else + } else { static_call_update(x86_idle, default_idle); + } } void amd_e400_c1e_apic_setup(void) @@ -985,7 +958,10 @@ void __init arch_post_acpi_subsys_init(void) if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halt in AMD C1E"); - pr_info("System has AMD C1E enabled\n"); + + if (IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST_IDLE)) + static_branch_enable(&arch_needs_tick_broadcast); + pr_info("System has AMD C1E erratum E400. Workaround enabled.\n"); } static int __init idle_setup(char *str) From 0ab562875c01c91ec8167f8f6593ea61e510fd0a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Feb 2024 15:23:38 +0100 Subject: [PATCH 49/52] x86/idle: Clean up idle selection Clean up the code to make it readable. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240229142248.392017685@linutronix.de --- arch/x86/kernel/process.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b86ff0ffe59a..792394be4e60 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -910,11 +910,13 @@ static __cpuidle void mwait_idle(void) void select_idle_routine(const struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP - if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) - pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); -#endif - if (x86_idle_set() || boot_option_idle_override == IDLE_POLL) + if (boot_option_idle_override == IDLE_POLL) { + if (IS_ENABLED(CONFIG_SMP) && smp_num_siblings > 1) + pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); + return; + } + + if (x86_idle_set()) return; if (prefer_mwait_c1_over_halt(c)) { From f3d7eab7be871d948d896e7021038b092ece687e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Feb 2024 15:23:40 +0100 Subject: [PATCH 50/52] x86/idle: Cleanup idle_setup() Updating the static call for x86_idle() from idle_setup() is counter-intuitive. Let select_idle_routine() handle it like the other idle choices, which allows the idle selection to be simplified later on.
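As an illustration of the resulting shape of the parser, here is a stand-alone user-space sketch. The names mirror the kernel's, but this is a hypothetical reduction, not the patched idle_setup() itself (the real one also pokes cpu_idle_poll_ctrl() for 'poll'):

  #include <errno.h>
  #include <stdio.h>
  #include <string.h>

  enum idle_override { IDLE_NO_OVERRIDE, IDLE_POLL, IDLE_HALT, IDLE_NOMWAIT };

  static enum idle_override boot_option_idle_override = IDLE_NO_OVERRIDE;

  static int idle_setup(const char *str)
  {
          if (!str)
                  return -EINVAL;

          if (!strcmp(str, "poll"))
                  boot_option_idle_override = IDLE_POLL;
          else if (!strcmp(str, "halt"))
                  boot_option_idle_override = IDLE_HALT;
          else if (!strcmp(str, "nomwait"))
                  boot_option_idle_override = IDLE_NOMWAIT;
          else
                  return -EINVAL;  /* a proper error code, not a bare -1 */

          return 0;
  }

  int main(void)
  {
          printf("%d\n", idle_setup("nomwait"));  /* 0 */
          printf("%d\n", idle_setup("bogus"));    /* -EINVAL (-22 on Linux) */
          return 0;
  }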
While at it, rewrite the comments and return a proper error code instead of -1. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240229142248.455616019@linutronix.de --- arch/x86/kernel/process.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 792394be4e60..970995992479 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -857,8 +857,8 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - /* User has disallowed the use of MWAIT. Fallback to HALT */ - if (boot_option_idle_override == IDLE_NOMWAIT) + /* If override is enforced on the command line, fall back to HALT. */ + if (boot_option_idle_override != IDLE_NO_OVERRIDE) return 0; /* MWAIT is not supported on this platform. Fallback to HALT */ @@ -976,24 +976,14 @@ static int __init idle_setup(char *str) boot_option_idle_override = IDLE_POLL; cpu_idle_poll_ctrl(true); } else if (!strcmp(str, "halt")) { - /* - * When the boot option of idle=halt is added, halt is - * forced to be used for CPU idle. In such case CPU C2/C3 - * won't be used again. - * To continue to load the CPU idle driver, don't touch - * the boot_option_idle_override. - */ - static_call_update(x86_idle, default_idle); + /* 'idle=halt' HALT for idle. C-states are disabled. */ boot_option_idle_override = IDLE_HALT; } else if (!strcmp(str, "nomwait")) { - /* - * If the boot option of "idle=nomwait" is added, - * it means that mwait will be disabled for CPU C1/C2/C3 - * states. - */ + /* 'idle=nomwait' disables MWAIT for idle */ boot_option_idle_override = IDLE_NOMWAIT; - } else - return -1; + } else { + return -EINVAL; + } return 0; } From 5f75916ec6ecdc6314b637746f3ad809f2fc7379 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Feb 2024 15:23:41 +0100 Subject: [PATCH 51/52] x86/idle: Let prefer_mwait_c1_over_halt() return bool The return value is truly boolean. Make it so. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240229142248.518723854@linutronix.de --- arch/x86/kernel/process.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 970995992479..ccaacc7f9681 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -853,21 +853,21 @@ void __noreturn stop_this_cpu(void *dummy) * Do not prefer MWAIT if MONITOR instruction has a bug or idle=nomwait * is passed to kernel commandline parameter. */ -static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) +static bool prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; /* If override is enforced on the command line, fall back to HALT. */ if (boot_option_idle_override != IDLE_NO_OVERRIDE) - return 0; + return false; /* MWAIT is not supported on this platform. Fallback to HALT */ if (!cpu_has(c, X86_FEATURE_MWAIT)) - return 0; + return false; /* Monitor has a bug or APIC stops in C1E. Fallback to HALT */ if (boot_cpu_has_bug(X86_BUG_MONITOR) || boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) - return 0; + return false; cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); @@ -876,13 +876,13 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) * with EAX=0, ECX=0.
*/ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) - return 1; + return true; /* * If MWAIT extensions are available, there should be at least one * MWAIT C1 substate present. */ - return (edx & MWAIT_C1_SUBSTATE_MASK); + return !!(edx & MWAIT_C1_SUBSTATE_MASK); } /* From 35ce64922c8263448e58a2b9e8d15a64e11e9b2d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Feb 2024 23:20:32 +0100 Subject: [PATCH 52/52] x86/idle: Select idle routine only once The idle routine selection is done on every CPU bringup operation and has a guard in place which is effective after the first invocation, which is a pointless exercise. Invoke it once on the boot CPU and mark the related functions __init. The guard check has to stay as xen_set_default_idle() runs early. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/87edcu6vaq.ffs@tglx --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/process.c | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1188e8bf76a2..523c466c2fc9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -558,7 +558,7 @@ static inline void load_sp0(unsigned long sp0) unsigned long __get_wchan(struct task_struct *p); -extern void select_idle_routine(const struct cpuinfo_x86 *c); +extern void select_idle_routine(void); extern void amd_e400_c1e_apic_setup(void); extern unsigned long boot_option_idle_override; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8f367d376520..5c72af16dd06 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1938,8 +1938,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Init Machine Check Exception if available. */ mcheck_cpu_init(c); - select_idle_routine(c); - #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif @@ -2344,6 +2342,8 @@ void __init arch_cpu_finalize_init(void) { identify_boot_cpu(); + select_idle_routine(); + /* * identify_boot_cpu() initialized SMT support information, let the * core code know. diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ccaacc7f9681..f0166b31a803 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -853,8 +853,9 @@ void __noreturn stop_this_cpu(void *dummy) * Do not prefer MWAIT if MONITOR instruction has a bug or idle=nomwait * is passed to kernel commandline parameter. */ -static bool prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) +static __init bool prefer_mwait_c1_over_halt(void) { + const struct cpuinfo_x86 *c = &boot_cpu_data; u32 eax, ebx, ecx, edx; /* If override is enforced on the command line, fall back to HALT. */ @@ -908,7 +909,7 @@ static __cpuidle void mwait_idle(void) __current_clr_polling(); } -void select_idle_routine(const struct cpuinfo_x86 *c) +void __init select_idle_routine(void) { if (boot_option_idle_override == IDLE_POLL) { if (IS_ENABLED(CONFIG_SMP) && smp_num_siblings > 1) @@ -916,10 +917,11 @@ void select_idle_routine(const struct cpuinfo_x86 *c) return; } + /* Required to guard against xen_set_default_idle() */ if (x86_idle_set()) return; - if (prefer_mwait_c1_over_halt(c)) { + if (prefer_mwait_c1_over_halt()) { pr_info("using mwait in idle threads\n"); static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {