From b29fea3676033c48a1145b075b3e88c785271f70 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 10 Aug 2018 15:24:39 -0700 Subject: [PATCH 1/6] MIPS: Export tlbmiss_handler_setup_pgd near its definition We export tlbmiss_handler_setup_pgd in arch/mips/mm/tlbex.c close to a declaration of it, rather than close to its definition as is standard. We've supported exporting symbols in assembly code since commit 22823ab419d8 ("EXPORT_SYMBOL() for asm"), so move the export to follow the function's (stub) definition. Signed-off-by: Paul Burton --- arch/mips/include/asm/asm-prototypes.h | 1 + arch/mips/mm/tlb-funcs.S | 2 ++ arch/mips/mm/tlbex.c | 1 - 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/asm-prototypes.h b/arch/mips/include/asm/asm-prototypes.h index 576f1a62dea9..f901ed043c71 100644 --- a/arch/mips/include/asm/asm-prototypes.h +++ b/arch/mips/include/asm/asm-prototypes.h @@ -5,3 +5,4 @@ #include #include #include +#include diff --git a/arch/mips/mm/tlb-funcs.S b/arch/mips/mm/tlb-funcs.S index a5427c6e9757..ae34361b6060 100644 --- a/arch/mips/mm/tlb-funcs.S +++ b/arch/mips/mm/tlb-funcs.S @@ -12,6 +12,7 @@ * Copyright (C) 2012 Ralf Baechle */ #include +#include #include #define FASTPATH_SIZE 128 @@ -22,6 +23,7 @@ LEAF(tlbmiss_handler_setup_pgd) .space 64 END(tlbmiss_handler_setup_pgd) EXPORT(tlbmiss_handler_setup_pgd_end) +EXPORT_SYMBOL_GPL(tlbmiss_handler_setup_pgd) LEAF(handle_tlbm) .space FASTPATH_SIZE * 4 diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 49312a14cd17..2223620fb573 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -1576,7 +1576,6 @@ extern u32 handle_tlbs[], handle_tlbs_end[]; extern u32 handle_tlbm[], handle_tlbm_end[]; extern u32 tlbmiss_handler_setup_pgd_start[]; extern u32 tlbmiss_handler_setup_pgd[]; -EXPORT_SYMBOL_GPL(tlbmiss_handler_setup_pgd); extern u32 tlbmiss_handler_setup_pgd_end[]; static void build_setup_pgd(void) From 4bcb4ad6634ea1084faa630fe35d72f75b5e80fa Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 10 Aug 2018 16:03:31 -0700 Subject: [PATCH 2/6] MIPS: Consistently declare TLB functions Since at least the beginning of the git era we've declared our TLB exception handling functions inconsistently. They're actually functions, but we declare them as arrays of u32 where each u32 is an encoded instruction. This has always been the case for arch/mips/mm/tlbex.c, and has also been true for arch/mips/kernel/traps.c since commit 86a1708a9d54 ("MIPS: Make tlb exception handler definitions and declarations match.") which aimed for consistency but did so by consistently making the our C code inconsistent with our assembly. This is all usually harmless, but when using GCC 7 or newer to build a kernel targeting microMIPS (ie. CONFIG_CPU_MICROMIPS=y) it becomes problematic. With microMIPS bit 0 of the program counter indicates the ISA mode. When bit 0 is zero instructions are decoded using the standard MIPS32 or MIPS64 ISA. When bit 0 is one instructions are decoded using microMIPS. This means that function pointers become odd - their least significant bit is one for microMIPS code. We work around this in cases where we need to access code using loads & stores with our msk_isa16_mode() macro which simply clears bit 0 of the value it is given: #define msk_isa16_mode(x) ((x) & ~0x1) For example we do this for our TLB load handler in build_r4000_tlb_load_handler(): u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbl); We then write code to p, expecting it to be suitably aligned (our LEAF macro aligns functions on 4 byte boundaries, so (ulong)handle_tlbl will give a value one greater than a multiple of 4 - ie. the start of a function on a 4 byte boundary, with the ISA mode bit 0 set). This worked fine up to GCC 6, but GCC 7 & onwards is smart enough to presume that handle_tlbl which we declared as an array of u32s must be aligned sufficiently that bit 0 of its address will never be set, and as a result optimize out msk_isa16_mode(). This leads to p having an address with bit 0 set, and when we go on to attempt to store code at that address we take an address error exception due to the unaligned memory access. This leads to an exception prior to the kernel having configured its own exception handlers, so we jump to whatever handlers the bootloader configured. In the case of QEMU this results in a silent hang, since it has no useful general exception vector. Fix this by consistently declaring our TLB-related functions as functions. For handle_tlbl(), handle_tlbs() & handle_tlbm() we do this in asm/tlbex.h & we make use of the existing declaration of tlbmiss_handler_setup_pgd() in asm/mmu_context.h. Our TLB handler generation code in arch/mips/mm/tlbex.c is adjusted to deal with these definitions, in most cases simply by casting the function pointers to u32 pointers. This allows us to include asm/mmu_context.h in arch/mips/mm/tlbex.c to get the definitions of tlbmiss_handler_setup_pgd & pgd_current, removing some needless duplication. Consistently using msk_isa16_mode() on function pointers means we no longer need the tlbmiss_handler_setup_pgd_start symbol so that is removed entirely. Now that we're declaring our functions as functions GCC stops optimizing out msk_isa16_mode() & a microMIPS kernel built with either GCC 7.3.0 or 8.1.0 boots successfully. Signed-off-by: Paul Burton --- arch/mips/include/asm/mmu_context.h | 1 + arch/mips/include/asm/tlbex.h | 9 +++ arch/mips/kernel/traps.c | 4 +- arch/mips/mm/tlb-funcs.S | 1 - arch/mips/mm/tlbex.c | 100 +++++++++++----------------- 5 files changed, 51 insertions(+), 64 deletions(-) diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index b509371a6b0c..94414561de0e 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h @@ -32,6 +32,7 @@ do { \ } while (0) extern void tlbmiss_handler_setup_pgd(unsigned long); +extern char tlbmiss_handler_setup_pgd_end[]; /* Note: This is also implemented with uasm in arch/mips/kvm/entry.c */ #define TLBMISS_HANDLER_SETUP_PGD(pgd) \ diff --git a/arch/mips/include/asm/tlbex.h b/arch/mips/include/asm/tlbex.h index 4ed7ebed3bf1..6d97e23f30ab 100644 --- a/arch/mips/include/asm/tlbex.h +++ b/arch/mips/include/asm/tlbex.h @@ -24,4 +24,13 @@ void build_tlb_write_entry(u32 **p, struct uasm_label **l, struct uasm_reloc **r, enum tlb_write_entry wmode); +extern void handle_tlbl(void); +extern char handle_tlbl_end[]; + +extern void handle_tlbs(void); +extern char handle_tlbs_end[]; + +extern void handle_tlbm(void); +extern char handle_tlbm_end[]; + #endif /* __ASM_TLBEX_H */ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 4d9ca9b465ae..5e1e580dd164 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -67,14 +67,12 @@ #include #include #include +#include #include extern void check_wait(void); extern asmlinkage void rollback_handle_int(void); extern asmlinkage void handle_int(void); -extern u32 handle_tlbl[]; -extern u32 handle_tlbs[]; -extern u32 handle_tlbm[]; extern asmlinkage void handle_adel(void); extern asmlinkage void handle_ades(void); extern asmlinkage void handle_ibe(void); diff --git a/arch/mips/mm/tlb-funcs.S b/arch/mips/mm/tlb-funcs.S index ae34361b6060..00fef578c8cd 100644 --- a/arch/mips/mm/tlb-funcs.S +++ b/arch/mips/mm/tlb-funcs.S @@ -17,7 +17,6 @@ #define FASTPATH_SIZE 128 -EXPORT(tlbmiss_handler_setup_pgd_start) LEAF(tlbmiss_handler_setup_pgd) 1: j 1b /* Dummy, will be replaced. */ .space 64 diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 2223620fb573..067714291643 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -253,8 +254,10 @@ static void output_pgtable_bits_defines(void) pr_debug("\n"); } -static inline void dump_handler(const char *symbol, const u32 *handler, int count) +static inline void dump_handler(const char *symbol, const void *start, const void *end) { + unsigned int count = (end - start) / sizeof(u32); + const u32 *handler = start; int i; pr_debug("LEAF(%s)\n", symbol); @@ -402,12 +405,6 @@ static void build_restore_work_registers(u32 **p) * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current, * we cannot do r3000 under these circumstances. * - * Declare pgd_current here instead of including mmu_context.h to avoid type - * conflicts for tlbmiss_handler_setup_pgd - */ -extern unsigned long pgd_current[]; - -/* * The R3000 TLB handler is simple. */ static void build_r3000_tlb_refill_handler(void) @@ -444,8 +441,7 @@ static void build_r3000_tlb_refill_handler(void) memcpy((void *)ebase, tlb_handler, 0x80); local_flush_icache_range(ebase, ebase + 0x80); - - dump_handler("r3000_tlb_refill", (u32 *)ebase, 32); + dump_handler("r3000_tlb_refill", (u32 *)ebase, (u32 *)(ebase + 0x80)); } #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ @@ -1465,8 +1461,7 @@ static void build_r4000_tlb_refill_handler(void) memcpy((void *)ebase, final_handler, 0x100); local_flush_icache_range(ebase, ebase + 0x100); - - dump_handler("r4000_tlb_refill", (u32 *)ebase, 64); + dump_handler("r4000_tlb_refill", (u32 *)ebase, (u32 *)(ebase + 0x100)); } static void setup_pw(void) @@ -1568,30 +1563,21 @@ static void build_loongson3_tlb_refill_handler(void) uasm_resolve_relocs(relocs, labels); memcpy((void *)(ebase + 0x80), tlb_handler, 0x80); local_flush_icache_range(ebase + 0x80, ebase + 0x100); - dump_handler("loongson3_tlb_refill", (u32 *)(ebase + 0x80), 32); + dump_handler("loongson3_tlb_refill", + (u32 *)(ebase + 0x80), (u32 *)(ebase + 0x100)); } -extern u32 handle_tlbl[], handle_tlbl_end[]; -extern u32 handle_tlbs[], handle_tlbs_end[]; -extern u32 handle_tlbm[], handle_tlbm_end[]; -extern u32 tlbmiss_handler_setup_pgd_start[]; -extern u32 tlbmiss_handler_setup_pgd[]; -extern u32 tlbmiss_handler_setup_pgd_end[]; - static void build_setup_pgd(void) { const int a0 = 4; const int __maybe_unused a1 = 5; const int __maybe_unused a2 = 6; - u32 *p = tlbmiss_handler_setup_pgd_start; - const int tlbmiss_handler_setup_pgd_size = - tlbmiss_handler_setup_pgd_end - tlbmiss_handler_setup_pgd_start; + u32 *p = (u32 *)msk_isa16_mode((ulong)tlbmiss_handler_setup_pgd); #ifndef CONFIG_MIPS_PGD_C0_CONTEXT long pgdc = (long)pgd_current; #endif - memset(tlbmiss_handler_setup_pgd, 0, tlbmiss_handler_setup_pgd_size * - sizeof(tlbmiss_handler_setup_pgd[0])); + memset(p, 0, tlbmiss_handler_setup_pgd_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); pgd_reg = allocate_kscratch(); @@ -1644,15 +1630,15 @@ static void build_setup_pgd(void) else uasm_i_nop(&p); #endif - if (p >= tlbmiss_handler_setup_pgd_end) + if (p >= (u32 *)tlbmiss_handler_setup_pgd_end) panic("tlbmiss_handler_setup_pgd space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n", - (unsigned int)(p - tlbmiss_handler_setup_pgd)); + (unsigned int)(p - (u32 *)tlbmiss_handler_setup_pgd)); dump_handler("tlbmiss_handler", tlbmiss_handler_setup_pgd, - tlbmiss_handler_setup_pgd_size); + tlbmiss_handler_setup_pgd_end); } static void @@ -1921,12 +1907,11 @@ build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, static void build_r3000_tlb_load_handler(void) { - u32 *p = handle_tlbl; - const int handle_tlbl_size = handle_tlbl_end - handle_tlbl; + u32 *p = (u32 *)handle_tlbl; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0])); + memset(p, 0, handle_tlbl_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -1940,24 +1925,23 @@ static void build_r3000_tlb_load_handler(void) uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbl_end) + if (p >= (u32 *)handle_tlbl_end) panic("TLB load handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbl)); + (unsigned int)(p - (u32 *)handle_tlbl)); - dump_handler("r3000_tlb_load", handle_tlbl, handle_tlbl_size); + dump_handler("r3000_tlb_load", handle_tlbl, handle_tlbl_end); } static void build_r3000_tlb_store_handler(void) { - u32 *p = handle_tlbs; - const int handle_tlbs_size = handle_tlbs_end - handle_tlbs; + u32 *p = (u32 *)handle_tlbs; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0])); + memset(p, 0, handle_tlbs_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -1971,24 +1955,23 @@ static void build_r3000_tlb_store_handler(void) uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbs_end) + if (p >= (u32 *)handle_tlbs_end) panic("TLB store handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbs)); + (unsigned int)(p - (u32 *)handle_tlbs)); - dump_handler("r3000_tlb_store", handle_tlbs, handle_tlbs_size); + dump_handler("r3000_tlb_store", handle_tlbs, handle_tlbs_end); } static void build_r3000_tlb_modify_handler(void) { - u32 *p = handle_tlbm; - const int handle_tlbm_size = handle_tlbm_end - handle_tlbm; + u32 *p = (u32 *)handle_tlbm; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; - memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0])); + memset(p, 0, handle_tlbm_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -2002,14 +1985,14 @@ static void build_r3000_tlb_modify_handler(void) uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbm_end) + if (p >= (u32 *)handle_tlbm_end) panic("TLB modify handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbm)); + (unsigned int)(p - (u32 *)handle_tlbm)); - dump_handler("r3000_tlb_modify", handle_tlbm, handle_tlbm_size); + dump_handler("r3000_tlb_modify", handle_tlbm, handle_tlbm_end); } #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ @@ -2101,12 +2084,11 @@ build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l, static void build_r4000_tlb_load_handler(void) { u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbl); - const int handle_tlbl_size = handle_tlbl_end - handle_tlbl; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; struct work_registers wr; - memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0])); + memset(p, 0, handle_tlbl_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -2287,25 +2269,24 @@ static void build_r4000_tlb_load_handler(void) uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbl_end) + if (p >= (u32 *)handle_tlbl_end) panic("TLB load handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbl)); + (unsigned int)(p - (u32 *)handle_tlbl)); - dump_handler("r4000_tlb_load", handle_tlbl, handle_tlbl_size); + dump_handler("r4000_tlb_load", handle_tlbl, handle_tlbl_end); } static void build_r4000_tlb_store_handler(void) { u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbs); - const int handle_tlbs_size = handle_tlbs_end - handle_tlbs; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; struct work_registers wr; - memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0])); + memset(p, 0, handle_tlbs_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -2342,25 +2323,24 @@ static void build_r4000_tlb_store_handler(void) uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbs_end) + if (p >= (u32 *)handle_tlbs_end) panic("TLB store handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbs)); + (unsigned int)(p - (u32 *)handle_tlbs)); - dump_handler("r4000_tlb_store", handle_tlbs, handle_tlbs_size); + dump_handler("r4000_tlb_store", handle_tlbs, handle_tlbs_end); } static void build_r4000_tlb_modify_handler(void) { u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbm); - const int handle_tlbm_size = handle_tlbm_end - handle_tlbm; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; struct work_registers wr; - memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0])); + memset(p, 0, handle_tlbm_end - (char *)p); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -2398,14 +2378,14 @@ static void build_r4000_tlb_modify_handler(void) uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); uasm_i_nop(&p); - if (p >= handle_tlbm_end) + if (p >= (u32 *)handle_tlbm_end) panic("TLB modify handler fastpath space exceeded"); uasm_resolve_relocs(relocs, labels); pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbm)); + (unsigned int)(p - (u32 *)handle_tlbm)); - dump_handler("r4000_tlb_modify", handle_tlbm, handle_tlbm_size); + dump_handler("r4000_tlb_modify", handle_tlbm, handle_tlbm_end); } static void flush_tlb_handlers(void) From cfd54de3b0e4f82b3f2b80230726f74a6bec964f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 17 Aug 2018 15:36:24 -0700 Subject: [PATCH 3/6] MIPS: Avoid move psuedo-instruction whilst using MIPS_ISA_LEVEL MIPS_ISA_LEVEL is always defined as the 64 bit ISA that is a compatible superset of the ISA that the kernel build is targeting, and is used to allow us to emit instructions that we may detect support for at runtime. When we use a .set MIPS_ISA_LEVEL directive & are building a 32-bit kernel, we therefore are temporarily allowing the assembler to generate MIPS64 instructions. Using the move pseudo-instruction whilst this is the case is problematic because the assembler is likely to emit a daddu instruction which will generate a reserved instruction exception when executed on a MIPS32 machine. Unfortunately the combination of commit a0a5ac3ce8fe ("MIPS: Fix delay slot bug in `atomic*_sub_if_positive' for R10000_LLSC_WAR") and commit 4936084c2ee2 ("MIPS: Cleanup R10000_LLSC_WAR logic in atomic.h") causes us to do exactly this in atomic_sub_if_positive(), and the result is MIPS64 daddu instructions in 32-bit kernels. Fix this by using .set mips0 to restore the default ISA after the ll instruction, and use .set MIPS_ISA_LEVEL again prior to the sc. This ensures everything but the ll & sc are assembled using the default ISA for the kernel build & the move pseudo-instruction is emitted as a MIPS32 addu instruction. We appear to have another pre-existing instance of the same issue in our atomic_fetch_*_relaxed() functions, and fix that up too by moving our .set move0 such that it occurs prior to use of the move pseudo-instruction. Signed-off-by: Paul Burton Fixes: a0a5ac3ce8fe ("MIPS: Fix delay slot bug in `atomic*_sub_if_positive' for R10000_LLSC_WAR") Fixes: 4936084c2ee2 ("MIPS: Cleanup R10000_LLSC_WAR logic in atomic.h") Patchwork: https://patchwork.linux-mips.org/patch/20253/ Cc: James Hogan Cc: Joshua Kinard Cc: Ralf Baechle Cc: linux-mips@linux-mips.org --- arch/mips/include/asm/atomic.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 3ccea238be2d..f8793f1f2670 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -122,8 +122,8 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ " " #asm_op " %0, %1, %3 \n" \ " sc %0, %2 \n" \ "\t" __scbeqz " %0, 1b \n" \ - " move %0, %1 \n" \ " .set mips0 \n" \ + " move %0, %1 \n" \ : "=&r" (result), "=&r" (temp), \ "+" GCC_OFF_SMALL_ASM() (v->counter) \ : "Ir" (i)); \ @@ -190,9 +190,11 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) __asm__ __volatile__( " .set "MIPS_ISA_LEVEL" \n" "1: ll %1, %2 # atomic_sub_if_positive\n" + " .set mips0 \n" " subu %0, %1, %3 \n" " move %1, %0 \n" " bltz %0, 1f \n" + " .set "MIPS_ISA_LEVEL" \n" " sc %1, %2 \n" "\t" __scbeqz " %1, 1b \n" "1: \n" From 04f264d3a8b0eb25d378127bd78c3c9a0261c828 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 20 Aug 2018 15:36:17 -0700 Subject: [PATCH 4/6] compiler.h: Allow arch-specific asm/compiler.h We have a need to override the definition of barrier_before_unreachable() for MIPS, which means we either need to add architecture-specific code into linux/compiler-gcc.h or we need to allow the architecture to provide a header that can define the macro before the generic definition. The latter seems like the better approach. A straightforward approach to the per-arch header is to make use of asm-generic to provide a default empty header & adjust architectures which don't need anything specific to make use of that by adding the header to generic-y. Unfortunately this doesn't work so well due to commit 28128c61e08e ("kconfig.h: Include compiler types to avoid missed struct attributes") which caused linux/compiler_types.h to be included in the compilation of every C file via the -include linux/kconfig.h flag in c_flags. Because the -include flag is present for all C files we compile, we need the architecture-provided header to be present before any C files are compiled. If any C files can be compiled prior to the asm-generic header wrappers being generated then we hit a build failure due to missing header. Such cases do exist - one pointed out by the kbuild test robot is the compilation of arch/ia64/kernel/nr-irqs.c, which occurs as part of the archprepare target [1]. This leaves us with a few options: 1) Use generic-y & fix any build failures we find by enforcing ordering such that the asm-generic target occurs before any C compilation, such that linux/compiler_types.h can always include the generated asm-generic wrapper which in turn includes the empty asm-generic header. This would rely on us finding all the problematic cases - I don't know for sure that the ia64 issue is the only one. 2) Add an actual empty header to each architecture, so that we don't need the generated asm-generic wrapper. This seems messy. 3) Give up & add #ifdef CONFIG_MIPS or similar to linux/compiler_types.h. This seems messy too. 4) Include the arch header only when it's actually needed, removing the need for the asm-generic wrapper for all other architectures. This patch allows us to use approach 4, by including an asm/compiler.h header from linux/compiler_types.h after the inclusion of the compiler-specific linux/compiler-*.h header(s). We do this conditionally, only when CONFIG_HAVE_ARCH_COMPILER_H is selected, in order to avoid the need for asm-generic wrappers & the associated build ordering issue described above. The asm/compiler.h header is included after the generic linux/compiler-*.h header(s) for consistency with the way linux/compiler-intel.h & linux/compiler-clang.h are included after the linux/compiler-gcc.h header that they override. [1] https://lists.01.org/pipermail/kbuild-all/2018-August/051175.html Signed-off-by: Paul Burton Reviewed-by: Masahiro Yamada Patchwork: https://patchwork.linux-mips.org/patch/20269/ Cc: Arnd Bergmann Cc: James Hogan Cc: Masahiro Yamada Cc: Ralf Baechle Cc: linux-arch@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: linux-mips@linux-mips.org --- arch/Kconfig | 8 ++++++++ include/linux/compiler_types.h | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 1aa59063f1fd..5c7c48e7b727 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -971,4 +971,12 @@ config REFCOUNT_FULL against various use-after-free conditions that can be used in security flaw exploits. +config HAVE_ARCH_COMPILER_H + bool + help + An architecture can select this if it provides an + asm/compiler.h header that should be included after + linux/compiler-*.h in order to override macro definitions that those + headers generally provide. + source "kernel/gcov/Kconfig" diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6b79a9bba9a7..4be464a07612 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -78,6 +78,18 @@ extern void __chk_io_ptr(const volatile void __iomem *); #include #endif +/* + * Some architectures need to provide custom definitions of macros provided + * by linux/compiler-*.h, and can do so using asm/compiler.h. We include that + * conditionally rather than using an asm-generic wrapper in order to avoid + * build failures if any C compilation, which will include this file via an + * -include argument in c_flags, occurs prior to the asm-generic wrappers being + * generated. + */ +#ifdef CONFIG_HAVE_ARCH_COMPILER_H +#include +#endif + /* * Generic compiler-dependent macros required for kernel * build go below this comment. Actual compiler/compiler version From 906d441febc0de974b2a6ef848a8f058f3bfada3 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 20 Aug 2018 15:36:18 -0700 Subject: [PATCH 5/6] MIPS: Workaround GCC __builtin_unreachable reordering bug Some versions of GCC for the MIPS architecture suffer from a bug which can lead to instructions from beyond an unreachable statement being incorrectly reordered into earlier branch delay slots if the unreachable statement is the only content of a case in a switch statement. This can lead to seemingly random behaviour, such as invalid memory accesses from incorrectly reordered loads or stores, and link failures on microMIPS builds. See this potential GCC fix for details: https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00360.html Runtime problems resulting from this bug were initially observed using a maltasmvp_defconfig v4.4 kernel built using GCC 4.9.2 (from a Codescape SDK 2015.06-05 toolchain), with the result being an address exception taken after log messages about the L1 caches (during probe of the L2 cache): Initmem setup node 0 [mem 0x0000000080000000-0x000000009fffffff] VPE topology {2,2} total 4 Primary instruction cache 64kB, VIPT, 4-way, linesize 32 bytes. Primary data cache 64kB, 4-way, PIPT, no aliases, linesize 32 bytes This is early enough that the kernel exception vectors are not in use, so any further output depends upon the bootloader. This is reproducible in QEMU where no further output occurs - ie. the system hangs here. Given the nature of the bug it may potentially be hit with differing symptoms. The bug is known to affect GCC versions as recent as 7.3, and it is unclear whether GCC 8 fixed it or just happens not to encounter the bug in the testcase found at the link above due to differing optimizations. This bug can be worked around by placing a volatile asm statement, which GCC is prevented from reordering past, prior to the __builtin_unreachable call. That was actually done already for other reasons by commit 173a3efd3edb ("bug.h: work around GCC PR82365 in BUG()"), but creates problems for microMIPS builds due to the lack of a .insn directive. The microMIPS ISA allows for interlinking with regular MIPS32 code by repurposing bit 0 of the program counter as an ISA mode bit. To switch modes one changes the value of this bit in the PC. However typical branch instructions encode their offsets as multiples of 2-byte instruction halfwords, which means they cannot change ISA mode - this must be done using either an indirect branch (a jump-register in MIPS terminology) or a dedicated jalx instruction. In order to ensure that regular branches don't attempt to target code in a different ISA which they can't actually switch to, the linker will check that branch targets are code in the same ISA as the branch. Unfortunately our empty asm volatile statements don't qualify as code, and the link for microMIPS builds fails with errors such as: arch/mips/mm/dma-default.s:3265: Error: branch to a symbol in another ISA mode arch/mips/mm/dma-default.s:5027: Error: branch to a symbol in another ISA mode Resolve this by adding a .insn directive within the asm statement which declares that what comes next is code. This may or may not be true, since we don't really know what comes next, but as this code is in an unreachable path anyway that doesn't matter since we won't execute it. We do this in asm/compiler.h & select CONFIG_HAVE_ARCH_COMPILER_H in order to have this included by linux/compiler_types.h after linux/compiler-gcc.h. This will result in asm/compiler.h being included in all C compilations via the -include linux/compiler_types.h argument in c_flags, which should be harmless. Signed-off-by: Paul Burton Fixes: 173a3efd3edb ("bug.h: work around GCC PR82365 in BUG()") Patchwork: https://patchwork.linux-mips.org/patch/20270/ Cc: James Hogan Cc: Ralf Baechle Cc: Arnd Bergmann Cc: linux-mips@linux-mips.org --- arch/mips/Kconfig | 1 + arch/mips/include/asm/compiler.h | 35 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 642a56e2a1ea..f127dbd4b85a 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -33,6 +33,7 @@ config MIPS select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HANDLE_DOMAIN_IRQ + select HAVE_ARCH_COMPILER_H select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS if MMU diff --git a/arch/mips/include/asm/compiler.h b/arch/mips/include/asm/compiler.h index e081a265f422..cc2eb1b06050 100644 --- a/arch/mips/include/asm/compiler.h +++ b/arch/mips/include/asm/compiler.h @@ -8,6 +8,41 @@ #ifndef _ASM_COMPILER_H #define _ASM_COMPILER_H +/* + * With GCC 4.5 onwards we can use __builtin_unreachable to indicate to the + * compiler that a particular code path will never be hit. This allows it to be + * optimised out of the generated binary. + * + * Unfortunately at least GCC 4.6.3 through 7.3.0 inclusive suffer from a bug + * that can lead to instructions from beyond an unreachable statement being + * incorrectly reordered into earlier delay slots if the unreachable statement + * is the only content of a case in a switch statement. This can lead to + * seemingly random behaviour, such as invalid memory accesses from incorrectly + * reordered loads or stores. See this potential GCC fix for details: + * + * https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00360.html + * + * It is unclear whether GCC 8 onwards suffer from the same issue - nothing + * relevant is mentioned in GCC 8 release notes and nothing obviously relevant + * stands out in GCC commit logs, but these newer GCC versions generate very + * different code for the testcase which doesn't exhibit the bug. + * + * GCC also handles stack allocation suboptimally when calling noreturn + * functions or calling __builtin_unreachable(): + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 + * + * We work around both of these issues by placing a volatile asm statement, + * which GCC is prevented from reordering past, prior to __builtin_unreachable + * calls. + * + * The .insn statement is required to ensure that any branches to the + * statement, which sadly must be kept due to the asm statement, are known to + * be branches to code and satisfy linker requirements for microMIPS kernels. + */ +#undef barrier_before_unreachable +#define barrier_before_unreachable() asm volatile(".insn") + #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) #define GCC_IMM_ASM() "n" #define GCC_REG_ACCUM "$0" From 690d9163bf4b8563a2682e619f938e6a0443947f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 21 Aug 2018 12:12:59 -0700 Subject: [PATCH 6/6] MIPS: lib: Provide MIPS64r6 __multi3() for GCC < 7 Some versions of GCC suboptimally generate calls to the __multi3() intrinsic for MIPS64r6 builds, resulting in link failures due to the missing function: LD vmlinux.o MODPOST vmlinux.o kernel/bpf/verifier.o: In function `kmalloc_array': include/linux/slab.h:631: undefined reference to `__multi3' fs/select.o: In function `kmalloc_array': include/linux/slab.h:631: undefined reference to `__multi3' ... We already have a workaround for this in which we provide the instrinsic, but we do so selectively for GCC 7 only. Unfortunately the issue occurs with older GCC versions too - it has been observed with both GCC 5.4.0 & GCC 6.4.0. MIPSr6 support was introduced in GCC 5, so all major GCC versions prior to GCC 8 are affected and we extend our workaround accordingly to all MIPS64r6 builds using GCC versions older than GCC 8. Signed-off-by: Paul Burton Reported-by: Vladimir Kondratiev Fixes: ebabcf17bcd7 ("MIPS: Implement __multi3 for GCC7 MIPS64r6 builds") Patchwork: https://patchwork.linux-mips.org/patch/20297/ Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.15+ --- arch/mips/lib/multi3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c index 111ad475aa0c..4c2483f410c2 100644 --- a/arch/mips/lib/multi3.c +++ b/arch/mips/lib/multi3.c @@ -4,12 +4,12 @@ #include "libgcc.h" /* - * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that - * specific case only we'll implement it here. + * GCC 7 & older can suboptimally generate __multi3 calls for mips64r6, so for + * that specific case only we implement that intrinsic here. * * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981 */ -#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7) +#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ < 8) /* multiply 64-bit values, low 64-bits returned */ static inline long long notrace dmulu(long long a, long long b)