arm64 fixes for -rc2

- Revert temporary bodge in MTE coredumping to ease maple tree integration
 
 - Fix stack frame size warning reported with 64k pages
 
 - Fix stop_machine() race with instruction text patching
 
 - Ensure alternatives patching routines are not instrumented
 
 - Enable Spectre-BHB mitigation for Cortex-A78AE
 
 - Fix hugetlb TLB invalidation when contiguous hint is used
 
 - Minor perf driver fixes
 
 - Fix some typos
 -----BEGIN PGP SIGNATURE-----
 
 iQFEBAABCgAuFiEEPxTL6PPUbjXGY88ct6xw3ITBYzQFAmJQPzQQHHdpbGxAa2Vy
 bmVsLm9yZwAKCRC3rHDchMFjNGWCCACHllJr2EwIkoq9WtuNdKDIsuElVcwkxJBD
 0cnZKgefwsO8BYgmG2AKJxdxqiAAF6ENYl1hVmsySwEmzqQtsuu9B0tQ1IW/1tYf
 XW3WwmmaUc4Os6v7nO0GspngU6eow1R1sypi99k13PJk4V6LLHQUnXbVHB5/39mW
 4upAe7A/Myb4hYlLKN51eFmFe1hmBBqWXJndvLNzODlkHaxHw11fi49rAbV4ibAM
 y+MjhfoqFh9wx8CUw3RNZpR3uWl3XWDfA5UBvSOMcA330UFdtmBTuNxfvMssks33
 wqXbxLzWss7wvwZuvcyaLaFP6LpSJDrAAyKUfsDfK1hd7zOVPaOe
 =GWtz
 -----END PGP SIGNATURE-----

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
 "The two main things to note are:

   (1) The bulk of the diffstat is us reverting a horrible bodge we had
       in place to ease the merging of maple tree during the merge
       window (which turned out not to be needed, but anyway)

   (2) The TLB invalidation fix is done in core code, as suggested by
       (and Acked-by) Peter.

  Summary:

   - Revert temporary bodge in MTE coredumping to ease maple tree integration

   - Fix stack frame size warning reported with 64k pages

   - Fix stop_machine() race with instruction text patching

   - Ensure alternatives patching routines are not instrumented

   - Enable Spectre-BHB mitigation for Cortex-A78AE

   - Fix hugetlb TLB invalidation when contiguous hint is used

   - Minor perf driver fixes

   - Fix some typos"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  perf/imx_ddr: Fix undefined behavior due to shift overflowing the constant
  arm64: Add part number for Arm Cortex-A78AE
  arm64: patch_text: Fixup last cpu should be master
  tlb: hugetlb: Add more sizes to tlb_remove_huge_tlb_entry
  arm64: alternatives: mark patch_alternative() as `noinstr`
  perf: MARVELL_CN10K_DDR_PMU should depend on ARCH_THUNDER
  perf: qcom_l2_pmu: fix an incorrect NULL check on list iterator
  arm64: Fix comments in macro __init_el2_gicv3
  arm64: fix typos in comments
  arch/arm64: Fix topology initialization for core scheduling
  arm64: mte: Fix the stack frame size warning in mte_dump_tag_range()
  Revert "arm64: Change elfcore for_each_mte_vma() to use VMA iterator"
This commit is contained in:
Linus Torvalds 2022-04-08 07:09:17 -10:00
commit c0aa53389b
15 changed files with 53 additions and 41 deletions

View File

@ -75,6 +75,7 @@
#define ARM_CPU_PART_CORTEX_A77 0xD0D
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
#define ARM_CPU_PART_CORTEX_A78 0xD41
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
#define ARM_CPU_PART_CORTEX_A710 0xD47
@ -130,6 +131,7 @@
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)

View File

@ -107,7 +107,7 @@
isb // Make sure SRE is now set
mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks
msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
msr_s SYS_ICH_HCR_EL2, xzr // Reset ICH_HCR_EL2 to defaults
.Lskip_gicv3_\@:
.endm

View File

@ -42,7 +42,7 @@ bool alternative_is_applied(u16 cpufeature)
/*
* Check if the target PC is within an alternative block.
*/
static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
{
unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt);
return !(pc >= replptr && pc <= (replptr + alt->alt_len));
@ -50,7 +50,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
#define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
{
u32 insn;
@ -95,7 +95,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
return insn;
}
static void patch_alternative(struct alt_instr *alt,
static noinstr void patch_alternative(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst)
{
__le32 *replptr;

View File

@ -8,16 +8,9 @@
#include <asm/cpufeature.h>
#include <asm/mte.h>
#ifndef VMA_ITERATOR
#define VMA_ITERATOR(name, mm, addr) \
struct mm_struct *name = mm
#define for_each_vma(vmi, vma) \
for (vma = vmi->mmap; vma; vma = vma->vm_next)
#endif
#define for_each_mte_vma(vmi, vma) \
#define for_each_mte_vma(tsk, vma) \
if (system_supports_mte()) \
for_each_vma(vmi, vma) \
for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \
if (vma->vm_flags & VM_MTE)
static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
@ -32,10 +25,11 @@ static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
static int mte_dump_tag_range(struct coredump_params *cprm,
unsigned long start, unsigned long end)
{
int ret = 1;
unsigned long addr;
void *tags = NULL;
for (addr = start; addr < end; addr += PAGE_SIZE) {
char tags[MTE_PAGE_TAG_STORAGE];
struct page *page = get_dump_page(addr);
/*
@ -59,22 +53,36 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
continue;
}
if (!tags) {
tags = mte_allocate_tag_storage();
if (!tags) {
put_page(page);
ret = 0;
break;
}
}
mte_save_page_tags(page_address(page), tags);
put_page(page);
if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE))
return 0;
if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) {
mte_free_tag_storage(tags);
ret = 0;
break;
}
}
return 1;
if (tags)
mte_free_tag_storage(tags);
return ret;
}
Elf_Half elf_core_extra_phdrs(void)
{
struct vm_area_struct *vma;
int vma_count = 0;
VMA_ITERATOR(vmi, current->mm, 0);
for_each_mte_vma(vmi, vma)
for_each_mte_vma(current, vma)
vma_count++;
return vma_count;
@ -83,9 +91,8 @@ Elf_Half elf_core_extra_phdrs(void)
int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
{
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, current->mm, 0);
for_each_mte_vma(vmi, vma) {
for_each_mte_vma(current, vma) {
struct elf_phdr phdr;
phdr.p_type = PT_ARM_MEMTAG_MTE;
@ -109,9 +116,8 @@ size_t elf_core_extra_data_size(void)
{
struct vm_area_struct *vma;
size_t data_size = 0;
VMA_ITERATOR(vmi, current->mm, 0);
for_each_mte_vma(vmi, vma)
for_each_mte_vma(current, vma)
data_size += mte_vma_tag_dump_size(vma);
return data_size;
@ -120,9 +126,8 @@ size_t elf_core_extra_data_size(void)
int elf_core_write_extra_data(struct coredump_params *cprm)
{
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, current->mm, 0);
for_each_mte_vma(vmi, vma) {
for_each_mte_vma(current, vma) {
if (vma->vm_flags & VM_DONTDUMP)
continue;

View File

@ -701,7 +701,7 @@ NOKPROBE_SYMBOL(breakpoint_handler);
* addresses. There is no straight-forward way, short of disassembling the
* offending instruction, to map that address back to the watchpoint. This
* function computes the distance of the memory access from the watchpoint as a
* heuristic for the likelyhood that a given access triggered the watchpoint.
* heuristic for the likelihood that a given access triggered the watchpoint.
*
* See Section D2.10.5 "Determining the memory location that caused a Watchpoint
* exception" of ARMv8 Architecture Reference Manual for details.

View File

@ -220,7 +220,7 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
* increasing the section's alignment so that the
* resulting address of this instruction is guaranteed
* to equal the offset in that particular bit (as well
* as all less signficant bits). This ensures that the
* as all less significant bits). This ensures that the
* address modulo 4 KB != 0xfff8 or 0xfffc (which would
* have all ones in bits [11:3])
*/

View File

@ -117,8 +117,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg)
int i, ret = 0;
struct aarch64_insn_patch *pp = arg;
/* The first CPU becomes master */
if (atomic_inc_return(&pp->cpu_count) == 1) {
/* The last CPU becomes master */
if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) {
for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
pp->new_insns[i]);

View File

@ -853,6 +853,7 @@ u8 spectre_bhb_loop_affected(int scope)
if (scope == SCOPE_LOCAL_CPU) {
static const struct midr_range spectre_bhb_k32_list[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),

View File

@ -234,6 +234,7 @@ asmlinkage notrace void secondary_start_kernel(void)
* Log the CPU info before it is marked online and might get read.
*/
cpuinfo_store_cpu();
store_cpu_topology(cpu);
/*
* Enable GIC and timers.
@ -242,7 +243,6 @@ asmlinkage notrace void secondary_start_kernel(void)
ipi_setup(cpu);
store_cpu_topology(cpu);
numa_add_cpu(cpu);
/*

View File

@ -140,7 +140,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
/*
* Restore pstate flags. OS lock and mdscr have been already
* restored, so from this point onwards, debugging is fully
* renabled if it was enabled when core started shutdown.
* reenabled if it was enabled when core started shutdown.
*/
local_daif_restore(flags);

View File

@ -73,7 +73,7 @@ EXPORT_SYMBOL(memstart_addr);
* In this scheme a comparatively quicker boot is observed.
*
* If ZONE_DMA configs are defined, crash kernel memory reservation
* is delayed until DMA zone memory range size initilazation performed in
* is delayed until DMA zone memory range size initialization performed in
* zone_sizes_init(). The defer is necessary to steer clear of DMA zone
* memory range to avoid overlap allocation. So crash kernel memory boundaries
* are not known when mapping all bank memory ranges, which otherwise means
@ -81,7 +81,7 @@ EXPORT_SYMBOL(memstart_addr);
* so page-granularity mappings are created for the entire memory range.
* Hence a slightly slower boot is observed.
*
* Note: Page-granularity mapppings are necessary for crash kernel memory
* Note: Page-granularity mappings are necessary for crash kernel memory
* range for shrinking its size via /sys/kernel/kexec_crash_size interface.
*/
#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)

View File

@ -187,7 +187,7 @@ source "drivers/perf/hisilicon/Kconfig"
config MARVELL_CN10K_DDR_PMU
tristate "Enable MARVELL CN10K DRAM Subsystem(DSS) PMU Support"
depends on ARM64 || (COMPILE_TEST && 64BIT)
depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
help
Enable perf support for Marvell DDR Performance monitoring
event on CN10K platform.

View File

@ -29,7 +29,7 @@
#define CNTL_OVER_MASK 0xFFFFFFFE
#define CNTL_CSV_SHIFT 24
#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT)
#define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT)
#define EVENT_CYCLES_ID 0
#define EVENT_CYCLES_COUNTER 0

View File

@ -736,7 +736,7 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
{
u64 mpidr;
int cpu_cluster_id;
struct cluster_pmu *cluster = NULL;
struct cluster_pmu *cluster;
/*
* This assumes that the cluster_id is in MPIDR[aff1] for
@ -758,10 +758,10 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
cluster->cluster_id);
cpumask_set_cpu(cpu, &cluster->cluster_cpus);
*per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster;
break;
return cluster;
}
return cluster;
return NULL;
}
static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)

View File

@ -565,10 +565,14 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
do { \
unsigned long _sz = huge_page_size(h); \
if (_sz == PMD_SIZE) \
tlb_flush_pmd_range(tlb, address, _sz); \
else if (_sz == PUD_SIZE) \
if (_sz >= P4D_SIZE) \
tlb_flush_p4d_range(tlb, address, _sz); \
else if (_sz >= PUD_SIZE) \
tlb_flush_pud_range(tlb, address, _sz); \
else if (_sz >= PMD_SIZE) \
tlb_flush_pmd_range(tlb, address, _sz); \
else \
tlb_flush_pte_range(tlb, address, _sz); \
__tlb_remove_tlb_entry(tlb, ptep, address); \
} while (0)