powerpc fixes for 4.16 #2

A larger batch of fixes than we'd like. Roughly 1/3 fixes for new code, 1/3
 fixes for stable and 1/3 minor things.
 
 There's four commits fixing bugs when using 16GB huge pages on hash, caused by
 some of the preparatory changes for pkeys.
 
 Two fixes for bugs in the enhanced IRQ soft masking for local_t, one of which
 broke KVM in some circumstances.
 
 Four fixes for Power9. The most bizarre being a bug where futexes stopped
 working because a NULL pointer dereference didn't trap during early boot (it
 aliased the kernel mapping). A fix for memory hotplug when using the Radix MMU,
 and a fix for live migration of guests using the Radix MMU.
 
 Two fixes for hotplug on pseries machines. One where we weren't correctly
 updating NUMA info when CPUs are added and removed. And the other fixes
 crashes/hangs seen when doing memory hot remove during boot, which is apparently
 a thing people do.
 
 Finally a handful of build fixes for obscure configs and other minor fixes.
 
 Thanks to:
   Alexey Kardashevskiy, Aneesh Kumar K.V, Balbir Singh, Colin Ian King, Daniel
   Henrique Barboza, Florian Weimer, Guenter Roeck, Harish, Laurent Vivier,
   Madhavan Srinivasan, Mauricio Faria de Oliveira, Nathan Fontenot, Nicholas
   Piggin, Sam Bobroff.
 -----BEGIN PGP SIGNATURE-----
 
 iQIwBAABCAAaBQJahDTmExxtcGVAZWxsZXJtYW4uaWQuYXUACgkQUevqPMjhpYAd
 chAAtVe8hmkEJefTbU63GBeqva0JHSiTu2DENZAlN/epWtbtyl05PLETMdTcwGCv
 nK2zzR+xbSFN1DzZK8KQfDBW33McKZE+YkHwYOC8Kff/N0SKdHK4zvxYr7FTZGzG
 9uSG5vrxVEsPLT/yANabl0d0vKWMsJ1jZquvJAU0eLNUbA/skGjEPADtXqYQUXiA
 EnW4xeczsMLjuzTleoRqrBx74Gulovuq9LVAjfDvkydWlCU9MQkrodCgP0V2hQtw
 RAJ/QLY+NS/vMCBnvVOGBaKzIqrfeQTHF3P0j4pyBeBq/2kNuidM5n25uoc31wUq
 DE4Ebe2FJA6CHP5KEyf7dr9y7gsks/ak3/CKs+l6Yz3/0BqenEMhu6WKJ1tgf9cC
 qAmi1dIjtpw6JZ6baCbkloUdAGNjKVfLWB9ld9VIfg0C+C3y4L7+TKJukxrCBGI6
 hopfT/3p8xUdla3euiRXRLZzajyKDGrqk71hk5J/J0ChXfWB0B51X0F6NIfH41Mn
 YsVUQ95p3zS79Pl942ijGScFX/bNVLfEEGzlI/nwU/wbTxF5g/XNXm5PjBsGSr/W
 zFcCwCpFV2b/kypQoxQA5CbrKRCLOleDA/lLOxW/1NMYOQsNj05DM9wYAw5Bl+lX
 AVj2c5jM9heNN4scxDiufRNfqZbyjZ4fFUpXLNqs7N5vcks=
 =BmuL
 -----END PGP SIGNATURE-----

Merge tag 'powerpc-4.16-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "A larger batch of fixes than we'd like. Roughly 1/3 fixes for new
  code, 1/3 fixes for stable and 1/3 minor things.

  There's four commits fixing bugs when using 16GB huge pages on hash,
  caused by some of the preparatory changes for pkeys.

  Two fixes for bugs in the enhanced IRQ soft masking for local_t, one
  of which broke KVM in some circumstances.

  Four fixes for Power9. The most bizarre being a bug where futexes
  stopped working because a NULL pointer dereference didn't trap during
  early boot (it aliased the kernel mapping). A fix for memory hotplug
  when using the Radix MMU, and a fix for live migration of guests using
  the Radix MMU.

  Two fixes for hotplug on pseries machines. One where we weren't
  correctly updating NUMA info when CPUs are added and removed. And the
  other fixes crashes/hangs seen when doing memory hot remove during
  boot, which is apparently a thing people do.

  Finally a handful of build fixes for obscure configs and other minor
  fixes.

  Thanks to: Alexey Kardashevskiy, Aneesh Kumar K.V, Balbir Singh, Colin
  Ian King, Daniel Henrique Barboza, Florian Weimer, Guenter Roeck,
  Harish, Laurent Vivier, Madhavan Srinivasan, Mauricio Faria de
  Oliveira, Nathan Fontenot, Nicholas Piggin, Sam Bobroff"

* tag 'powerpc-4.16-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  selftests/powerpc: Fix to use ucontext_t instead of struct ucontext
  powerpc/kdump: Fix powernv build break when KEXEC_CORE=n
  powerpc/pseries: Fix build break for SPLPAR=n and CPU hotplug
  powerpc/mm/hash64: Zero PGD pages on allocation
  powerpc/mm/hash64: Store the slot information at the right offset for hugetlb
  powerpc/mm/hash64: Allocate larger PMD table if hugetlb config is enabled
  powerpc/mm: Fix crashes with 16G huge pages
  powerpc/mm: Flush radix process translations when setting MMU type
  powerpc/vas: Don't set uses_vas for kernel windows
  powerpc/pseries: Enable RAS hotplug events later
  powerpc/mm/radix: Split linear mapping on hot-unplug
  powerpc/64s/radix: Boot-time NULL pointer protection using a guard-PID
  ocxl: fix signed comparison with less than zero
  powerpc/64s: Fix may_hard_irq_enable() for PMI soft masking
  powerpc/64s: Fix MASKABLE_RELON_EXCEPTION_HV_OOL macro
  powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove
This commit is contained in:
Linus Torvalds 2018-02-14 10:06:41 -08:00
commit 694a20dae6
28 changed files with 231 additions and 79 deletions

View File

@ -16,6 +16,7 @@
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)

View File

@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
* keeping the prototype consistent across the two formats.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
unsigned int subpg_index, unsigned long hidx)
unsigned int subpg_index, unsigned long hidx,
int offset)
{
return (hidx << H_PAGE_F_GIX_SHIFT) &
(H_PAGE_F_SECOND | H_PAGE_F_GIX);

View File

@ -45,7 +45,7 @@
* generic accessors and iterators here
*/
#define __real_pte __real_pte
static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
{
real_pte_t rpte;
unsigned long *hidxp;
@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
*/
smp_rmb();
hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
hidxp = (unsigned long *)(ptep + offset);
rpte.hidx = *hidxp;
return rpte;
}
@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
* expected to modify the PTE bits accordingly and commit the PTE to memory.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
unsigned int subpg_index, unsigned long hidx)
unsigned int subpg_index,
unsigned long hidx, int offset)
{
unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
unsigned long *hidxp = (unsigned long *)(ptep + offset);
rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
*hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
}
#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
(sizeof(unsigned long) << PMD_INDEX_SIZE))
#else
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif
#ifdef CONFIG_HUGETLB_PAGE
#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \
(sizeof(unsigned long) << PUD_INDEX_SIZE))
#else
#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
#endif
#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE

View File

@ -23,7 +23,8 @@
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES)
#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
defined(CONFIG_PPC_64K_PAGES)
/*
* only with hash 64k we need to use the second half of pmd page table
* to store pointer to deposited pgtable_t
@ -32,6 +33,16 @@
#else
#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE
#endif
/*
* We store the slot details in the second half of page table.
* Increase the pud level table so that hugetlb ptes can be stored
* at pud level.
*/
#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES)
#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1)
#else
#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE)
#endif
/*
* Define the address range of the kernel non-linear virtual area
*/

View File

@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
if (radix_enabled())
return radix__pgd_alloc(mm);
return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
memset(pgd, 0, PGD_TABLE_SIZE);
return pgd;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
* ahead and flush the page walk cache
*/
flush_tlb_pgtable(tlb, address);
pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)

View File

@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
extern unsigned long __pud_index_size;
extern unsigned long __pgd_index_size;
extern unsigned long __pmd_cache_index;
extern unsigned long __pud_cache_index;
#define PTE_INDEX_SIZE __pte_index_size
#define PMD_INDEX_SIZE __pmd_index_size
#define PUD_INDEX_SIZE __pud_index_size
#define PGD_INDEX_SIZE __pgd_index_size
#define PMD_CACHE_INDEX __pmd_cache_index
#define PUD_CACHE_INDEX __pud_cache_index
/*
* Because of use of pte fragments and THP, size of page table
* are not always derived out of index size above.
@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
*/
#ifndef __real_pte
#define __real_pte(e,p) ((real_pte_t){(e)})
#define __real_pte(e, p, o) ((real_pte_t){(e)})
#define __rpte_to_pte(r) ((r).pte)
#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)

View File

@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXC_HV, SOFTEN_TEST_HV, bitmask)
#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/*

View File

@ -29,6 +29,16 @@
#define PACA_IRQ_HMI 0x20
#define PACA_IRQ_PMI 0x40
/*
* Some soft-masked interrupts must be hard masked until they are replayed
* (e.g., because the soft-masked handler does not clear the exception).
*/
#ifdef CONFIG_PPC_BOOK3S
#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI)
#else
#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE)
#endif
/*
* flags for paca->irq_soft_mask
*/
@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
static inline void may_hard_irq_enable(void)
{
get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
if (!(get_paca()->irq_happened & PACA_IRQ_EE))
if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
__hard_irq_enable();
}

View File

@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
return false;
}
static inline void crash_ipi_callback(struct pt_regs *regs) { }
static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
}
#endif /* CONFIG_KEXEC_CORE */
#endif /* ! __ASSEMBLY__ */
#endif /* __KERNEL__ */

View File

@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)

View File

@ -27,6 +27,7 @@
#else
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#endif
#define PUD_CACHE_INDEX PUD_INDEX_SIZE
/*
* Define the address range of the kernel non-linear virtual area

View File

@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
extern int numa_update_cpu_topology(bool cpus_locked);
static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
}
static inline int early_cpu_to_node(int cpu)
{
int nid;
@ -82,6 +87,7 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
extern int start_topology_update(void);
extern int stop_topology_update(void);
extern int prrn_is_enabled(void);
extern int find_and_online_cpu_nid(int cpu);
#else
static inline int start_topology_update(void)
{
@ -95,6 +101,10 @@ static inline int prrn_is_enabled(void)
{
return 0;
}
static inline int find_and_online_cpu_nid(int cpu)
{
return 0;
}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)

View File

@ -943,6 +943,8 @@ kernel_dbg_exc:
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
* accordingly and if the interrupt is level sensitive, we hard disable
* hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so
* keep these in synch.
*/
.macro masked_interrupt_book3e paca_irq full_mask

View File

@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
* triggered and won't automatically refire.
* - If it was a HMI we return immediately since we handled it in realmode
* and it won't refire.
* - else we hard disable and return.
* - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
#define MASKED_INTERRUPT(_H) \
@ -1441,8 +1441,8 @@ masked_##_H##interrupt: \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
b MASKED_DEC_HANDLER_LABEL; \
1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \
bne 2f; \
1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \
beq 2f; \
mfspr r10,SPRN_##_H##SRR1; \
xori r10,r10,MSR_EE; /* clear MSR_EE */ \
mtspr SPRN_##_H##SRR1,r10; \

View File

@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* need to add in 0x1 if it's a read-only user page
*/
rflags = htab_convert_pte_flags(new_pte);
rpte = __real_pte(__pte(old_pte), ptep);
rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@ -117,7 +117,7 @@ repeat:
return -1;
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;

View File

@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
vpn = hpt_vpn(ea, vsid, ssize);
rpte = __real_pte(__pte(old_pte), ptep);
rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
/*
*None of the sub 4k page is hashed
*/
@ -214,7 +214,7 @@ repeat:
return -1;
}
new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
new_pte |= H_PAGE_HASHPTE;
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
rpte = __real_pte(__pte(old_pte), ptep);
rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@ -327,7 +327,7 @@ repeat:
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;

View File

@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)
__pmd_index_size = H_PMD_INDEX_SIZE;
__pud_index_size = H_PUD_INDEX_SIZE;
__pgd_index_size = H_PGD_INDEX_SIZE;
__pud_cache_index = H_PUD_CACHE_INDEX;
__pmd_cache_index = H_PMD_CACHE_INDEX;
__pte_table_size = H_PTE_TABLE_SIZE;
__pmd_table_size = H_PMD_TABLE_SIZE;

View File

@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
unsigned long vpn;
unsigned long old_pte, new_pte;
unsigned long rflags, pa, sz;
long slot;
long slot, offset;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
rpte = __real_pte(__pte(old_pte), ptep);
if (unlikely(mmu_psize == MMU_PAGE_16G))
offset = PTRS_PER_PUD;
else
offset = PTRS_PER_PMD;
rpte = __real_pte(__pte(old_pte), ptep, offset);
sz = ((1UL) << shift);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
}
/*

View File

@ -100,6 +100,6 @@ void pgtable_cache_init(void)
* same size as either the pgd or pmd index except with THP enabled
* on book3s 64
*/
if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
}

View File

@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
numa_cpu_lookup_table[cpu] = -1;
}
static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
}
static void map_cpu_to_node(int cpu, int node)
{
update_numa_cpu_lookup_table(cpu, node);

View File

@ -17,9 +17,11 @@
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/string_helpers.h>
#include <linux/stop_machine.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)
"r" (TLBIEL_INVAL_SET_LPID), "r" (0));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
/*
* The init_mm context is given the first available (non-zero) PID,
* which is the "guard PID" and contains no page table. PIDR should
* never be set to zero because that duplicates the kernel address
* space at the 0x0... offset (quadrant 0)!
*
* An arbitrary PID that may later be allocated by the PID allocator
* for userspace processes must not be used either, because that
* would cause stale user mappings for that PID on CPUs outside of
* the TLB invalidation scheme (because it won't be in mm_cpumask).
*
* So permanently carve out one PID for the purpose of a guard PID.
*/
init_mm.context.id = mmu_base_pid;
mmu_base_pid++;
}
static void __init radix_init_partition_table(void)
@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)
__pmd_index_size = RADIX_PMD_INDEX_SIZE;
__pud_index_size = RADIX_PUD_INDEX_SIZE;
__pgd_index_size = RADIX_PGD_INDEX_SIZE;
__pud_cache_index = RADIX_PUD_INDEX_SIZE;
__pmd_cache_index = RADIX_PMD_INDEX_SIZE;
__pte_table_size = RADIX_PTE_TABLE_SIZE;
__pmd_table_size = RADIX_PMD_TABLE_SIZE;
@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)
radix_init_iamr();
radix_init_pgtable();
/* Switch to the guard PID before turning on MMU */
radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)
}
radix_init_iamr();
radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
pud_clear(pud);
}
struct change_mapping_params {
pte_t *pte;
unsigned long start;
unsigned long end;
unsigned long aligned_start;
unsigned long aligned_end;
};
static int stop_machine_change_mapping(void *data)
{
struct change_mapping_params *params =
(struct change_mapping_params *)data;
if (!data)
return -1;
spin_unlock(&init_mm.page_table_lock);
pte_clear(&init_mm, params->aligned_start, params->pte);
create_physical_mapping(params->aligned_start, params->start);
create_physical_mapping(params->end, params->aligned_end);
spin_lock(&init_mm.page_table_lock);
return 0;
}
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
unsigned long end)
{
@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
}
}
/*
* clear the pte and potentially split the mapping helper
*/
static void split_kernel_mapping(unsigned long addr, unsigned long end,
unsigned long size, pte_t *pte)
{
unsigned long mask = ~(size - 1);
unsigned long aligned_start = addr & mask;
unsigned long aligned_end = addr + size;
struct change_mapping_params params;
bool split_region = false;
if ((end - addr) < size) {
/*
* We're going to clear the PTE, but not flushed
* the mapping, time to remap and flush. The
* effects if visible outside the processor or
* if we are running in code close to the
* mapping we cleared, we are in trouble.
*/
if (overlaps_kernel_text(aligned_start, addr) ||
overlaps_kernel_text(end, aligned_end)) {
/*
* Hack, just return, don't pte_clear
*/
WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
"text, not splitting\n", addr, end);
return;
}
split_region = true;
}
if (split_region) {
params.pte = pte;
params.start = addr;
params.end = end;
params.aligned_start = addr & ~(size - 1);
params.aligned_end = min_t(unsigned long, aligned_end,
(unsigned long)__va(memblock_end_of_DRAM()));
stop_machine(stop_machine_change_mapping, &params, NULL);
return;
}
pte_clear(&init_mm, addr, pte);
}
static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
unsigned long end)
{
@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
continue;
if (pmd_huge(*pmd)) {
if (!IS_ALIGNED(addr, PMD_SIZE) ||
!IS_ALIGNED(next, PMD_SIZE)) {
WARN_ONCE(1, "%s: unaligned range\n", __func__);
continue;
}
pte_clear(&init_mm, addr, (pte_t *)pmd);
split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
continue;
}
@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
continue;
if (pud_huge(*pud)) {
if (!IS_ALIGNED(addr, PUD_SIZE) ||
!IS_ALIGNED(next, PUD_SIZE)) {
WARN_ONCE(1, "%s: unaligned range\n", __func__);
continue;
}
pte_clear(&init_mm, addr, (pte_t *)pud);
split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
continue;
}
@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
continue;
if (pgd_huge(*pgd)) {
if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
!IS_ALIGNED(next, PGDIR_SIZE)) {
WARN_ONCE(1, "%s: unaligned range\n", __func__);
continue;
}
pte_clear(&init_mm, addr, (pte_t *)pgd);
split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
continue;
}

View File

@ -82,6 +82,8 @@ unsigned long __pgd_index_size;
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
unsigned long __pud_cache_index;
EXPORT_SYMBOL(__pud_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
if (old & PATB_HR) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
} else {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :

View File

@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
unsigned int psize;
int ssize;
real_pte_t rpte;
int i;
int i, offset;
i = batch->index;
@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
psize = get_slice_psize(mm, addr);
/* Mask the address for the correct page size */
addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
if (unlikely(psize == MMU_PAGE_16G))
offset = PTRS_PER_PUD;
else
offset = PTRS_PER_PMD;
#else
BUG();
psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
* support 64k pages, this might be different from the
* hardware page size encoded in the slice table. */
addr &= PAGE_MASK;
offset = PTRS_PER_PTE;
}
@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
}
WARN_ON(vsid == 0);
vpn = hpt_vpn(addr, vsid, ssize);
rpte = __real_pte(__pte(pte), ptep);
rpte = __real_pte(__pte(pte), ptep, offset);
/*
* Check if we have an active batch on this CPU. If not, just

View File

@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
rc = PTR_ERR(txwin->paste_kaddr);
goto free_window;
}
} else {
/*
* A user mapping must ensure that context switch issues
* CP_ABORT for this thread.
*/
rc = set_thread_uses_vas();
if (rc)
goto free_window;
}
/*
* Now that we have a send window, ensure context switch issues
* CP_ABORT for this thread.
*/
rc = -EINVAL;
if (set_thread_uses_vas() < 0)
goto free_window;
set_vinst_win(vinst, txwin);
return txwin;

View File

@ -36,6 +36,7 @@
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
#include <asm/topology.h>
#include "pseries.h"
#include "offline_states.h"
@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
BUG_ON(cpu_online(cpu));
set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
update_numa_cpu_lookup_table(cpu, -1);
break;
}
if (cpu >= nr_cpu_ids)
@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)
cpu_maps_update_done();
}
extern int find_and_online_cpu_nid(int cpu);
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;

View File

@ -48,6 +48,28 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
/*
* Enable the hotplug interrupt late because processing them may touch other
* devices or systems (e.g. hugepages) that have not been initialized at the
* subsys stage.
*/
int __init init_ras_hotplug_IRQ(void)
{
struct device_node *np;
/* Hotplug Events */
np = of_find_node_by_path("/event-sources/hot-plug-events");
if (np != NULL) {
if (dlpar_workqueue_init() == 0)
request_event_sources_irqs(np, ras_hotplug_interrupt,
"RAS_HOTPLUG");
of_node_put(np);
}
return 0;
}
machine_late_initcall(pseries, init_ras_hotplug_IRQ);
/*
* Initialize handlers for the set of interrupts caused by hardware errors
* and power system events.
@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)
of_node_put(np);
}
/* Hotplug Events */
np = of_find_node_by_path("/event-sources/hot-plug-events");
if (np != NULL) {
if (dlpar_workqueue_init() == 0)
request_event_sources_irqs(np, ras_hotplug_interrupt,
"RAS_HOTPLUG");
of_node_put(np);
}
/* EPOW Events */
np = of_find_node_by_path("/event-sources/epow-events");
if (np != NULL) {

View File

@ -277,7 +277,7 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
struct ocxl_context *ctx = file->private_data;
struct ocxl_kernel_event_header header;
ssize_t rc;
size_t used = 0;
ssize_t used = 0;
DEFINE_WAIT(event_wait);
memset(&header, 0, sizeof(header));

View File

@ -57,7 +57,7 @@ volatile int gotsig;
void sighandler(int sig, siginfo_t *info, void *ctx)
{
struct ucontext *ucp = ctx;
ucontext_t *ucp = ctx;
if (!testing) {
signal(sig, SIG_DFL);