From 7a06c66835f75fe2be4f154a93cc30cb81734b81 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Fri, 10 Nov 2017 10:25:07 +0530
Subject: [PATCH 1/7] powerpc/64s/slice: Use addr limit when computing slice
 mask

While computing slice mask for the free area we need make sure we only
search in the addr limit applicable for this mmap. We update the
slb_addr_limit after we request for a mmap above 128TB. But the
following mmap request with hint addr below 128TB should still limit
its search to below 128TB. ie. we should not use slb_addr_limit to
compute slice mask in this case. Instead, we should derive high addr
limit based on the mmap hint addr value.

Fixes: f4ea6dcb08ea ("powerpc/mm: Enable mappings above 128TB")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/slice.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 564fff06f5c1..23ec2c5e3b78 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -122,7 +122,8 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
 	return !slice_area_is_free(mm, start, end - start);
 }
 
-static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
+static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
+				unsigned long high_limit)
 {
 	unsigned long i;
 
@@ -133,15 +134,16 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
 		if (!slice_low_has_vma(mm, i))
 			ret->low_slices |= 1u << i;
 
-	if (mm->context.slb_addr_limit <= SLICE_LOW_TOP)
+	if (high_limit <= SLICE_LOW_TOP)
 		return;
 
-	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++)
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++)
 		if (!slice_high_has_vma(mm, i))
 			__set_bit(i, ret->high_slices);
 }
 
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret)
+static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
+				unsigned long high_limit)
 {
 	unsigned char *hpsizes;
 	int index, mask_index;
@@ -156,8 +158,11 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
 		if (((lpsizes >> (i * 4)) & 0xf) == psize)
 			ret->low_slices |= 1u << i;
 
+	if (high_limit <= SLICE_LOW_TOP)
+		return;
+
 	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
 		mask_index = i & 0x1;
 		index = i >> 1;
 		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
@@ -169,6 +174,10 @@ static int slice_check_fit(struct mm_struct *mm,
 			   struct slice_mask mask, struct slice_mask available)
 {
 	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+	/*
+	 * Make sure we just do bit compare only to the max
+	 * addr limit and not the full bit map size.
+	 */
 	unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
 
 	bitmap_and(result, mask.high_slices,
@@ -472,7 +481,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* First make up a "good" mask of slices that have the right size
 	 * already
 	 */
-	slice_mask_for_size(mm, psize, &good_mask);
+	slice_mask_for_size(mm, psize, &good_mask, high_limit);
 	slice_print_mask(" good_mask", good_mask);
 
 	/*
@@ -497,7 +506,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 #ifdef CONFIG_PPC_64K_PAGES
 	/* If we support combo pages, we can allow 64k pages in 4k slices */
 	if (psize == MMU_PAGE_64K) {
-		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
+		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
 		if (fixed)
 			slice_or_mask(&good_mask, &compat_mask);
 	}
@@ -530,11 +539,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 			return newaddr;
 		}
 	}
-
-	/* We don't fit in the good mask, check what other slices are
+	/*
+	 * We don't fit in the good mask, check what other slices are
 	 * empty and thus can be converted
 	 */
-	slice_mask_for_free(mm, &potential_mask);
+	slice_mask_for_free(mm, &potential_mask, high_limit);
 	slice_or_mask(&potential_mask, &good_mask);
 	slice_print_mask(" potential", potential_mask);
 
@@ -744,17 +753,18 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 {
 	struct slice_mask mask, available;
 	unsigned int psize = mm->context.user_psize;
+	unsigned long high_limit = mm->context.slb_addr_limit;
 
 	if (radix_enabled())
 		return 0;
 
 	slice_range_to_mask(addr, len, &mask);
-	slice_mask_for_size(mm, psize, &available);
+	slice_mask_for_size(mm, psize, &available, high_limit);
 #ifdef CONFIG_PPC_64K_PAGES
 	/* We need to account for 4k slices too */
 	if (psize == MMU_PAGE_64K) {
 		struct slice_mask compat_mask;
-		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
+		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
 		slice_or_mask(&available, &compat_mask);
 	}
 #endif

From 62b49c42107efd973edffc75f4f874c5226cd20a Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Mon, 20 Nov 2017 12:53:15 -0600
Subject: [PATCH 2/7] powerpc/vas: Export chip_to_vas_id()

Export the symbol chip_to_vas_id() to fix a build failure when
CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV=m.

Fixes: d4ef61b5e895 ("powerpc/vas, nx-842: Define and use chip_to_vas_id()")
Reported-by: Haren Myneni <hbabu@us.ibm.com>
Reported-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/vas.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index c488621dbec3..aebbe95c9230 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -135,6 +135,7 @@ int chip_to_vas_id(int chipid)
 	}
 	return -1;
 }
+EXPORT_SYMBOL(chip_to_vas_id);
 
 static int vas_probe(struct platform_device *pdev)
 {

From f3f1dfd600ff82b18b7ea73d80eb27f476a6aa97 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 16 Oct 2017 00:13:41 +0530
Subject: [PATCH 3/7] powerpc/perf/imc: Use cpu_to_node() not
 topology_physical_package_id()

init_imc_pmu() uses topology_physical_package_id() to detect the
node id of the processor it is on to get local memory, but that's
wrong, and can lead to crashes. Fix it to use cpu_to_node().

Fixes: 885dcd709ba9 ("powerpc/perf: Add nest IMC PMU support")
Cc: stable@vger.kernel.org # v4.14+
Reported-By: Rob Lippert <rlippert@google.com>
Tested-By: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/imc-pmu.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 36344117c680..cf64e16f92c2 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -467,7 +467,7 @@ static int nest_imc_event_init(struct perf_event *event)
 	 * Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
 	 * Get the base memory addresss for this cpu.
 	 */
-	chip_id = topology_physical_package_id(event->cpu);
+	chip_id = cpu_to_chip_id(event->cpu);
 	pcni = pmu->mem_info;
 	do {
 		if (pcni->id == chip_id) {
@@ -524,19 +524,19 @@ static int nest_imc_event_init(struct perf_event *event)
  */
 static int core_imc_mem_init(int cpu, int size)
 {
-	int phys_id, rc = 0, core_id = (cpu / threads_per_core);
+	int nid, rc = 0, core_id = (cpu / threads_per_core);
 	struct imc_mem_info *mem_info;
 
 	/*
 	 * alloc_pages_node() will allocate memory for core in the
 	 * local node only.
 	 */
-	phys_id = topology_physical_package_id(cpu);
+	nid = cpu_to_node(cpu);
 	mem_info = &core_imc_pmu->mem_info[core_id];
 	mem_info->id = core_id;
 
 	/* We need only vbase for core counters */
-	mem_info->vbase = page_address(alloc_pages_node(phys_id,
+	mem_info->vbase = page_address(alloc_pages_node(nid,
 					  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
 					  __GFP_NOWARN, get_order(size)));
 	if (!mem_info->vbase)
@@ -797,14 +797,14 @@ static int core_imc_event_init(struct perf_event *event)
 static int thread_imc_mem_alloc(int cpu_id, int size)
 {
 	u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
-	int phys_id = topology_physical_package_id(cpu_id);
+	int nid = cpu_to_node(cpu_id);
 
 	if (!local_mem) {
 		/*
 		 * This case could happen only once at start, since we dont
 		 * free the memory in cpu offline path.
 		 */
-		local_mem = page_address(alloc_pages_node(phys_id,
+		local_mem = page_address(alloc_pages_node(nid,
 				  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
 				  __GFP_NOWARN, get_order(size)));
 		if (!local_mem)

From 252eb55816a6f69ef9464cad303cdb3326cdc61d Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Tue, 21 Nov 2017 15:28:20 +0100
Subject: [PATCH 4/7] powerpc: Fix boot on BOOK3S_32 with
 CONFIG_STRICT_KERNEL_RWX

On powerpc32, patch_instruction() is called by apply_feature_fixups()
which is called from early_init()

There is the following note in front of early_init():
 * Note that the kernel may be running at an address which is different
 * from the address that it was linked at, so we must use RELOC/PTRRELOC
 * to access static data (including strings).  -- paulus

Therefore, slab_is_available() cannot be called yet, and
text_poke_area must be addressed with PTRRELOC()

Fixes: 95902e6c8864 ("powerpc/mm: Implement STRICT_KERNEL_RWX on PPC32")
Cc: stable@vger.kernel.org # v4.14+
Reported-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/code-patching.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c9de03e0c1f1..d469224c4ada 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -21,6 +21,7 @@
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <asm/code-patching.h>
+#include <asm/setup.h>
 
 static int __patch_instruction(unsigned int *addr, unsigned int instr)
 {
@@ -146,11 +147,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
 	 * During early early boot patch_instruction is called
 	 * when text_poke_area is not ready, but we still need
 	 * to allow patching. We just do the plain old patching
-	 * We use slab_is_available and per cpu read * via this_cpu_read
-	 * of text_poke_area. Per-CPU areas might not be up early
-	 * this can create problems with just using this_cpu_read()
 	 */
-	if (!slab_is_available() || !this_cpu_read(text_poke_area))
+	if (!this_cpu_read(*PTRRELOC(&text_poke_area)))
 		return __patch_instruction(addr, instr);
 
 	local_irq_save(flags);

From de34787f1096cce38e2590be0013b44418d14546 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Wed, 22 Nov 2017 10:45:38 +0530
Subject: [PATCH 5/7] powerpc/perf: Fix pmu_count to count only nest imc pmus

"pmu_count" in opal_imc_counters_probe() is intended to hold
the number of successful nest imc pmu registerations. But
current code also counts other imc units like core_imc and
thread_imc. Patch add a check to count only nest imc pmus.

Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal-imc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 21f6531fae20..b150f4deaccf 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -191,8 +191,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 			break;
 		}
 
-		if (!imc_pmu_create(imc_dev, pmu_count, domain))
-			pmu_count++;
+		if (!imc_pmu_create(imc_dev, pmu_count, domain)) {
+			if (domain == IMC_DOMAIN_NEST)
+				pmu_count++;
+		}
 	}
 
 	return 0;

From 73ce9aec65b17433e18163d07eb5cb6bf114bd6c Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Wed, 22 Nov 2017 10:45:39 +0530
Subject: [PATCH 6/7] powerpc/perf: Fix IMC_MAX_PMU macro

IMC_MAX_PMU is used for static storage (per_nest_pmu_arr) which holds
nest pmu information. Current value for the macro is 32 based on
the initial number of nest pmu units supported by the nest microcode.
But going forward, microcode could support more nest units. Instead
of static storage, patch to fix the code to dynamically allocate an
array based on the number of nest imc units found in the device tree.

Fixes:8f95faaac56c1 ('powerpc/powernv: Detect and create IMC device')
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/imc-pmu.h        |  6 +-----
 arch/powerpc/perf/imc-pmu.c               | 15 ++++++++++++---
 arch/powerpc/platforms/powernv/opal-imc.c | 16 ++++++++++++++++
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 7f74c282710f..fad0e6ff460f 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -20,11 +20,6 @@
 #include <linux/io.h>
 #include <asm/opal.h>
 
-/*
- * For static allocation of some of the structures.
- */
-#define IMC_MAX_PMUS			32
-
 /*
  * Compatibility macros for IMC devices
  */
@@ -125,4 +120,5 @@ enum {
 extern int init_imc_pmu(struct device_node *parent,
 				struct imc_pmu *pmu_ptr, int pmu_id);
 extern void thread_imc_disable(void);
+extern int get_max_nest_dev(void);
 #endif /* __ASM_POWERPC_IMC_PMU_H */
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index cf64e16f92c2..0ead3cd73caa 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -26,7 +26,7 @@
  */
 static DEFINE_MUTEX(nest_init_lock);
 static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
-static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static struct imc_pmu **per_nest_pmu_arr;
 static cpumask_t nest_imc_cpumask;
 struct imc_pmu_ref *nest_imc_refc;
 static int nest_pmus;
@@ -286,13 +286,14 @@ static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
 static void nest_change_cpu_context(int old_cpu, int new_cpu)
 {
 	struct imc_pmu **pn = per_nest_pmu_arr;
-	int i;
 
 	if (old_cpu < 0 || new_cpu < 0)
 		return;
 
-	for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++)
+	while (*pn) {
 		perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
+		pn++;
+	}
 }
 
 static int ppc_nest_imc_cpu_offline(unsigned int cpu)
@@ -1194,6 +1195,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
 	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
 	kfree(pmu_ptr);
+	kfree(per_nest_pmu_arr);
 	return;
 }
 
@@ -1218,6 +1220,13 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 			return -ENOMEM;
 
 		/* Needed for hotplug/migration */
+		if (!per_nest_pmu_arr) {
+			per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1,
+						sizeof(struct imc_pmu *),
+						GFP_KERNEL);
+			if (!per_nest_pmu_arr)
+				return -ENOMEM;
+		}
 		per_nest_pmu_arr[pmu_index] = pmu_ptr;
 		break;
 	case IMC_DOMAIN_CORE:
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index b150f4deaccf..465ea105b771 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -153,6 +153,22 @@ static void disable_core_pmu_counters(void)
 	put_online_cpus();
 }
 
+int get_max_nest_dev(void)
+{
+	struct device_node *node;
+	u32 pmu_units = 0, type;
+
+	for_each_compatible_node(node, NULL, IMC_DTB_UNIT_COMPAT) {
+		if (of_property_read_u32(node, "type", &type))
+			continue;
+
+		if (type == IMC_TYPE_CHIP)
+			pmu_units++;
+	}
+
+	return pmu_units;
+}
+
 static int opal_imc_counters_probe(struct platform_device *pdev)
 {
 	struct device_node *imc_dev = pdev->dev.of_node;

From 4d6c51b107cc73b15a377224549aa5593f90df89 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 22 Nov 2017 23:17:01 +1100
Subject: [PATCH 7/7] powerpc/64s: Fix Power9 DD2.1 logic in DT CPU features

I got the logic wrong in the DT CPU features code when I added the
Power9 DD2.1 feature. We should be setting the bit if we detect a
DD2.1, not clearing it if we detect a DD2.0.

This code isn't actually exercised at the moment so nothing is
actually broken.

Fixes: 3ffa9d9e2a7c ("powerpc/64s: Fix Power9 DD2.0 workarounds by adding DD2.1 feature")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/dt_cpu_ftrs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 602e0fde19b4..8bdc2f96c5d6 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -735,8 +735,8 @@ static __init void cpufeatures_cpu_quirks(void)
 	 */
 	if ((version & 0xffffff00) == 0x004e0100)
 		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
-	else if ((version & 0xffffefff) == 0x004e0200)
-		cur_cpu_spec->cpu_features &= ~CPU_FTR_POWER9_DD2_1;
+	else if ((version & 0xffffefff) == 0x004e0201)
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
 }
 
 static void __init cpufeatures_setup_finished(void)