2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-27 14:43:58 +08:00
linux-next/arch/arm/mach-exynos/mcpm-exynos.c
Mark Rutland 2c32c65e37 ARM: 8129/1: errata: work around Cortex-A15 erratum 830321 using dummy strex
On revisions of Cortex-A15 prior to r3p3, a CLREX instruction at PL1 may
falsely trigger a watchpoint exception, leading to potential data aborts
during exception return and/or livelock.

This patch resolves the issue in the following ways:

  - Replacing our uses of CLREX with a dummy STREX sequence instead (as
    we did for v6 CPUs).

  - Removing the clrex code from v7_exit_coherency_flush and derivatives,
    since this only exists as a minor performance improvement when
    non-cached exclusives are in use (Linux doesn't use these).

Benchmarking on a variety of ARM cores revealed no measurable
performance difference with this change applied, so the change is
performed unconditionally and no new Kconfig entry is added.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-08-27 15:40:13 +01:00

405 lines
11 KiB
C

/*
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
* http://www.samsung.com
*
* arch/arm/mach-exynos/mcpm-exynos.c
*
* Based on arch/arm/mach-vexpress/dcscb.c
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/arm-cci.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/of_address.h>
#include <asm/cputype.h>
#include <asm/cp15.h>
#include <asm/mcpm.h>
#include "regs-pmu.h"
#include "common.h"
#define EXYNOS5420_CPUS_PER_CLUSTER 4
#define EXYNOS5420_NR_CLUSTERS 2
#define EXYNOS5420_ENABLE_AUTOMATIC_CORE_DOWN BIT(9)
#define EXYNOS5420_USE_ARM_CORE_DOWN_STATE BIT(29)
#define EXYNOS5420_USE_L2_COMMON_UP_STATE BIT(30)
/*
* The common v7_exit_coherency_flush API could not be used because of the
* Erratum 799270 workaround. This macro is the same as the common one (in
* arch/arm/include/asm/cacheflush.h) except for the erratum handling.
*/
#define exynos_v7_exit_coherency_flush(level) \
asm volatile( \
"stmfd sp!, {fp, ip}\n\t"\
"mrc p15, 0, r0, c1, c0, 0 @ get SCTLR\n\t" \
"bic r0, r0, #"__stringify(CR_C)"\n\t" \
"mcr p15, 0, r0, c1, c0, 0 @ set SCTLR\n\t" \
"isb\n\t"\
"bl v7_flush_dcache_"__stringify(level)"\n\t" \
"mrc p15, 0, r0, c1, c0, 1 @ get ACTLR\n\t" \
"bic r0, r0, #(1 << 6) @ disable local coherency\n\t" \
/* Dummy Load of a device register to avoid Erratum 799270 */ \
"ldr r4, [%0]\n\t" \
"and r4, r4, #0\n\t" \
"orr r0, r0, r4\n\t" \
"mcr p15, 0, r0, c1, c0, 1 @ set ACTLR\n\t" \
"isb\n\t" \
"dsb\n\t" \
"ldmfd sp!, {fp, ip}" \
: \
: "Ir" (pmu_base_addr + S5P_INFORM0) \
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
"r9", "r10", "lr", "memory")
/*
* We can't use regular spinlocks. In the switcher case, it is possible
* for an outbound CPU to call power_down() after its inbound counterpart
* is already live using the same logical CPU number which trips lockdep
* debugging.
*/
static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int
cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
#define exynos_cluster_usecnt(cluster) \
(cpu_use_count[0][cluster] + \
cpu_use_count[1][cluster] + \
cpu_use_count[2][cluster] + \
cpu_use_count[3][cluster])
#define exynos_cluster_unused(cluster) !exynos_cluster_usecnt(cluster)
static int exynos_power_up(unsigned int cpu, unsigned int cluster)
{
unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
cluster >= EXYNOS5420_NR_CLUSTERS)
return -EINVAL;
/*
* Since this is called with IRQs enabled, and no arch_spin_lock_irq
* variant exists, we need to disable IRQs manually here.
*/
local_irq_disable();
arch_spin_lock(&exynos_mcpm_lock);
cpu_use_count[cpu][cluster]++;
if (cpu_use_count[cpu][cluster] == 1) {
bool was_cluster_down =
(exynos_cluster_usecnt(cluster) == 1);
/*
* Turn on the cluster (L2/COMMON) and then power on the
* cores.
*/
if (was_cluster_down)
exynos_cluster_power_up(cluster);
exynos_cpu_power_up(cpunr);
} else if (cpu_use_count[cpu][cluster] != 2) {
/*
* The only possible values are:
* 0 = CPU down
* 1 = CPU (still) up
* 2 = CPU requested to be up before it had a chance
* to actually make itself down.
* Any other value is a bug.
*/
BUG();
}
arch_spin_unlock(&exynos_mcpm_lock);
local_irq_enable();
return 0;
}
/*
* NOTE: This function requires the stack data to be visible through power down
* and can only be executed on processors like A15 and A7 that hit the cache
* with the C bit clear in the SCTLR register.
*/
static void exynos_power_down(void)
{
unsigned int mpidr, cpu, cluster;
bool last_man = false, skip_wfi = false;
unsigned int cpunr;
mpidr = read_cpuid_mpidr();
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
cluster >= EXYNOS5420_NR_CLUSTERS);
__mcpm_cpu_going_down(cpu, cluster);
arch_spin_lock(&exynos_mcpm_lock);
BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
cpu_use_count[cpu][cluster]--;
if (cpu_use_count[cpu][cluster] == 0) {
exynos_cpu_power_down(cpunr);
if (exynos_cluster_unused(cluster)) {
exynos_cluster_power_down(cluster);
last_man = true;
}
} else if (cpu_use_count[cpu][cluster] == 1) {
/*
* A power_up request went ahead of us.
* Even if we do not want to shut this CPU down,
* the caller expects a certain state as if the WFI
* was aborted. So let's continue with cache cleaning.
*/
skip_wfi = true;
} else {
BUG();
}
if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
arch_spin_unlock(&exynos_mcpm_lock);
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
/*
* On the Cortex-A15 we need to disable
* L2 prefetching before flushing the cache.
*/
asm volatile(
"mcr p15, 1, %0, c15, c0, 3\n\t"
"isb\n\t"
"dsb"
: : "r" (0x400));
}
/* Flush all cache levels for this cluster. */
exynos_v7_exit_coherency_flush(all);
/*
* Disable cluster-level coherency by masking
* incoming snoops and DVM messages:
*/
cci_disable_port_by_cpu(mpidr);
__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
} else {
arch_spin_unlock(&exynos_mcpm_lock);
/* Disable and flush the local CPU cache. */
exynos_v7_exit_coherency_flush(louis);
}
__mcpm_cpu_down(cpu, cluster);
/* Now we are prepared for power-down, do it: */
if (!skip_wfi)
wfi();
/* Not dead at this point? Let our caller cope. */
}
static int exynos_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
{
unsigned int tries = 100;
unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
cluster >= EXYNOS5420_NR_CLUSTERS);
/* Wait for the core state to be OFF */
while (tries--) {
if (ACCESS_ONCE(cpu_use_count[cpu][cluster]) == 0) {
if ((exynos_cpu_power_state(cpunr) == 0))
return 0; /* success: the CPU is halted */
}
/* Otherwise, wait and retry: */
msleep(1);
}
return -ETIMEDOUT; /* timeout */
}
static void exynos_powered_up(void)
{
unsigned int mpidr, cpu, cluster;
mpidr = read_cpuid_mpidr();
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
arch_spin_lock(&exynos_mcpm_lock);
if (cpu_use_count[cpu][cluster] == 0)
cpu_use_count[cpu][cluster] = 1;
arch_spin_unlock(&exynos_mcpm_lock);
}
static void exynos_suspend(u64 residency)
{
unsigned int mpidr, cpunr;
exynos_power_down();
/*
* Execution reaches here only if cpu did not power down.
* Hence roll back the changes done in exynos_power_down function.
*
* CAUTION: "This function requires the stack data to be visible through
* power down and can only be executed on processors like A15 and A7
* that hit the cache with the C bit clear in the SCTLR register."
*/
mpidr = read_cpuid_mpidr();
cpunr = exynos_pmu_cpunr(mpidr);
exynos_cpu_power_up(cpunr);
}
static const struct mcpm_platform_ops exynos_power_ops = {
.power_up = exynos_power_up,
.power_down = exynos_power_down,
.wait_for_powerdown = exynos_wait_for_powerdown,
.suspend = exynos_suspend,
.powered_up = exynos_powered_up,
};
static void __init exynos_mcpm_usage_count_init(void)
{
unsigned int mpidr, cpu, cluster;
mpidr = read_cpuid_mpidr();
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
cluster >= EXYNOS5420_NR_CLUSTERS);
cpu_use_count[cpu][cluster] = 1;
}
/*
* Enable cluster-level coherency, in preparation for turning on the MMU.
*/
static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
{
asm volatile ("\n"
"cmp r0, #1\n"
"bxne lr\n"
"b cci_enable_port_for_self");
}
static void __init exynos_cache_off(void)
{
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
/* disable L2 prefetching on the Cortex-A15 */
asm volatile(
"mcr p15, 1, %0, c15, c0, 3\n\t"
"isb\n\t"
"dsb"
: : "r" (0x400));
}
exynos_v7_exit_coherency_flush(all);
}
static const struct of_device_id exynos_dt_mcpm_match[] = {
{ .compatible = "samsung,exynos5420" },
{ .compatible = "samsung,exynos5800" },
{},
};
static int __init exynos_mcpm_init(void)
{
struct device_node *node;
void __iomem *ns_sram_base_addr;
unsigned int value, i;
int ret;
node = of_find_matching_node(NULL, exynos_dt_mcpm_match);
if (!node)
return -ENODEV;
of_node_put(node);
if (!cci_probed())
return -ENODEV;
node = of_find_compatible_node(NULL, NULL,
"samsung,exynos4210-sysram-ns");
if (!node)
return -ENODEV;
ns_sram_base_addr = of_iomap(node, 0);
of_node_put(node);
if (!ns_sram_base_addr) {
pr_err("failed to map non-secure iRAM base address\n");
return -ENOMEM;
}
/*
* To increase the stability of KFC reset we need to program
* the PMU SPARE3 register
*/
pmu_raw_writel(EXYNOS5420_SWRESET_KFC_SEL, S5P_PMU_SPARE3);
exynos_mcpm_usage_count_init();
ret = mcpm_platform_register(&exynos_power_ops);
if (!ret)
ret = mcpm_sync_init(exynos_pm_power_up_setup);
if (!ret)
ret = mcpm_loopback(exynos_cache_off); /* turn on the CCI */
if (ret) {
iounmap(ns_sram_base_addr);
return ret;
}
mcpm_smp_set_ops();
pr_info("Exynos MCPM support installed\n");
/*
* On Exynos5420/5800 for the A15 and A7 clusters:
*
* EXYNOS5420_ENABLE_AUTOMATIC_CORE_DOWN ensures that all the cores
* in a cluster are turned off before turning off the cluster L2.
*
* EXYNOS5420_USE_ARM_CORE_DOWN_STATE ensures that a cores is powered
* off before waking it up.
*
* EXYNOS5420_USE_L2_COMMON_UP_STATE ensures that cluster L2 will be
* turned on before the first man is powered up.
*/
for (i = 0; i < EXYNOS5420_NR_CLUSTERS; i++) {
value = pmu_raw_readl(EXYNOS_COMMON_OPTION(i));
value |= EXYNOS5420_ENABLE_AUTOMATIC_CORE_DOWN |
EXYNOS5420_USE_ARM_CORE_DOWN_STATE |
EXYNOS5420_USE_L2_COMMON_UP_STATE;
pmu_raw_writel(value, EXYNOS_COMMON_OPTION(i));
}
/*
* U-Boot SPL is hardcoded to jump to the start of ns_sram_base_addr
* as part of secondary_cpu_start(). Let's redirect it to the
* mcpm_entry_point().
*/
__raw_writel(0xe59f0000, ns_sram_base_addr); /* ldr r0, [pc, #0] */
__raw_writel(0xe12fff10, ns_sram_base_addr + 4); /* bx r0 */
__raw_writel(virt_to_phys(mcpm_entry_point), ns_sram_base_addr + 8);
iounmap(ns_sram_base_addr);
return ret;
}
early_initcall(exynos_mcpm_init);