2009-01-07 23:14:39 +08:00
|
|
|
/*
|
2009-09-24 22:11:24 +08:00
|
|
|
* IPI management based on arch/arm/kernel/smp.c (Copyright 2002 ARM Limited)
|
2009-01-07 23:14:39 +08:00
|
|
|
*
|
2009-09-24 22:11:24 +08:00
|
|
|
* Copyright 2007-2009 Analog Devices Inc.
|
|
|
|
* Philippe Gerum <rpm@xenomai.org>
|
2009-01-07 23:14:39 +08:00
|
|
|
*
|
2009-09-24 22:11:24 +08:00
|
|
|
* Licensed under the GPL-2.
|
2009-01-07 23:14:39 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/cache.h>
|
2011-12-12 11:04:05 +08:00
|
|
|
#include <linux/clockchips.h>
|
2009-01-07 23:14:39 +08:00
|
|
|
#include <linux/profile.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/smp.h>
|
2009-09-21 19:51:31 +08:00
|
|
|
#include <linux/cpumask.h>
|
2009-01-07 23:14:39 +08:00
|
|
|
#include <linux/seq_file.h>
|
|
|
|
#include <linux/irq.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the followings.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build test were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable easily on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/slab.h>
|
2011-07-27 07:09:06 +08:00
|
|
|
#include <linux/atomic.h>
|
2009-01-07 23:14:39 +08:00
|
|
|
#include <asm/cacheflush.h>
|
2011-04-15 15:06:59 +08:00
|
|
|
#include <asm/irq_handler.h>
|
2009-01-07 23:14:39 +08:00
|
|
|
#include <asm/mmu_context.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/pgalloc.h>
|
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/ptrace.h>
|
|
|
|
#include <asm/cpu.h>
|
2009-05-15 19:01:59 +08:00
|
|
|
#include <asm/time.h>
|
2009-01-07 23:14:39 +08:00
|
|
|
#include <linux/err.h>
|
|
|
|
|
2009-05-06 18:38:07 +08:00
|
|
|
/*
|
|
|
|
* Anomaly notes:
|
|
|
|
* 05000120 - we always define corelock as 32-bit integer in L2
|
|
|
|
*/
|
2009-01-07 23:14:39 +08:00
|
|
|
struct corelock_slot corelock __attribute__ ((__section__(".l2.bss")));
|
|
|
|
|
2010-08-05 15:49:26 +08:00
|
|
|
#ifdef CONFIG_ICACHE_FLUSH_L1
|
|
|
|
unsigned long blackfin_iflush_l1_entry[NR_CPUS];
|
|
|
|
#endif
|
|
|
|
|
2013-06-19 04:56:21 +08:00
|
|
|
struct blackfin_initial_pda initial_pda_coreb;
|
2009-01-07 23:14:39 +08:00
|
|
|
|
2012-07-31 17:28:10 +08:00
|
|
|
/*
 * IPI message types.  send_ipi() sets bit (1 << type) in the target CPU's
 * pending mask and ipi_handler_int1() dispatches on the bit index, so each
 * value doubles as a bit position.
 */
enum ipi_message_type {
	BFIN_IPI_NONE       = 0,	/* not dispatched: the handler scans from bit 1 */
	BFIN_IPI_TIMER      = 1,	/* run the local core timer event handler */
	BFIN_IPI_RESCHEDULE = 2,	/* call scheduler_ipi() */
	BFIN_IPI_CALL_FUNC  = 3,	/* run generic cross-call functions */
	BFIN_IPI_CPU_STOP   = 4,	/* park the receiving CPU */
};
|
2009-01-07 23:14:39 +08:00
|
|
|
|
|
|
|
/*
 * Bounds of an I-cache flush request handed to other CPUs; consumed by
 * ipi_flush_icache(), which passes them to blackfin_icache_flush_range().
 */
struct blackfin_flush_data {
	unsigned long start;	/* first argument of the flush call */
	unsigned long end;	/* second argument of the flush call */
};
|
|
|
|
|
|
|
|
void *secondary_stack;
|
|
|
|
|
|
|
|
static struct blackfin_flush_data smp_flush_data;
|
|
|
|
|
|
|
|
static DEFINE_SPINLOCK(stop_lock);
|
|
|
|
|
Blackfin: SMP: rewrite IPI handling to avoid memory allocation
Currently, sending an interprocessor interrupt (IPI) requires building up
a message dynamically which means memory allocation. But often times, we
will want to send an IPI in low level contexts where allocation is not
possible which may lead to a panic(). So create a per-cpu static array
for the message queue and use that instead.
Further, while we have two supplemental interrupts, we are currently only
using one of them. So use the second one for the most common IPI message
of all -- smp_send_reschedule(). This avoids ugly contention for locks
which in turn would require an IPI message ...
In general, this improves SMP performance, and in some cases allows the
SMP port to work in places it wouldn't before. Such as the PREEMPT_RT
state where the slab is protected by a per-cpu spin lock. If the slab
kmalloc/kfree were to put the task to sleep, and that task was actually
the IPI handler, then the system falls down yet again.
After running some various stress tests on the system, the static limit
of 5 messages seems to work. On the off chance even this overflows, we
simply panic(), and we can review that scenario to see if the limit needs
to be increased a bit more.
Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
2009-12-17 16:20:32 +08:00
|
|
|
/* A magic number - stress test shows this is safe for common cases */
|
|
|
|
#define BFIN_IPI_MSGQ_LEN 5
|
|
|
|
|
|
|
|
/* Simple FIFO buffer, overflow leads to panic */
|
2012-07-31 17:28:10 +08:00
|
|
|
struct ipi_data {
|
2013-07-09 15:39:53 +08:00
|
|
|
atomic_t count;
|
|
|
|
atomic_t bits;
|
2009-01-07 23:14:39 +08:00
|
|
|
};
|
|
|
|
|
2012-07-31 17:28:10 +08:00
|
|
|
static DEFINE_PER_CPU(struct ipi_data, bfin_ipi);
|
2009-01-07 23:14:39 +08:00
|
|
|
|
|
|
|
static void ipi_cpu_stop(unsigned int cpu)
|
|
|
|
{
|
|
|
|
spin_lock(&stop_lock);
|
|
|
|
printk(KERN_CRIT "CPU%u: stopping\n", cpu);
|
|
|
|
dump_stack();
|
|
|
|
spin_unlock(&stop_lock);
|
|
|
|
|
2011-04-26 09:57:27 +08:00
|
|
|
set_cpu_online(cpu, false);
|
2009-01-07 23:14:39 +08:00
|
|
|
|
|
|
|
local_irq_disable();
|
|
|
|
|
|
|
|
while (1)
|
|
|
|
SSYNC();
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ipi_flush_icache(void *info)
|
|
|
|
{
|
|
|
|
struct blackfin_flush_data *fdata = info;
|
|
|
|
|
|
|
|
/* Invalidate the memory holding the bounds of the flushed region. */
|
2011-04-12 16:16:04 +08:00
|
|
|
blackfin_dcache_invalidate_range((unsigned long)fdata,
|
|
|
|
(unsigned long)fdata + sizeof(*fdata));
|
|
|
|
|
|
|
|
/* Make sure all write buffers in the data side of the core
|
|
|
|
* are flushed before trying to invalidate the icache. This
|
|
|
|
* needs to be after the data flush and before the icache
|
|
|
|
* flush so that the SSYNC does the right thing in preventing
|
|
|
|
* the instruction prefetcher from hitting things in cached
|
|
|
|
* memory at the wrong time -- it runs much further ahead than
|
|
|
|
* the pipeline.
|
|
|
|
*/
|
|
|
|
SSYNC();
|
|
|
|
|
|
|
|
/* ipi_flaush_icache is invoked by generic flush_icache_range,
|
|
|
|
* so call blackfin arch icache flush directly here.
|
|
|
|
*/
|
|
|
|
blackfin_icache_flush_range(fdata->start, fdata->end);
|
2009-01-07 23:14:39 +08:00
|
|
|
}
|
|
|
|
|
Blackfin: SMP: rewrite IPI handling to avoid memory allocation
Currently, sending an interprocessor interrupt (IPI) requires building up
a message dynamically which means memory allocation. But often times, we
will want to send an IPI in low level contexts where allocation is not
possible which may lead to a panic(). So create a per-cpu static array
for the message queue and use that instead.
Further, while we have two supplemental interrupts, we are currently only
using one of them. So use the second one for the most common IPI message
of all -- smp_send_reschedule(). This avoids ugly contention for locks
which in turn would require an IPI message ...
In general, this improves SMP performance, and in some cases allows the
SMP port to work in places it wouldn't before. Such as the PREEMPT_RT
state where the slab is protected by a per-cpu spin lock. If the slab
kmalloc/kfree were to put the task to sleep, and that task was actually
the IPI handler, then the system falls down yet again.
After running some various stress tests on the system, the static limit
of 5 messages seems to work. On the off chance even this overflows, we
simply panic(), and we can review that scenario to see if the limit needs
to be increased a bit more.
Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
2009-12-17 16:20:32 +08:00
|
|
|
/* Use IRQ_SUPPLE_0 to request reschedule.
|
|
|
|
* When returning from interrupt to user space,
|
|
|
|
* there is chance to reschedule */
|
|
|
|
static irqreturn_t ipi_handler_int0(int irq, void *dev_instance)
|
|
|
|
{
|
|
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
|
|
|
|
platform_clear_ipi(cpu, IRQ_SUPPLE_0);
|
|
|
|
return IRQ_HANDLED;
|
|
|
|
}
|
|
|
|
|
2011-12-12 11:04:05 +08:00
|
|
|
DECLARE_PER_CPU(struct clock_event_device, coretmr_events);
|
|
|
|
void ipi_timer(void)
|
|
|
|
{
|
|
|
|
int cpu = smp_processor_id();
|
|
|
|
struct clock_event_device *evt = &per_cpu(coretmr_events, cpu);
|
|
|
|
evt->event_handler(evt);
|
|
|
|
}
|
|
|
|
|
Blackfin: SMP: rewrite IPI handling to avoid memory allocation
Currently, sending an interprocessor interrupt (IPI) requires building up
a message dynamically which means memory allocation. But often times, we
will want to send an IPI in low level contexts where allocation is not
possible which may lead to a panic(). So create a per-cpu static array
for the message queue and use that instead.
Further, while we have two supplemental interrupts, we are currently only
using one of them. So use the second one for the most common IPI message
of all -- smp_send_reschedule(). This avoids ugly contention for locks
which in turn would require an IPI message ...
In general, this improves SMP performance, and in some cases allows the
SMP port to work in places it wouldn't before. Such as the PREEMPT_RT
state where the slab is protected by a per-cpu spin lock. If the slab
kmalloc/kfree were to put the task to sleep, and that task was actually
the IPI handler, then the system falls down yet again.
After running some various stress tests on the system, the static limit
of 5 messages seems to work. On the off chance even this overflows, we
simply panic(), and we can review that scenario to see if the limit needs
to be increased a bit more.
Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
2009-12-17 16:20:32 +08:00
|
|
|
static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
|
2009-01-07 23:14:39 +08:00
|
|
|
{
|
2012-07-31 17:28:10 +08:00
|
|
|
struct ipi_data *bfin_ipi_data;
|
2009-01-07 23:14:39 +08:00
|
|
|
unsigned int cpu = smp_processor_id();
|
2012-07-31 17:28:10 +08:00
|
|
|
unsigned long pending;
|
|
|
|
unsigned long msg;
|
2009-01-07 23:14:39 +08:00
|
|
|
|
Blackfin: SMP: rewrite IPI handling to avoid memory allocation
Currently, sending an interprocessor interrupt (IPI) requires building up
a message dynamically which means memory allocation. But often times, we
will want to send an IPI in low level contexts where allocation is not
possible which may lead to a panic(). So create a per-cpu static array
for the message queue and use that instead.
Further, while we have two supplemental interrupts, we are currently only
using one of them. So use the second one for the most common IPI message
of all -- smp_send_reschedule(). This avoids ugly contention for locks
which in turn would require an IPI message ...
In general, this improves SMP performance, and in some cases allows the
SMP port to work in places it wouldn't before. Such as the PREEMPT_RT
state where the slab is protected by a per-cpu spin lock. If the slab
kmalloc/kfree were to put the task to sleep, and that task was actually
the IPI handler, then the system falls down yet again.
After running some various stress tests on the system, the static limit
of 5 messages seems to work. On the off chance even this overflows, we
simply panic(), and we can review that scenario to see if the limit needs
to be increased a bit more.
Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
2009-12-17 16:20:32 +08:00
|
|
|
platform_clear_ipi(cpu, IRQ_SUPPLE_1);
|
2009-01-07 23:14:39 +08:00
|
|
|
|
2013-11-15 15:41:35 +08:00
|
|
|
smp_rmb();
|
blackfin: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
At the end of the patch set all uses of __get_cpu_var have been removed so
the macro is removed too.
The patch set includes passes over all arches as well. Once these operations
are used throughout then specialized macros can be defined in non -x86
arches as well in order to optimize per cpu access by f.e. using a global
register that may be set to the per cpu base.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
CC: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-18 01:30:52 +08:00
|
|
|
bfin_ipi_data = this_cpu_ptr(&bfin_ipi);
|
2013-07-16 13:25:21 +08:00
|
|
|
while ((pending = atomic_xchg(&bfin_ipi_data->bits, 0)) != 0) {
|
2012-07-31 17:28:10 +08:00
|
|
|
msg = 0;
|
|
|
|
do {
|
|
|
|
msg = find_next_bit(&pending, BITS_PER_LONG, msg + 1);
|
|
|
|
switch (msg) {
|
|
|
|
case BFIN_IPI_TIMER:
|
|
|
|
ipi_timer();
|
|
|
|
break;
|
|
|
|
case BFIN_IPI_RESCHEDULE:
|
|
|
|
scheduler_ipi();
|
|
|
|
break;
|
|
|
|
case BFIN_IPI_CALL_FUNC:
|
|
|
|
generic_smp_call_function_interrupt();
|
|
|
|
break;
|
|
|
|
case BFIN_IPI_CPU_STOP:
|
|
|
|
ipi_cpu_stop(cpu);
|
|
|
|
break;
|
2013-11-15 15:41:35 +08:00
|
|
|
default:
|
|
|
|
goto out;
|
2012-07-31 17:28:10 +08:00
|
|
|
}
|
2013-07-09 15:39:53 +08:00
|
|
|
atomic_dec(&bfin_ipi_data->count);
|
2012-07-31 17:28:10 +08:00
|
|
|
} while (msg < BITS_PER_LONG);
|
2013-11-15 15:41:35 +08:00
|
|
|
|
2009-01-07 23:14:39 +08:00
|
|
|
}
|
2013-11-15 15:41:35 +08:00
|
|
|
out:
|
2009-01-07 23:14:39 +08:00
|
|
|
return IRQ_HANDLED;
|
|
|
|
}
|
|
|
|
|
2012-07-31 17:28:10 +08:00
|
|
|
static void bfin_ipi_init(void)
|
2009-01-07 23:14:39 +08:00
|
|
|
{
|
|
|
|
unsigned int cpu;
|
2012-07-31 17:28:10 +08:00
|
|
|
struct ipi_data *bfin_ipi_data;
|
2009-01-07 23:14:39 +08:00
|
|
|
for_each_possible_cpu(cpu) {
|
2012-07-31 17:28:10 +08:00
|
|
|
bfin_ipi_data = &per_cpu(bfin_ipi, cpu);
|
2013-07-16 13:25:21 +08:00
|
|
|
atomic_set(&bfin_ipi_data->bits, 0);
|
|
|
|
atomic_set(&bfin_ipi_data->count, 0);
|
2009-01-07 23:14:39 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-07-31 17:28:10 +08:00
|
|
|
void send_ipi(const struct cpumask *cpumask, enum ipi_message_type msg)
|
2009-01-07 23:14:39 +08:00
|
|
|
{
|
|
|
|
unsigned int cpu;
|
2012-07-31 17:28:10 +08:00
|
|
|
struct ipi_data *bfin_ipi_data;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
local_irq_save(flags);
|
|
|
|
for_each_cpu(cpu, cpumask) {
|
|
|
|
bfin_ipi_data = &per_cpu(bfin_ipi, cpu);
|
2013-07-09 15:39:53 +08:00
|
|
|
atomic_set_mask((1 << msg), &bfin_ipi_data->bits);
|
|
|
|
atomic_inc(&bfin_ipi_data->count);
|
2009-01-07 23:14:39 +08:00
|
|
|
}
|
2012-07-31 17:28:10 +08:00
|
|
|
local_irq_restore(flags);
|
2013-11-15 15:41:35 +08:00
|
|
|
smp_wmb();
|
|
|
|
for_each_cpu(cpu, cpumask)
|
|
|
|
platform_send_ipi_cpu(cpu, IRQ_SUPPLE_1);
|
Blackfin: SMP: rewrite IPI handling to avoid memory allocation
Currently, sending an interprocessor interrupt (IPI) requires building up
a message dynamically which means memory allocation. But often times, we
will want to send an IPI in low level contexts where allocation is not
possible which may lead to a panic(). So create a per-cpu static array
for the message queue and use that instead.
Further, while we have two supplemental interrupts, we are currently only
using one of them. So use the second one for the most common IPI message
of all -- smp_send_reschedule(). This avoids ugly contention for locks
which in turn would require an IPI message ...
In general, this improves SMP performance, and in some cases allows the
SMP port to work in places it wouldn't before. Such as the PREEMPT_RT
state where the slab is protected by a per-cpu spin lock. If the slab
kmalloc/kfree were to put the task to sleep, and that task was actually
the IPI handler, then the system falls down yet again.
After running some various stress tests on the system, the static limit
of 5 messages seems to work. On the off chance even this overflows, we
simply panic(), and we can review that scenario to see if the limit needs
to be increased a bit more.
Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
2009-12-17 16:20:32 +08:00
|
|
|
}
|
|
|
|
|
2012-07-31 17:28:10 +08:00
|
|
|
void arch_send_call_function_single_ipi(int cpu)
|
Blackfin: SMP: rewrite IPI handling to avoid memory allocation
Currently, sending an interprocessor interrupt (IPI) requires building up
a message dynamically which means memory allocation. But often times, we
will want to send an IPI in low level contexts where allocation is not
possible which may lead to a panic(). So create a per-cpu static array
for the message queue and use that instead.
Further, while we have two supplemental interrupts, we are currently only
using one of them. So use the second one for the most common IPI message
of all -- smp_send_reschedule(). This avoids ugly contention for locks
which in turn would require an IPI message ...
In general, this improves SMP performance, and in some cases allows the
SMP port to work in places it wouldn't before. Such as the PREEMPT_RT
state where the slab is protected by a per-cpu spin lock. If the slab
kmalloc/kfree were to put the task to sleep, and that task was actually
the IPI handler, then the system falls down yet again.
After running some various stress tests on the system, the static limit
of 5 messages seems to work. On the off chance even this overflows, we
simply panic(), and we can review that scenario to see if the limit needs
to be increased a bit more.
Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
2009-12-17 16:20:32 +08:00
|
|
|
{
|
2013-12-16 00:36:28 +08:00
|
|
|
send_ipi(cpumask_of(cpu), BFIN_IPI_CALL_FUNC);
|
2009-01-07 23:14:39 +08:00
|
|
|
}
|
|
|
|
|
2012-07-31 17:28:10 +08:00
|
|
|
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
|
2009-01-07 23:14:39 +08:00
|
|
|
{
|
2012-07-31 17:28:10 +08:00
|
|
|
send_ipi(mask, BFIN_IPI_CALL_FUNC);
|
2009-01-07 23:14:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void smp_send_reschedule(int cpu)
|
|
|
|
{
|
2012-07-31 17:28:10 +08:00
|
|
|
send_ipi(cpumask_of(cpu), BFIN_IPI_RESCHEDULE);
|
2009-01-07 23:14:39 +08:00
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2011-12-12 11:04:05 +08:00
|
|
|
void smp_send_msg(const struct cpumask *mask, unsigned long type)
|
|
|
|
{
|
2012-07-31 17:28:10 +08:00
|
|
|
send_ipi(mask, type);
|
2011-12-12 11:04:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void smp_timer_broadcast(const struct cpumask *mask)
|
|
|
|
{
|
|
|
|
smp_send_msg(mask, BFIN_IPI_TIMER);
|
|
|
|
}
|
|
|
|
|
2009-01-07 23:14:39 +08:00
|
|
|
void smp_send_stop(void)
|
|
|
|
{
|
|
|
|
cpumask_t callmap;
|
|
|
|
|
2010-06-25 13:55:16 +08:00
|
|
|
preempt_disable();
|
2011-04-26 09:57:27 +08:00
|
|
|
cpumask_copy(&callmap, cpu_online_mask);
|
|
|
|
cpumask_clear_cpu(smp_processor_id(), &callmap);
|
|
|
|
if (!cpumask_empty(&callmap))
|
2012-07-31 17:28:10 +08:00
|
|
|
send_ipi(&callmap, BFIN_IPI_CPU_STOP);
|
2009-01-07 23:14:39 +08:00
|
|
|
|
2010-06-25 13:55:16 +08:00
|
|
|
preempt_enable();
|
2009-01-07 23:14:39 +08:00
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2013-06-19 04:56:21 +08:00
|
|
|
int __cpu_up(unsigned int cpu, struct task_struct *idle)
|
2009-01-07 23:14:39 +08:00
|
|
|
{
|
|
|
|
int ret;
|
2009-12-28 19:13:51 +08:00
|
|
|
|
2009-01-07 23:14:39 +08:00
|
|
|
secondary_stack = task_stack_page(idle) + THREAD_SIZE;
|
|
|
|
|
|
|
|
ret = platform_boot_secondary(cpu, idle);
|
|
|
|
|
|
|
|
secondary_stack = NULL;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2013-06-19 04:56:21 +08:00
|
|
|
static void setup_secondary(unsigned int cpu)
|
2009-01-07 23:14:39 +08:00
|
|
|
{
|
|
|
|
unsigned long ilat;
|
|
|
|
|
|
|
|
bfin_write_IMASK(0);
|
|
|
|
CSYNC();
|
|
|
|
ilat = bfin_read_ILAT();
|
|
|
|
CSYNC();
|
|
|
|
bfin_write_ILAT(ilat);
|
|
|
|
CSYNC();
|
|
|
|
|
|
|
|
/* Enable interrupt levels IVG7-15. IARs have been already
|
|
|
|
* programmed by the boot CPU. */
|
2008-11-18 17:48:22 +08:00
|
|
|
bfin_irq_flags |= IMASK_IVG15 |
|
2009-01-07 23:14:39 +08:00
|
|
|
IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 |
|
|
|
|
IMASK_IVG10 | IMASK_IVG9 | IMASK_IVG8 | IMASK_IVG7 | IMASK_IVGHW;
|
|
|
|
}
|
|
|
|
|
2013-06-19 04:56:21 +08:00
|
|
|
void secondary_start_kernel(void)
|
2009-01-07 23:14:39 +08:00
|
|
|
{
|
|
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
struct mm_struct *mm = &init_mm;
|
|
|
|
|
|
|
|
if (_bfin_swrst & SWRST_DBL_FAULT_B) {
|
|
|
|
printk(KERN_EMERG "CoreB Recovering from DOUBLE FAULT event\n");
|
|
|
|
#ifdef CONFIG_DEBUG_DOUBLEFAULT
|
2011-05-30 11:12:51 +08:00
|
|
|
printk(KERN_EMERG " While handling exception (EXCAUSE = %#x) at %pF\n",
|
|
|
|
initial_pda_coreb.seqstat_doublefault & SEQSTAT_EXCAUSE,
|
|
|
|
initial_pda_coreb.retx_doublefault);
|
|
|
|
printk(KERN_NOTICE " DCPLB_FAULT_ADDR: %pF\n",
|
|
|
|
initial_pda_coreb.dcplb_doublefault_addr);
|
|
|
|
printk(KERN_NOTICE " ICPLB_FAULT_ADDR: %pF\n",
|
|
|
|
initial_pda_coreb.icplb_doublefault_addr);
|
2009-01-07 23:14:39 +08:00
|
|
|
#endif
|
|
|
|
printk(KERN_NOTICE " The instruction at %pF caused a double exception\n",
|
2011-05-30 11:12:51 +08:00
|
|
|
initial_pda_coreb.retx);
|
2009-01-07 23:14:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We want the D-cache to be enabled early, in case the atomic
|
|
|
|
* support code emulates cache coherence (see
|
|
|
|
* __ARCH_SYNC_CORE_DCACHE).
|
|
|
|
*/
|
|
|
|
init_exception_vectors();
|
|
|
|
|
|
|
|
local_irq_disable();
|
|
|
|
|
|
|
|
/* Attach the new idle task to the global mm. */
|
|
|
|
atomic_inc(&mm->mm_users);
|
|
|
|
atomic_inc(&mm->mm_count);
|
|
|
|
current->active_mm = mm;
|
|
|
|
|
|
|
|
preempt_disable();
|
|
|
|
|
|
|
|
setup_secondary(cpu);
|
|
|
|
|
2009-12-02 15:58:12 +08:00
|
|
|
platform_secondary_init(cpu);
|
2009-12-28 18:21:49 +08:00
|
|
|
/* setup local core timer */
|
|
|
|
bfin_local_timer_setup();
|
|
|
|
|
2009-01-07 23:14:39 +08:00
|
|
|
local_irq_enable();
|
|
|
|
|
2010-09-07 18:08:36 +08:00
|
|
|
bfin_setup_caches(cpu);
|
|
|
|
|
2011-12-12 11:04:05 +08:00
|
|
|
notify_cpu_starting(cpu);
|
2009-12-02 15:58:12 +08:00
|
|
|
/*
|
|
|
|
* Calibrate loops per jiffy value.
|
|
|
|
* IRQs need to be enabled here - D-cache can be invalidated
|
|
|
|
* in timer irq handler, so core B can read correct jiffies.
|
|
|
|
*/
|
|
|
|
calibrate_delay();
|
2009-01-07 23:14:39 +08:00
|
|
|
|
2013-07-09 15:39:53 +08:00
|
|
|
/* We are done with local CPU inits, unblock the boot CPU. */
|
|
|
|
set_cpu_online(cpu, true);
|
2013-03-22 05:49:41 +08:00
|
|
|
cpu_startup_entry(CPUHP_ONLINE);
|
2009-01-07 23:14:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Boot-CPU preparation hook: intentionally a no-op in this file. */
void __init smp_prepare_boot_cpu(void)
{
	/* nothing to do */
}
|
|
|
|
|
|
|
|
/*
 * Prepare for bringing up secondary CPUs: run the platform hook, reset
 * the per-CPU IPI state and install the handlers for both supplemental
 * interrupts.
 *
 * @max_cpus: forwarded unchanged to platform_prepare_cpus().
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	platform_prepare_cpus(max_cpus);

	/* IPI state must be clean before any handler can run. */
	bfin_ipi_init();

	platform_request_ipi(IRQ_SUPPLE_0, ipi_handler_int0);
	platform_request_ipi(IRQ_SUPPLE_1, ipi_handler_int1);
}
|
|
|
|
|
|
|
|
/*
 * Print the number of online CPUs and their combined BogoMIPS.
 *
 * The same global loops_per_jiffy value is added once per online CPU.
 *
 * @max_cpus: unused.
 */
void __init smp_cpus_done(unsigned int max_cpus)
{
	unsigned long total_lpj = 0;
	unsigned long mips_whole, mips_frac;
	unsigned int cpu;

	for_each_online_cpu(cpu)
		total_lpj += loops_per_jiffy;

	mips_whole = total_lpj / (500000/HZ);
	mips_frac = (total_lpj / (5000/HZ)) % 100;

	printk(KERN_INFO "SMP: Total of %d processors activated "
	       "(%lu.%02lu BogoMIPS).\n",
	       num_online_cpus(), mips_whole, mips_frac);
}
|
|
|
|
|
|
|
|
void smp_icache_flush_range_others(unsigned long start, unsigned long end)
|
|
|
|
{
|
|
|
|
smp_flush_data.start = start;
|
|
|
|
smp_flush_data.end = end;
|
|
|
|
|
2011-11-25 14:25:30 +08:00
|
|
|
preempt_disable();
|
|
|
|
if (smp_call_function(&ipi_flush_icache, &smp_flush_data, 1))
|
2009-01-07 23:14:39 +08:00
|
|
|
printk(KERN_WARNING "SMP: failed to run I-cache flush request on other CPUs\n");
|
2011-11-25 14:25:30 +08:00
|
|
|
preempt_enable();
|
2009-01-07 23:14:39 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(smp_icache_flush_range_others);
|
|
|
|
|
2009-06-10 16:57:08 +08:00
|
|
|
#ifdef __ARCH_SYNC_CORE_ICACHE
|
2010-02-01 14:07:50 +08:00
|
|
|
unsigned long icache_invld_count[NR_CPUS];
|
2009-06-10 16:57:08 +08:00
|
|
|
void resync_core_icache(void)
|
|
|
|
{
|
|
|
|
unsigned int cpu = get_cpu();
|
|
|
|
blackfin_invalidate_entire_icache();
|
2010-02-01 14:07:50 +08:00
|
|
|
icache_invld_count[cpu]++;
|
2009-06-10 16:57:08 +08:00
|
|
|
put_cpu();
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(resync_core_icache);
|
|
|
|
#endif
|
|
|
|
|
2009-01-07 23:14:39 +08:00
|
|
|
#ifdef __ARCH_SYNC_CORE_DCACHE
|
2010-02-01 14:07:50 +08:00
|
|
|
unsigned long dcache_invld_count[NR_CPUS];
|
2009-01-07 23:14:39 +08:00
|
|
|
unsigned long barrier_mask __attribute__ ((__section__(".l2.bss")));
|
|
|
|
|
|
|
|
void resync_core_dcache(void)
|
|
|
|
{
|
|
|
|
unsigned int cpu = get_cpu();
|
|
|
|
blackfin_invalidate_entire_dcache();
|
2010-02-01 14:07:50 +08:00
|
|
|
dcache_invld_count[cpu]++;
|
2009-01-07 23:14:39 +08:00
|
|
|
put_cpu();
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(resync_core_dcache);
|
|
|
|
#endif
|
2009-12-28 19:13:51 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
2013-06-19 04:56:21 +08:00
|
|
|
int __cpu_disable(void)
|
2009-12-28 19:13:51 +08:00
|
|
|
{
|
|
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
|
|
|
|
if (cpu == 0)
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
set_cpu_online(cpu, false);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-06-19 04:56:21 +08:00
|
|
|
/*
 * Wait for @cpu to report its death.
 *
 * Returns the result of cpu_wait_death(); the second argument (5) is
 * presumably a timeout in seconds -- confirm against its definition.
 */
int __cpu_die(unsigned int cpu)
{
	int status = cpu_wait_death(cpu, 5);

	return status;
}
|
|
|
|
|
|
|
|
void cpu_die(void)
|
|
|
|
{
|
2015-02-27 06:28:25 +08:00
|
|
|
(void)cpu_report_death();
|
2009-12-28 19:13:51 +08:00
|
|
|
|
|
|
|
atomic_dec(&init_mm.mm_users);
|
|
|
|
atomic_dec(&init_mm.mm_count);
|
|
|
|
|
|
|
|
local_irq_disable();
|
|
|
|
platform_cpu_die();
|
|
|
|
}
|
|
|
|
#endif
|