linux/arch/xtensa/kernel/perf_event.c
Max Filippov 38fef73c21 xtensa: implement fake NMI
In case perf IRQ is the highest of the medium-level IRQs, and is alone
on its level, it may be treated as NMI:
- LOCKLEVEL is defined to be one level less than EXCM level,
- IRQ masking never lowers current IRQ level,
- new fake exception cause code, EXCCAUSE_MAPPED_NMI is assigned to that
  IRQ; new second level exception handler, do_nmi, assigned to it
  handles it as NMI,
- atomic operations in configurations without s32c1i still need to mask
  all interrupts.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
2015-08-17 07:33:39 +03:00

455 lines
12 KiB
C

/*
* Xtensa Performance Monitor Module driver
* See Tensilica Debug User's Guide for PMU registers documentation.
*
* Copyright (C) 2015 Cadence Design Systems Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <asm/processor.h>
#include <asm/stacktrace.h>
/* Global control/status for all perf counters */
#define XTENSA_PMU_PMG 0x1000
/* Perf counter values */
#define XTENSA_PMU_PM(i) (0x1080 + (i) * 4)
/* Perf counter control registers */
#define XTENSA_PMU_PMCTRL(i) (0x1100 + (i) * 4)
/* Perf counter status registers */
#define XTENSA_PMU_PMSTAT(i) (0x1180 + (i) * 4)
#define XTENSA_PMU_PMG_PMEN 0x1
#define XTENSA_PMU_COUNTER_MASK 0xffffffffULL
#define XTENSA_PMU_COUNTER_MAX 0x7fffffff
#define XTENSA_PMU_PMCTRL_INTEN 0x00000001
#define XTENSA_PMU_PMCTRL_KRNLCNT 0x00000008
#define XTENSA_PMU_PMCTRL_TRACELEVEL 0x000000f0
#define XTENSA_PMU_PMCTRL_SELECT_SHIFT 8
#define XTENSA_PMU_PMCTRL_SELECT 0x00001f00
#define XTENSA_PMU_PMCTRL_MASK_SHIFT 16
#define XTENSA_PMU_PMCTRL_MASK 0xffff0000
#define XTENSA_PMU_MASK(select, mask) \
(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
XTENSA_PMU_PMCTRL_TRACELEVEL | \
XTENSA_PMU_PMCTRL_INTEN)
#define XTENSA_PMU_PMSTAT_OVFL 0x00000001
#define XTENSA_PMU_PMSTAT_INTASRT 0x00000010
/*
 * Per-CPU bookkeeping of which hardware counters are in use and which
 * perf event owns each of them. Slots are claimed in xtensa_pmu_add() and
 * released in xtensa_pmu_del().
 */
struct xtensa_pmu_events {
/* Array of events currently on this core */
struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
/* Bitmap of used hardware counters */
unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
};
/* One instance per CPU, accessed through this_cpu_ptr() */
static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
/*
 * PMCTRL values for the generic hardware events, indexed by
 * PERF_COUNT_HW_* id. Ids without an entry stay zero and are rejected
 * with -EINVAL by xtensa_pmu_event_init().
 */
static const u32 xtensa_hw_ctl[] = {
[PERF_COUNT_HW_CPU_CYCLES] = XTENSA_PMU_MASK(0, 0x1),
[PERF_COUNT_HW_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0xffff),
[PERF_COUNT_HW_CACHE_REFERENCES] = XTENSA_PMU_MASK(10, 0x1),
[PERF_COUNT_HW_CACHE_MISSES] = XTENSA_PMU_MASK(12, 0x1),
/* Taken and non-taken branches + taken loop ends */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0x490),
/* Instruction-related + other global stall cycles */
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff),
/* Data-related global stall cycles */
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = XTENSA_PMU_MASK(3, 0x1ff),
};
/* Shorthand for the PERF_COUNT_HW_CACHE_* enumerators used below */
#define C(_x) PERF_COUNT_HW_CACHE_##_x
/*
 * PMCTRL values for the cache events, indexed by
 * [cache type][operation][result]. Combinations without an entry stay
 * zero and are rejected with -EINVAL by xtensa_pmu_cache_event().
 */
static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = XTENSA_PMU_MASK(10, 0x1),
[C(RESULT_MISS)] = XTENSA_PMU_MASK(10, 0x2),
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = XTENSA_PMU_MASK(11, 0x1),
[C(RESULT_MISS)] = XTENSA_PMU_MASK(11, 0x2),
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = XTENSA_PMU_MASK(8, 0x1),
[C(RESULT_MISS)] = XTENSA_PMU_MASK(8, 0x2),
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = XTENSA_PMU_MASK(9, 0x1),
[C(RESULT_MISS)] = XTENSA_PMU_MASK(9, 0x8),
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = XTENSA_PMU_MASK(7, 0x1),
[C(RESULT_MISS)] = XTENSA_PMU_MASK(7, 0x8),
},
},
};
/*
 * Translate a PERF_TYPE_HW_CACHE config word into a PMCTRL value.
 *
 * The config is decoded as: bits 0-7 cache type, bits 8-15 operation,
 * bits 16-23 result. Returns the matching xtensa_cache_ctl[] entry, or
 * -EINVAL when a field is out of range or the entry is zero.
 */
static int xtensa_pmu_cache_event(u64 config)
{
	const unsigned int type = (config >> 0) & 0xff;
	const unsigned int op = (config >> 8) & 0xff;
	const unsigned int result = (config >> 16) & 0xff;
	int ctl;

	if (type >= ARRAY_SIZE(xtensa_cache_ctl))
		return -EINVAL;
	if (op >= C(OP_MAX))
		return -EINVAL;
	if (result >= C(RESULT_MAX))
		return -EINVAL;

	/* A zero entry marks a combination the table does not describe. */
	ctl = xtensa_cache_ctl[type][op][result];
	return ctl ? ctl : -EINVAL;
}
/* Return the current raw value of hardware counter @idx. */
static inline uint32_t xtensa_pmu_read_counter(int idx)
{
	uint32_t value = get_er(XTENSA_PMU_PM(idx));

	return value;
}
/* Load @v into hardware counter @idx. */
static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
{
	const int reg = XTENSA_PMU_PM(idx);

	set_er(v, reg);
}
/*
 * Fold the events counted since the previous update into @event.
 *
 * hwc->prev_count is advanced to the current hardware counter value; the
 * cmpxchg is retried until prev_count was not changed underneath us. The
 * difference, truncated with XTENSA_PMU_COUNTER_MASK, is added to
 * event->count and subtracted from hwc->period_left.
 *
 * The counter that is read is event->hw.idx; @idx is not consulted.
 */
static void xtensa_perf_event_update(struct perf_event *event,
				     struct hw_perf_event *hwc, int idx)
{
	uint64_t prev, now;
	int64_t delta;

	for (;;) {
		prev = local64_read(&hwc->prev_count);
		now = xtensa_pmu_read_counter(event->hw.idx);
		if (local64_cmpxchg(&hwc->prev_count, prev, now) == prev)
			break;
	}

	delta = (now - prev) & XTENSA_PMU_COUNTER_MASK;
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}
/*
 * Program counter @idx so that it overflows after the remaining part of
 * the event's period.
 *
 * Non-sampling events simply get XTENSA_PMU_COUNTER_MAX. For sampling
 * events hwc->period_left is consulted: once it has dropped to zero or
 * below, a new period is started (period_left and last_period are
 * rewritten). The count is capped at XTENSA_PMU_COUNTER_MAX, and its
 * negation is stored in hwc->prev_count and written to the counter.
 *
 * Returns true when a new sampling period was started.
 */
static bool xtensa_perf_event_set_period(struct perf_event *event,
					 struct hw_perf_event *hwc, int idx)
{
	bool new_period = false;
	s64 remaining;

	if (is_sampling_event(event)) {
		s64 period = hwc->sample_period;
		bool overshot;

		remaining = local64_read(&hwc->period_left);
		/* More than a whole period behind: restart from scratch. */
		overshot = remaining <= -period;
		if (overshot || remaining <= 0) {
			remaining = overshot ? period : remaining + period;
			local64_set(&hwc->period_left, remaining);
			hwc->last_period = period;
			new_period = true;
		}
		if (remaining > XTENSA_PMU_COUNTER_MAX)
			remaining = XTENSA_PMU_COUNTER_MAX;
	} else {
		remaining = XTENSA_PMU_COUNTER_MAX;
	}

	local64_set(&hwc->prev_count, -remaining);
	xtensa_pmu_write_counter(idx, -remaining);
	perf_event_update_userpage(event);

	return new_period;
}
/* Set the PMEN bit in PMG, keeping the other bits as read. @pmu is unused. */
static void xtensa_pmu_enable(struct pmu *pmu)
{
	unsigned long pmg = get_er(XTENSA_PMU_PMG);

	pmg |= XTENSA_PMU_PMG_PMEN;
	set_er(pmg, XTENSA_PMU_PMG);
}
/* Clear the PMEN bit in PMG, keeping the other bits as read. @pmu is unused. */
static void xtensa_pmu_disable(struct pmu *pmu)
{
	unsigned long pmg = get_er(XTENSA_PMU_PMG);

	pmg &= ~XTENSA_PMU_PMG_PMEN;
	set_er(pmg, XTENSA_PMU_PMG);
}
static int xtensa_pmu_event_init(struct perf_event *event)
{
int ret;
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
xtensa_hw_ctl[event->attr.config] == 0)
return -EINVAL;
event->hw.config = xtensa_hw_ctl[event->attr.config];
return 0;
case PERF_TYPE_HW_CACHE:
ret = xtensa_pmu_cache_event(event->attr.config);
if (ret < 0)
return ret;
event->hw.config = ret;
return 0;
case PERF_TYPE_RAW:
/* Not 'previous counter' select */
if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
(1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
return -EINVAL;
event->hw.config = (event->attr.config &
(XTENSA_PMU_PMCTRL_KRNLCNT |
XTENSA_PMU_PMCTRL_TRACELEVEL |
XTENSA_PMU_PMCTRL_SELECT |
XTENSA_PMU_PMCTRL_MASK)) |
XTENSA_PMU_PMCTRL_INTEN;
return 0;
default:
return -ENOENT;
}
}
/*
* Starts/Stops a counter present on the PMU. The PMI handler
* should stop the counter when perf_event_overflow() returns
* !0. ->start() will be used to continue.
*/
/*
 * Start counting for @event on the hardware counter it was assigned.
 *
 * With PERF_EF_RELOAD the counter is first reprogrammed through
 * xtensa_perf_event_set_period(); the event is expected to be up to date
 * at that point (a warning is emitted once otherwise). The event state is
 * then cleared and hw.config is written to the counter's PMCTRL register.
 * Warns once and does nothing if the event has no counter (idx == -1).
 */
static void xtensa_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;
	const int idx = hw->idx;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD) {
		WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
		xtensa_perf_event_set_period(event, hw, idx);
	}

	hw->state = 0;
	set_er(hw->config, XTENSA_PMU_PMCTRL(idx));
}
/*
 * Stop counting for @event.
 *
 * Unless the event is already marked stopped, its PMCTRL register is
 * zeroed and the PMSTAT value is read and written back (presumably this
 * acknowledges pending status bits - confirm against the PMU
 * documentation). With PERF_EF_UPDATE the event count is brought up to
 * date once and the event is marked PERF_HES_UPTODATE.
 */
static void xtensa_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;
	const int idx = hw->idx;

	if (!(hw->state & PERF_HES_STOPPED)) {
		set_er(0, XTENSA_PMU_PMCTRL(idx));
		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
		       XTENSA_PMU_PMSTAT(idx));
		hw->state |= PERF_HES_STOPPED;
	}

	if (!(flags & PERF_EF_UPDATE))
		return;
	if (hw->state & PERF_HES_UPTODATE)
		return;

	xtensa_perf_event_update(event, hw, idx);
	hw->state |= PERF_HES_UPTODATE;
}
/*
* Adds/Removes a counter to/from the PMU, can be done inside
* a transaction, see the ->*_txn() methods.
*/
/*
 * Claim a hardware counter on this CPU for @event.
 *
 * The counter recorded in hw.idx is tried first; if it is already taken
 * the first free one is used and hw.idx is updated. The event starts out
 * stopped and up to date, and is started with a reload when
 * PERF_EF_START is given.
 *
 * Returns 0 on success, -EAGAIN when every counter is in use.
 */
static int xtensa_pmu_add(struct perf_event *event, int flags)
{
	struct xtensa_pmu_events *cpu_ev = this_cpu_ptr(&xtensa_pmu_events);
	struct hw_perf_event *hw = &event->hw;
	int slot = hw->idx;

	if (__test_and_set_bit(slot, cpu_ev->used_mask)) {
		/* Preferred counter is busy, fall back to any free one. */
		slot = find_first_zero_bit(cpu_ev->used_mask,
					   XCHAL_NUM_PERF_COUNTERS);
		if (slot == XCHAL_NUM_PERF_COUNTERS)
			return -EAGAIN;

		__set_bit(slot, cpu_ev->used_mask);
		hw->idx = slot;
	}

	cpu_ev->event[slot] = event;
	hw->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		xtensa_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);
	return 0;
}
static void xtensa_pmu_del(struct perf_event *event, int flags)
{
struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
xtensa_pmu_stop(event, PERF_EF_UPDATE);
__clear_bit(event->hw.idx, ev->used_mask);
perf_event_update_userpage(event);
}
static void xtensa_pmu_read(struct perf_event *event)
{
xtensa_perf_event_update(event, &event->hw, event->hw.idx);
}
/*
 * Backtrace callback: record the PC of @frame in the callchain entry
 * passed as @data. Always returns 0.
 */
static int callchain_trace(struct stackframe *frame, void *data)
{
	struct perf_callchain_entry *chain = data;
	unsigned long pc = frame->pc;

	perf_callchain_store(chain, pc);
	return 0;
}
/*
 * Collect the kernel-side call chain for @regs into @entry, at most
 * PERF_MAX_STACK_DEPTH frames, using callchain_trace() as the per-frame
 * callback.
 */
void perf_callchain_kernel(struct perf_callchain_entry *entry,
			   struct pt_regs *regs)
{
	void *cookie = entry;

	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
				callchain_trace, NULL, cookie);
}
/*
 * Collect the user-side call chain for @regs into @entry, at most
 * PERF_MAX_STACK_DEPTH frames, using callchain_trace() as the per-frame
 * callback.
 */
void perf_callchain_user(struct perf_callchain_entry *entry,
			 struct pt_regs *regs)
{
	void *cookie = entry;

	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
			      callchain_trace, cookie);
}
/*
 * Dump the PMU state of the current CPU to the kernel log: the global PMG
 * register, then value, control and status of every counter. Local
 * interrupts are disabled for the duration of the dump.
 */
void perf_event_print_debug(void)
{
	unsigned long flags;
	unsigned i;

	local_irq_save(flags);

	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
		get_er(XTENSA_PMU_PMG));

	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
		unsigned long value = get_er(XTENSA_PMU_PM(i));
		unsigned long ctrl = get_er(XTENSA_PMU_PMCTRL(i));
		unsigned long stat = get_er(XTENSA_PMU_PMSTAT(i));

		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
			i, value, i, ctrl, i, stat);
	}

	local_irq_restore(flags);
}
/*
 * Service one overflowed counter: account the counted events, reprogram
 * the counter and, when a new sampling period began, report the overflow
 * to the perf core. The event is stopped if the core asks for it
 * (perf_event_overflow() returning non-zero).
 */
static void xtensa_pmu_handle_overflow(struct perf_event *event, unsigned idx)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_sample_data data;
	struct pt_regs *regs;
	u64 last_period;

	xtensa_perf_event_update(event, hwc, idx);

	/* Sample the period before set_period() may rewrite it. */
	last_period = hwc->last_period;
	if (!xtensa_perf_event_set_period(event, hwc, idx))
		return;

	regs = get_irq_regs();
	perf_sample_data_init(&data, 0, last_period);
	if (perf_event_overflow(event, &data, regs))
		xtensa_pmu_stop(event, 0);
}

/*
 * PMU interrupt handler. Walks the counters in use on this CPU and, for
 * each one whose PMSTAT has the OVFL bit set, writes the status value
 * back and handles the overflow.
 *
 * Returns IRQ_HANDLED if at least one counter had overflowed, IRQ_NONE
 * otherwise. @irq and @dev_id are unused.
 */
irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
{
	struct xtensa_pmu_events *cpu_ev = this_cpu_ptr(&xtensa_pmu_events);
	irqreturn_t handled = IRQ_NONE;
	unsigned i;

	for (i = find_first_bit(cpu_ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
	     i < XCHAL_NUM_PERF_COUNTERS;
	     i = find_next_bit(cpu_ev->used_mask, XCHAL_NUM_PERF_COUNTERS,
			       i + 1)) {
		uint32_t stat = get_er(XTENSA_PMU_PMSTAT(i));

		if (!(stat & XTENSA_PMU_PMSTAT_OVFL))
			continue;

		set_er(stat, XTENSA_PMU_PMSTAT(i));
		xtensa_pmu_handle_overflow(cpu_ev->event[i], i);
		handled = IRQ_HANDLED;
	}

	return handled;
}
/*
 * Callback table for the Xtensa PMU; registered with the perf core by
 * perf_pmu_register() in xtensa_pmu_init().
 */
static struct pmu xtensa_pmu = {
.pmu_enable = xtensa_pmu_enable,
.pmu_disable = xtensa_pmu_disable,
.event_init = xtensa_pmu_event_init,
.add = xtensa_pmu_add,
.del = xtensa_pmu_del,
.start = xtensa_pmu_start,
.stop = xtensa_pmu_stop,
.read = xtensa_pmu_read,
};
/*
 * Put the PMU of the current CPU into a quiescent state: zero PMG, zero
 * every counter's PMCTRL and write each PMSTAT value back to itself.
 */
static void xtensa_pmu_setup(void)
{
	unsigned counter = 0;

	set_er(0, XTENSA_PMU_PMG);

	while (counter < XCHAL_NUM_PERF_COUNTERS) {
		set_er(0, XTENSA_PMU_PMCTRL(counter));
		set_er(get_er(XTENSA_PMU_PMSTAT(counter)),
		       XTENSA_PMU_PMSTAT(counter));
		++counter;
	}
}
/*
 * CPU notifier: run xtensa_pmu_setup() on CPU_STARTING (with or without
 * CPU_TASKS_FROZEN set). Every other action is ignored. Always returns
 * NOTIFY_OK.
 */
static int xtensa_pmu_notifier(struct notifier_block *self,
			       unsigned long action, void *data)
{
	const unsigned long event = action & ~CPU_TASKS_FROZEN;

	if (event == CPU_STARTING)
		xtensa_pmu_setup();

	return NOTIFY_OK;
}
/*
 * Register the Xtensa PMU with the perf core.
 *
 * Maps the profiling interrupt, installs the CPU notifier, then either
 * just enables the interrupt line (XTENSA_FAKE_NMI: no handler is
 * requested here) or requests it with xtensa_pmu_irq_handler(). Finally
 * the PMU is registered under the name "cpu" as PERF_TYPE_RAW.
 *
 * Returns 0 on success or the error from request_irq() /
 * perf_pmu_register().
 */
static int __init xtensa_pmu_init(void)
{
	int ret;
	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);

	perf_cpu_notifier(xtensa_pmu_notifier);
#if XTENSA_FAKE_NMI
	enable_irq(irq);
#else
	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
			  "pmu", NULL);
	if (ret < 0)
		return ret;
#endif

	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
	if (ret) {
		/*
		 * Undo exactly what was done above: free_irq() is only valid
		 * for an interrupt obtained with request_irq(). In the fake
		 * NMI configuration the line was merely enabled, so disable
		 * it again instead.
		 */
#if XTENSA_FAKE_NMI
		disable_irq(irq);
#else
		free_irq(irq, NULL);
#endif
	}

	return ret;
}
early_initcall(xtensa_pmu_init);