perf: riscv: preliminary RISC-V support

This patch provides a basic PMU, riscv_base_pmu, which supports two
general hardware events, instructions and cycles.  Furthermore, this
PMU serves as a reference implementation to ease future ports.

riscv_base_pmu should be able to run on any RISC-V machine that
conforms to the Priv-Spec.  Note that the latest qemu model doesn't
yet fully support the proper behavior of Priv-Spec 1.10, but a
workaround should be easy with very small fixes.  Please check
https://github.com/riscv/riscv-qemu/pull/115 for future updates.

Cc: Nick Hu <nickhu@andestech.com>
Cc: Greentime Hu <greentime@andestech.com>
Signed-off-by: Alan Kao <alankao@andestech.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
This commit is contained in:
Alan Kao 2018-04-20 07:27:49 +08:00 committed by Palmer Dabbelt
parent 29dcea8877
commit 178e9fc47a
No known key found for this signature in database
GPG Key ID: EF4CA1502CCBAB41
5 changed files with 586 additions and 0 deletions

View File

@ -25,6 +25,7 @@ config RISCV
select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select HAVE_GENERIC_DMA_COHERENT
select HAVE_PERF_EVENTS
select IRQ_DOMAIN
select NO_BOOTMEM
select RISCV_ISA_A if SMP
@ -198,6 +199,19 @@ config RISCV_ISA_C
config RISCV_ISA_A
def_bool y
menu "supported PMU type"
depends on PERF_EVENTS
config RISCV_BASE_PMU
bool "Base Performance Monitoring Unit"
def_bool y
help
A base PMU that serves as a reference implementation and supports only
a limited set of perf features. It can run on any RISC-V machine, so it
serves as the fallback, but this option can also be disabled to reduce
kernel size.
endmenu
endmenu
menu "Kernel type"

View File

@ -25,6 +25,7 @@ generic-y += kdebug.h
generic-y += kmap_types.h
generic-y += kvm_para.h
generic-y += local.h
generic-y += local64.h
generic-y += mm-arch-hooks.h
generic-y += mman.h
generic-y += module.h

View File

@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2018 SiFive
* Copyright (C) 2018 Andes Technology Corporation
*
*/
#ifndef _ASM_RISCV_PERF_EVENT_H
#define _ASM_RISCV_PERF_EVENT_H
#include <linux/perf_event.h>
#include <linux/ptrace.h>
/* Number of base counters; riscv_base_pmu uses it as its num_counters. */
#define RISCV_BASE_COUNTERS 2
/*
* RISCV_MAX_COUNTERS sizes the events[] array in struct cpu_hw_events,
* so every PMU configuration has to define it. The build is stopped
* below if no configuration did.
*/
#ifdef CONFIG_RISCV_BASE_PMU
#define RISCV_MAX_COUNTERS 2
#endif
#ifndef RISCV_MAX_COUNTERS
#error "Please provide a valid RISCV_MAX_COUNTERS for the PMU."
#endif
/*
* These are the indexes of bits in the counteren register *minus* 1,
* except for cycle. It would be more coherent if they mapped directly
* to the counteren bit definitions, but there is a *time* register at
* counteren[1], and per-cpu structures are a scarce resource here.
*
* According to the spec, an implementation can support counters up to
* mhpmcounter31, but since many high-end processors have at most 6
* general PMCs, we only give definitions up to MHPMCOUNTER8 here.
*/
/* Counter indexes, used as event codes in the hw/cache event maps. */
#define RISCV_PMU_CYCLE 0
#define RISCV_PMU_INSTRET 1
#define RISCV_PMU_MHPMCOUNTER3 2
#define RISCV_PMU_MHPMCOUNTER4 3
#define RISCV_PMU_MHPMCOUNTER5 4
#define RISCV_PMU_MHPMCOUNTER6 5
#define RISCV_PMU_MHPMCOUNTER7 6
#define RISCV_PMU_MHPMCOUNTER8 7
/*
* Event-map marker for an event that has no counter. It is a negative
* errno value, so it cannot be confused with a counter index above.
*/
#define RISCV_OP_UNSUPP (-EOPNOTSUPP)
/* Bookkeeping of the perf events that are currently enabled on a PMU. */
struct cpu_hw_events {
/* number of currently enabled events */
int n_events;
/* currently enabled events; the array holds RISCV_MAX_COUNTERS slots */
struct perf_event *events[RISCV_MAX_COUNTERS];
/* vendor-defined PMU data */
void *platform;
};
/*
* Description of one RISC-V PMU implementation: the generic struct pmu it
* wraps, its event-mapping tables and methods, and its counter/IRQ
* properties.
*/
struct riscv_pmu {
/* the generic perf PMU (callbacks, name) this description wraps */
struct pmu *pmu;
/* generic hw/cache events table */
const int *hw_events;
/* pointer to a [cache type][operation][result] table of event codes */
const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
/* method used to map hw/cache events */
int (*map_hw_event)(u64 config);
int (*map_cache_event)(u64 config);
/* max generic hw events in map */
int max_events;
/* number total counters, 2(base) + x(general) */
int num_counters;
/* the width of the counter */
int counter_width;
/* vendor-defined PMU features */
void *platform;
/* interrupt handler and IRQ number of the PMU, if it has an interrupt */
irqreturn_t (*handle_irq)(int irq_num, void *dev);
int irq;
};
#endif /* _ASM_RISCV_PERF_EVENT_H */

View File

@ -39,4 +39,6 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
clean:

View File

@ -0,0 +1,485 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
* Copyright (C) 2009 Google, Inc., Stephane Eranian
* Copyright 2014 Tilera Corporation. All Rights Reserved.
* Copyright (C) 2018 Andes Technology Corporation
*
* Perf_events support for RISC-V platforms.
*
* Since the spec. (as of now, Priv-Spec 1.10) does not provide enough
* functionality for perf event to fully work, this file provides
* the very basic framework only.
*
* For platform porting, please check Documentation/riscv/pmu.txt.
*
* The Copyright line includes x86 and tile ones.
*/
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/bitmap.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/atomic.h>
#include <linux/of.h>
#include <asm/perf_event.h>
/* The PMU description in use; assigned in init_hw_perf_events(). */
static const struct riscv_pmu *riscv_pmu __read_mostly;
/* Per-CPU record of the events currently added to the PMU. */
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
/*
* Hardware & cache maps and their methods
*/
/*
* Generic hardware event -> event code. Only cycles and instructions are
* mapped to a counter; every other listed event is RISCV_OP_UNSUPP.
*/
static const int riscv_hw_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE,
[PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET,
[PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP,
[PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP,
};
#define C(x) PERF_COUNT_HW_CACHE_##x
static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
[C(RESULT_MISS)] = RISCV_OP_UNSUPP,
},
},
};
/*
 * Translate a generic hardware event number into this PMU's event code.
 *
 * Returns the entry of riscv_pmu->hw_events for @config (which may be
 * the negative RISCV_OP_UNSUPP marker), or -EINVAL when @config lies
 * outside the table.
 */
static int riscv_map_hw_event(u64 config)
{
	if (config < riscv_pmu->max_events)
		return riscv_pmu->hw_events[config];

	return -EINVAL;
}
/*
* Split a cache-event config into the type/op/result indexes that
* riscv_map_cache_event() uses to look up its table.
*
* This is a stub: it always returns -ENOENT and never writes to type,
* op or result.
*/
int riscv_map_cache_decode(u64 config, unsigned int *type,
unsigned int *op, unsigned int *result)
{
return -ENOENT;
}
/*
 * Translate a cache-event config into this PMU's event code.
 *
 * Returns the event code (>= 0) on success, the decoder's error if
 * @config cannot be decoded, -ENOENT if the PMU provides no cache-event
 * table, or -EINVAL if an index is out of range or the table marks the
 * event as unsupported.
 */
static int riscv_map_cache_event(u64 config)
{
	unsigned int type, op, result;
	int err;
	int code;

	err = riscv_map_cache_decode(config, &type, &op, &result);
	if (err)
		return err;

	/*
	 * Without a table there is nothing to look up.  This must be an
	 * error of its own: returning the (zero) decode status here would
	 * be read by the caller as event code 0, i.e. the cycle counter.
	 */
	if (!riscv_pmu->cache_events)
		return -ENOENT;

	if (type >= PERF_COUNT_HW_CACHE_MAX ||
	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	code = (*riscv_pmu->cache_events)[type][op][result];
	if (code == RISCV_OP_UNSUPP)
		return -EINVAL;

	return code;
}
/*
* Low-level functions: reading/writing counters
*/
/*
 * Read the raw value of counter @idx.
 *
 * Only the cycle and instret counters can be read.  For any other index
 * the return value is -EINVAL converted to u64, and a one-time warning
 * is raised if the index is not a valid counter index at all.
 */
static inline u64 read_counter(int idx)
{
	u64 val = 0;

	switch (idx) {
	case RISCV_PMU_CYCLE:
		val = csr_read(cycle);
		break;
	case RISCV_PMU_INSTRET:
		val = csr_read(instret);
		break;
	default:
		/* Valid indexes are 0 .. RISCV_MAX_COUNTERS - 1. */
		WARN_ON_ONCE(idx < 0 || idx >= RISCV_MAX_COUNTERS);
		return -EINVAL;
	}

	return val;
}
/*
* Write value to counter idx. Not implemented: the arguments are ignored
* and every call raises a one-time warning.
*/
static inline void write_counter(int idx, u64 value)
{
/* currently not supported */
WARN_ON_ONCE(1);
}
/*
 * pmu->read: sample the hardware counter and fold the progress made
 * since the previous sample into event->count.
 *
 * Other architectures often implement this as an xxx_perf_event_update
 * routine that returns the counter value when called from the IRQ
 * handler, and is wrapped by a void pmu->read method.
 */
static void riscv_pmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	u64 before, now;
	u64 swapped;
	u64 width_mask;

	/*
	 * Publish the new raw value as prev_count; start over if
	 * prev_count changed between reading it and replacing it.
	 */
	for (;;) {
		before = local64_read(&hwc->prev_count);
		now = read_counter(idx);
		swapped = local64_cmpxchg(&hwc->prev_count, before, now);
		if (swapped == before)
			break;
	}

	/* Only the low counter_width bits of the difference are kept. */
	width_mask = (1ULL << riscv_pmu->counter_width) - 1;
	local64_add((now - before) & width_mask, &event->count);

	/*
	 * A PMU with a perf interrupt would also need something like
	 * local64_sub(delta, &hwc->period_left) here.
	 */
}
/*
* State transition functions:
*
* stop()/start() & add()/del()
*/
/*
 * pmu->stop: mark the event as stopped and, when PERF_EF_UPDATE is
 * requested and the count is not already up to date, take a final
 * reading through pmu->read.
 */
static void riscv_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;

	/* Stopping an already stopped event is unexpected. */
	WARN_ON_ONCE(hw->state & PERF_HES_STOPPED);
	hw->state |= PERF_HES_STOPPED;

	if (!(flags & PERF_EF_UPDATE))
		return;
	if (hw->state & PERF_HES_UPTODATE)
		return;

	riscv_pmu->pmu->read(event);
	hw->state |= PERF_HES_UPTODATE;
}
/*
* pmu->start: start the event.
*/
static void riscv_pmu_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;
if (flags & PERF_EF_RELOAD) {
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
/*
* Set the counter to the period to the next interrupt here,
* if you have any.
*/
}
hwc->state = 0;
perf_event_update_userpage(event);
/*
* Since we cannot write to counters, this serves as an initialization
* to the delta-mechanism in pmu->read(); otherwise, the delta would be
* wrong when pmu->read is called for the first time.
*/
local64_set(&hwc->prev_count, read_counter(hwc->idx));
}
/*
 * pmu->add: attach the event to this CPU's PMU.
 *
 * Returns 0 on success, or -ENOSPC when the number of enabled events
 * already equals the number of counters.
 */
static int riscv_pmu_add(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;
	struct cpu_hw_events *cpu_events = this_cpu_ptr(&cpu_hw_events);

	if (cpu_events->n_events == riscv_pmu->num_counters)
		return -ENOSPC;

	/*
	 * There are no general counters, so no event-to-counter binding
	 * takes place here.
	 *
	 * Using hw->config as an index does not work in general, because
	 * config may carry extra information; here, however, the event
	 * index is the only thing stored in it.
	 */
	hw->idx = hw->config;
	cpu_events->events[hw->idx] = event;
	cpu_events->n_events++;

	/* The event starts out stopped with an up-to-date count. */
	hw->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		riscv_pmu->pmu->start(event, PERF_EF_RELOAD);

	return 0;
}
/*
 * pmu->del: detach the event from this CPU's PMU, stopping it with a
 * final count update.
 */
static void riscv_pmu_del(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;
	struct cpu_hw_events *cpu_events = this_cpu_ptr(&cpu_hw_events);

	/* Give the slot back before stopping the event. */
	cpu_events->n_events--;
	cpu_events->events[hw->idx] = NULL;

	riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
	perf_event_update_userpage(event);
}
/*
* Interrupt: a skeleton for reference.
*/
/* Held while the PMU interrupt is being requested or freed. */
static DEFINE_MUTEX(pmc_reserve_mutex);
/*
* Interrupt handler of the base PMU. It is a placeholder that does no
* work and always returns IRQ_NONE.
*/
irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev)
{
return IRQ_NONE;
}
/*
 * Request the PMU interrupt, if the PMU has both an IRQ number and a
 * handler.
 *
 * Returns 0 when there is nothing to request or the request succeeded,
 * otherwise the error returned by request_irq().
 */
static int reserve_pmc_hardware(void)
{
	int ret = 0;

	mutex_lock(&pmc_reserve_mutex);

	/* A negative irq or a missing handler means there is no interrupt. */
	if (riscv_pmu->irq < 0 || !riscv_pmu->handle_irq)
		goto unlock;

	ret = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq,
			  IRQF_PERCPU, "riscv-base-perf", NULL);

unlock:
	mutex_unlock(&pmc_reserve_mutex);
	return ret;
}
/*
 * Free the PMU interrupt taken by reserve_pmc_hardware().
 *
 * The condition mirrors the one used when requesting: an interrupt is
 * only requested when the PMU has both an IRQ number and a handler, so
 * only then is there anything to free.
 */
void release_pmc_hardware(void)
{
	mutex_lock(&pmc_reserve_mutex);

	if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq)
		free_irq(riscv_pmu->irq, NULL);

	mutex_unlock(&pmc_reserve_mutex);
}
/*
* Event Initialization/Finalization
*/
/* Number of events that currently hold a reference on the PMU hardware. */
static atomic_t riscv_active_events = ATOMIC_INIT(0);

/*
 * Drop one event's reference; the PMU hardware is released when the
 * last reference goes away.
 */
static void riscv_event_destroy(struct perf_event *event)
{
	if (atomic_dec_return(&riscv_active_events) != 0)
		return;

	release_pmc_hardware();
}
/*
 * pmu->event_init: validate a new event and map it to an event code.
 *
 * The first active event reserves the PMU hardware.  Every successfully
 * initialized event holds one reference in riscv_active_events, which
 * is dropped again by its destroy callback.
 *
 * Returns 0 on success, -EBUSY if the hardware cannot be reserved,
 * -EOPNOTSUPP for raw events, -ENOENT for event types this PMU does not
 * handle, or the negative code returned by the mapping method.
 */
static int riscv_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	int code;

	if (atomic_inc_return(&riscv_active_events) == 1) {
		if (reserve_pmc_hardware()) {
			pr_warn("PMC hardware not available\n");
			atomic_dec(&riscv_active_events);
			return -EBUSY;
		}
	}

	switch (attr->type) {
	case PERF_TYPE_HARDWARE:
		code = riscv_pmu->map_hw_event(attr->config);
		break;
	case PERF_TYPE_HW_CACHE:
		code = riscv_pmu->map_cache_event(attr->config);
		break;
	case PERF_TYPE_RAW:
		code = -EOPNOTSUPP;
		break;
	default:
		code = -ENOENT;
		break;
	}

	/*
	 * Every failure, including an unsupported event type, has to give
	 * back the reference taken above; returning without doing so would
	 * leave riscv_active_events permanently elevated.
	 */
	if (code < 0) {
		riscv_event_destroy(event);
		return code;
	}

	/*
	 * Install the destroy callback only once initialization can no
	 * longer fail, so that a rejected event carries no callback that
	 * could drop the reference a second time.
	 */
	event->destroy = riscv_event_destroy;

	/*
	 * idx is set to -1 because the index of a general event should not
	 * be decided until it is bound to some counter in pmu->add().
	 *
	 * But since we don't have such support, pmu->add() later simply
	 * uses hwc->config as the index instead.
	 */
	hwc->config = code;
	hwc->idx = -1;

	return 0;
}
/*
* Initialization
*/
/* Generic perf PMU callbacks of the base PMU; wrapped by riscv_base_pmu. */
static struct pmu min_pmu = {
.name = "riscv-base",
.event_init = riscv_event_init,
.add = riscv_pmu_add,
.del = riscv_pmu_del,
.start = riscv_pmu_start,
.stop = riscv_pmu_stop,
.read = riscv_pmu_read,
};
/*
* The base PMU: the default chosen by init_hw_perf_events(), and the
* entry matched by the "riscv,base-pmu" compatible string.
*/
static const struct riscv_pmu riscv_base_pmu = {
.pmu = &min_pmu,
.max_events = ARRAY_SIZE(riscv_hw_event_map),
.map_hw_event = riscv_map_hw_event,
.hw_events = riscv_hw_event_map,
.map_cache_event = riscv_map_cache_event,
.cache_events = &riscv_cache_event_map,
/* riscv_pmu_read() keeps this many low bits of each counter delta */
.counter_width = 63,
/* base counters only, no additional general counters */
.num_counters = RISCV_BASE_COUNTERS + 0,
.handle_irq = &riscv_base_pmu_handle_irq,
/* This means this PMU has no IRQ. */
.irq = -1,
};
/*
* Device-tree match table: maps a PMU node's compatible string to the
* struct riscv_pmu to use for it.
*/
static const struct of_device_id riscv_pmu_of_ids[] = {
{.compatible = "riscv,base-pmu", .data = &riscv_base_pmu},
{ /* sentinel value */ }
};
/*
 * Select the PMU description and register it with the perf core.
 *
 * riscv_base_pmu is the default.  If the device tree has a node of type
 * "pmu" that matches an entry in riscv_pmu_of_ids, the description
 * attached to that entry is used instead.
 *
 * Returns the result of perf_pmu_register().
 */
int __init init_hw_perf_events(void)
{
	struct device_node *node = of_find_node_by_type(NULL, "pmu");
	const struct of_device_id *of_id;

	riscv_pmu = &riscv_base_pmu;

	if (node) {
		of_id = of_match_node(riscv_pmu_of_ids, node);
		if (of_id)
			riscv_pmu = of_id->data;

		/*
		 * of_find_node_by_type() returned the node with a reference
		 * held; drop it now that the match is done.  The selected
		 * description lives in the static match table, not in the
		 * node, so it stays valid.
		 */
		of_node_put(node);
	}

	return perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW);
}
arch_initcall(init_hw_perf_events);