Merge patch series "riscv: Allow userspace to directly access perf counters"
Alexandre Ghiti <alexghiti@rivosinc.com> says:

riscv used to allow direct access to the cycle/time/instret counters, bypassing the perf framework; this patchset lets the user mmap any counter that is accessed through perf. **Important**: the default mode is now user access through perf only, not the legacy direct access, so some applications will break. However, we introduce a sysctl, perf_user_access, like arm64 does, which allows switching back to the legacy mode described above.

This version needs openSBI v1.3 *and* a kernel fix that recently went upstream (https://lore.kernel.org/lkml/20230616114831.3186980-1-maz@kernel.org/T/).

* b4-shazam-merge:
  perf: tests: Adapt mmap-basic.c for riscv
  tools: lib: perf: Implement riscv mmap support
  Documentation: admin-guide: Add riscv sysctl_perf_user_access
  drivers: perf: Implement perf event mmap support in the SBI backend
  drivers: perf: Implement perf event mmap support in the legacy backend
  riscv: Prepare for user-space perf event mmap support
  drivers: perf: Rename riscv pmu sbi driver
  riscv: Make legacy counter enum match the HW numbering
  include: riscv: Fix wrong include guard in riscv_pmu.h
  perf: Fix wrong comment about default event_idx

Link: https://lore.kernel.org/r/20230802080328.1213905-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
commit 7aa7d502e4
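
For orientation before the diff: once a counter is mmapped through perf, userspace reads it with the generic perf_event_mmap_page protocol (the tool-side helpers land in the libperf and perf-test hunks below). The following is a minimal sketch of that read path, not code from the series: it assumes a cycles event opened on the current task with the default perf_user_access=1, and that the reported index is 1 so the raw value comes straight from rdcycle; sign-extension to pmc_width and error handling are omitted.

    /* Hedged sketch of user-space self-monitoring on riscv (not from this series). */
    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static uint64_t read_cycle_csr(void)
    {
            uint64_t val;

            __asm__ __volatile__("rdcycle %0" : "=r"(val));
            return val;
    }

    int main(void)
    {
            struct perf_event_attr attr = {
                    .type = PERF_TYPE_HARDWARE,
                    .config = PERF_COUNT_HW_CPU_CYCLES,
                    .size = sizeof(attr),
                    .exclude_kernel = 1,
                    .exclude_hv = 1,
            };
            struct perf_event_mmap_page *pg;
            uint64_t count = 0;
            uint32_t seq, idx;
            int fd;

            fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
            pg = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);

            do {
                    seq = pg->lock;
                    __sync_synchronize();
                    idx = pg->index;        /* 0 means no direct user access */
                    count = pg->offset;
                    if (pg->cap_user_rdpmc && idx == 1)
                            count += read_cycle_csr();
                    __sync_synchronize();
            } while (pg->lock != seq);

            /* 'count' was obtained without entering the kernel */
            return count ? 0 : 1;
    }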
@@ -941,16 +941,35 @@ enabled, otherwise writing to this file will return ``-EBUSY``.

 The default value is 8.

-perf_user_access (arm64 only)
-=================================
+perf_user_access (arm64 and riscv only)
+=======================================

-Controls user space access for reading perf event counters. When set to 1,
-user space can read performance monitor counter registers directly.
+Controls user space access for reading perf event counters.
+
+arm64
+=====

 The default value is 0 (access disabled).

+When set to 1, user space can read performance monitor counter registers
+directly.
+
 See Documentation/arch/arm64/perf.rst for more information.

+riscv
+=====
+
+When set to 0, user space access is disabled.
+
+The default value is 1: user space can read performance monitor counter
+registers through perf; any direct access without perf intervention will
+trigger an illegal instruction.
+
+When set to 2, legacy mode is enabled (user space has direct access to the
+cycle and instret CSRs only). Note that this legacy value is deprecated and
+will be removed once all user space applications are fixed.
+
+Note that the time CSR is always directly accessible in all modes.
+
 pid_max
 =======

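The sysctl is also what an application would consult before attempting a raw rdcycle/rdinstret, since with the new default those instructions trap unless the counter was mmapped through perf. A small hedged helper illustrating that check (the function is hypothetical; only the /proc path follows from the documentation above):

    #include <stdio.h>

    /* Hypothetical helper: returns 0, 1 or 2 as documented above, or -1 if the
     * sysctl does not exist (e.g. an older kernel or another architecture). */
    static int perf_user_access_mode(void)
    {
            FILE *f = fopen("/proc/sys/kernel/perf_user_access", "r");
            int mode = -1;

            if (f) {
                    if (fscanf(f, "%d", &mode) != 1)
                            mode = -1;
                    fclose(f);
            }
            return mode;
    }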
@@ -14,9 +14,81 @@
 #include <linux/perf/riscv_pmu.h>
 #include <linux/printk.h>
 #include <linux/smp.h>
+#include <linux/sched_clock.h>

 #include <asm/sbi.h>

+static bool riscv_perf_user_access(struct perf_event *event)
+{
+        return ((event->attr.type == PERF_TYPE_HARDWARE) ||
+                (event->attr.type == PERF_TYPE_HW_CACHE) ||
+                (event->attr.type == PERF_TYPE_RAW)) &&
+               !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
+}
+
+void arch_perf_update_userpage(struct perf_event *event,
+                               struct perf_event_mmap_page *userpg, u64 now)
+{
+        struct clock_read_data *rd;
+        unsigned int seq;
+        u64 ns;
+
+        userpg->cap_user_time = 0;
+        userpg->cap_user_time_zero = 0;
+        userpg->cap_user_time_short = 0;
+        userpg->cap_user_rdpmc = riscv_perf_user_access(event);
+
+#ifdef CONFIG_RISCV_PMU
+        /*
+         * The counters are 64-bit but the priv spec doesn't mandate all the
+         * bits to be implemented: that's why, counter width can vary based on
+         * the cpu vendor.
+         */
+        if (userpg->cap_user_rdpmc)
+                userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;
+#endif
+
+        do {
+                rd = sched_clock_read_begin(&seq);
+
+                userpg->time_mult = rd->mult;
+                userpg->time_shift = rd->shift;
+                userpg->time_zero = rd->epoch_ns;
+                userpg->time_cycles = rd->epoch_cyc;
+                userpg->time_mask = rd->sched_clock_mask;
+
+                /*
+                 * Subtract the cycle base, such that software that
+                 * doesn't know about cap_user_time_short still 'works'
+                 * assuming no wraps.
+                 */
+                ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+                userpg->time_zero -= ns;
+
+        } while (sched_clock_read_retry(seq));
+
+        userpg->time_offset = userpg->time_zero - now;
+
+        /*
+         * time_shift is not expected to be greater than 31 due to
+         * the original published conversion algorithm shifting a
+         * 32-bit value (now specifies a 64-bit value) - refer
+         * perf_event_mmap_page documentation in perf_event.h.
+         */
+        if (userpg->time_shift == 32) {
+                userpg->time_shift = 31;
+                userpg->time_mult >>= 1;
+        }
+
+        /*
+         * Internal timekeeping for enabled/running/stopped times
+         * is always computed with the sched_clock.
+         */
+        userpg->cap_user_time = 1;
+        userpg->cap_user_time_zero = 1;
+        userpg->cap_user_time_short = 1;
+}
+
 static unsigned long csr_read_num(int csr_num)
 {
 #define switchcase_csr_read(__csr_num, __val)  {\
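arch_perf_update_userpage() above exports the sched_clock parameters (time_mult, time_shift, time_zero, time_cycles, time_mask) so that a self-monitoring task can convert a raw cycle value from the time CSR into the nanosecond clock perf reports. A sketch of that conversion, following the recipe documented for perf_event_mmap_page in include/uapi/linux/perf_event.h (the wrapper function is illustrative; the seqlock retry loop around it is omitted):

    #include <linux/perf_event.h>
    #include <stdint.h>

    /* Convert a raw cycle value into perf's clock using the time_* fields
     * published by arch_perf_update_userpage(); sketch only, no lock handling. */
    static uint64_t mmap_page_time_ns(const struct perf_event_mmap_page *pg,
                                      uint64_t cyc)
    {
            uint64_t quot, rem, delta;

            if (pg->cap_user_time_short)
                    cyc = pg->time_cycles +
                          ((cyc - pg->time_cycles) & pg->time_mask);

            quot = cyc >> pg->time_shift;
            rem = cyc & (((uint64_t)1 << pg->time_shift) - 1);
            delta = quot * pg->time_mult + ((rem * pg->time_mult) >> pg->time_shift);

            return pg->time_zero + delta;   /* nanoseconds on perf's clock */
    }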
@@ -171,6 +243,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)

         local64_set(&hwc->prev_count, (u64)-left);

+        perf_event_update_userpage(event);
+
         return overflow;
 }

@@ -267,6 +341,9 @@ static int riscv_pmu_event_init(struct perf_event *event)
         hwc->idx = -1;
         hwc->event_base = mapped_event;

+        if (rvpmu->event_init)
+                rvpmu->event_init(event);
+
         if (!is_sampling_event(event)) {
                 /*
                  * For non-sampling runs, limit the sample_period to half
@@ -283,6 +360,39 @@ static int riscv_pmu_event_init(struct perf_event *event)
         return 0;
 }

+static int riscv_pmu_event_idx(struct perf_event *event)
+{
+        struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+        if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
+                return 0;
+
+        if (rvpmu->csr_index)
+                return rvpmu->csr_index(event) + 1;
+
+        return 0;
+}
+
+static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+        struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+        if (rvpmu->event_mapped) {
+                rvpmu->event_mapped(event, mm);
+                perf_event_update_userpage(event);
+        }
+}
+
+static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+        struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+        if (rvpmu->event_unmapped) {
+                rvpmu->event_unmapped(event, mm);
+                perf_event_update_userpage(event);
+        }
+}
+
 struct riscv_pmu *riscv_pmu_alloc(void)
 {
         struct riscv_pmu *pmu;
@@ -307,6 +417,9 @@ struct riscv_pmu *riscv_pmu_alloc(void)
         }
         pmu->pmu = (struct pmu) {
                 .event_init     = riscv_pmu_event_init,
+                .event_mapped   = riscv_pmu_event_mapped,
+                .event_unmapped = riscv_pmu_event_unmapped,
+                .event_idx      = riscv_pmu_event_idx,
                 .add            = riscv_pmu_add,
                 .del            = riscv_pmu_del,
                 .start          = riscv_pmu_start,
@@ -13,7 +13,7 @@
 #include <linux/platform_device.h>

 #define RISCV_PMU_LEGACY_CYCLE          0
-#define RISCV_PMU_LEGACY_INSTRET        1
+#define RISCV_PMU_LEGACY_INSTRET        2

 static bool pmu_init_done;

@@ -71,6 +71,29 @@ static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival)
         local64_set(&hwc->prev_count, initial_val);
 }

+static uint8_t pmu_legacy_csr_index(struct perf_event *event)
+{
+        return event->hw.idx;
+}
+
+static void pmu_legacy_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+        if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+            event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+                return;
+
+        event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
+static void pmu_legacy_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+        if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+            event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+                return;
+
+        event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
 /*
  * This is just a simple implementation to allow legacy implementations
  * compatible with new RISC-V PMU driver framework.
@@ -91,6 +114,9 @@ static void pmu_legacy_init(struct riscv_pmu *pmu)
         pmu->ctr_get_width = NULL;
         pmu->ctr_clear_idx = NULL;
         pmu->ctr_read = pmu_legacy_read_ctr;
+        pmu->event_mapped = pmu_legacy_event_mapped;
+        pmu->event_unmapped = pmu_legacy_event_unmapped;
+        pmu->csr_index = pmu_legacy_csr_index;

         perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
 }
@@ -24,6 +24,14 @@
 #include <asm/sbi.h>
 #include <asm/hwcap.h>

+#define SYSCTL_NO_USER_ACCESS   0
+#define SYSCTL_USER_ACCESS      1
+#define SYSCTL_LEGACY           2
+
+#define PERF_EVENT_FLAG_NO_USER_ACCESS  BIT(SYSCTL_NO_USER_ACCESS)
+#define PERF_EVENT_FLAG_USER_ACCESS     BIT(SYSCTL_USER_ACCESS)
+#define PERF_EVENT_FLAG_LEGACY          BIT(SYSCTL_LEGACY)
+
 PMU_FORMAT_ATTR(event, "config:0-47");
 PMU_FORMAT_ATTR(firmware, "config:63");

@@ -43,6 +51,9 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = {
         NULL,
 };

+/* Allow user mode access by default */
+static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS;
+
 /*
  * RISC-V doesn't have heterogeneous harts yet. This need to be part of
  * per_cpu in case of harts with different pmu counters
@@ -301,6 +312,11 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr)
 }
 EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info);

+static uint8_t pmu_sbi_csr_index(struct perf_event *event)
+{
+        return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
+}
+
 static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event)
 {
         unsigned long cflags = 0;
@@ -329,18 +345,34 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event)
         struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
         struct sbiret ret;
         int idx;
-        uint64_t cbase = 0;
+        uint64_t cbase = 0, cmask = rvpmu->cmask;
         unsigned long cflags = 0;

         cflags = pmu_sbi_get_filter_flags(event);
+
+        /*
+         * In legacy mode, we have to force the fixed counters for those events
+         * but not in the user access mode as we want to use the other counters
+         * that support sampling/filtering.
+         */
+        if (hwc->flags & PERF_EVENT_FLAG_LEGACY) {
+                if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+                        cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+                        cmask = 1;
+                } else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
+                        cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+                        cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
+                }
+        }
+
         /* retrieve the available counter index */
 #if defined(CONFIG_32BIT)
         ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
-                        rvpmu->cmask, cflags, hwc->event_base, hwc->config,
+                        cmask, cflags, hwc->event_base, hwc->config,
                         hwc->config >> 32);
 #else
         ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
-                        rvpmu->cmask, cflags, hwc->event_base, hwc->config, 0);
+                        cmask, cflags, hwc->event_base, hwc->config, 0);
 #endif
         if (ret.error) {
                 pr_debug("Not able to find a counter for event %lx config %llx\n",
@@ -474,6 +506,22 @@ static u64 pmu_sbi_ctr_read(struct perf_event *event)
         return val;
 }

+static void pmu_sbi_set_scounteren(void *arg)
+{
+        struct perf_event *event = (struct perf_event *)arg;
+
+        csr_write(CSR_SCOUNTEREN,
+                  csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event)));
+}
+
+static void pmu_sbi_reset_scounteren(void *arg)
+{
+        struct perf_event *event = (struct perf_event *)arg;
+
+        csr_write(CSR_SCOUNTEREN,
+                  csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event)));
+}
+
 static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
 {
         struct sbiret ret;
@@ -490,6 +538,10 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
         if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
                 pr_err("Starting counter idx %d failed with error %d\n",
                         hwc->idx, sbi_err_map_linux_errno(ret.error));
+
+        if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+            (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+                pmu_sbi_set_scounteren((void *)event);
 }

 static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
@@ -497,6 +549,10 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
         struct sbiret ret;
         struct hw_perf_event *hwc = &event->hw;

+        if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+            (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+                pmu_sbi_reset_scounteren((void *)event);
+
         ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
         if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
             flag != SBI_PMU_STOP_FLAG_RESET)
@@ -704,10 +760,13 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
         struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);

         /*
-         * Enable the access for CYCLE, TIME, and INSTRET CSRs from userspace,
-         * as is necessary to maintain uABI compatibility.
+         * We keep enabling userspace access to CYCLE, TIME and INSTRET via the
+         * legacy option but that will be removed in the future.
          */
-        csr_write(CSR_SCOUNTEREN, 0x7);
+        if (sysctl_perf_user_access == SYSCTL_LEGACY)
+                csr_write(CSR_SCOUNTEREN, 0x7);
+        else
+                csr_write(CSR_SCOUNTEREN, 0x2);

         /* Stop all the counters so that they can be enabled from perf */
         pmu_sbi_stop_all(pmu);
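The two SCOUNTEREN values above are bitmasks over the user-readable counter CSRs: bit 0 enables cycle (CY), bit 1 time (TM) and bit 2 instret (IR), per the RISC-V privileged spec. Legacy mode therefore exposes all three (0x7) while the new default leaves only time readable (0x2); individual counters are switched on later, per event, by pmu_sbi_set_scounteren(). Spelled out with illustrative macro names (not from the driver):

    /* Illustrative expansion of the SCOUNTEREN constants used above. */
    #define SCOUNTEREN_CY   (1UL << 0)      /* cycle CSR readable from U-mode */
    #define SCOUNTEREN_TM   (1UL << 1)      /* time CSR readable from U-mode */
    #define SCOUNTEREN_IR   (1UL << 2)      /* instret CSR readable from U-mode */

    #define SCOUNTEREN_LEGACY  (SCOUNTEREN_CY | SCOUNTEREN_TM | SCOUNTEREN_IR) /* 0x7 */
    #define SCOUNTEREN_DEFAULT SCOUNTEREN_TM                                   /* 0x2 */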
@@ -838,6 +897,121 @@ static void riscv_pmu_destroy(struct riscv_pmu *pmu)
         cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
 }

+static void pmu_sbi_event_init(struct perf_event *event)
+{
+        /*
+         * The permissions are set at event_init so that we do not depend
+         * on the sysctl value that can change.
+         */
+        if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS)
+                event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS;
+        else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS)
+                event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS;
+        else
+                event->hw.flags |= PERF_EVENT_FLAG_LEGACY;
+}
+
+static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+        if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+                return;
+
+        if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+                if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+                    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+                        return;
+                }
+        }
+
+        /*
+         * The user mmapped the event to directly access it: this is where
+         * we determine based on sysctl_perf_user_access if we grant userspace
+         * the direct access to this event. That means that within the same
+         * task, some events may be directly accessible and some other may not,
+         * if the user changes the value of sysctl_perf_user_access in the
+         * meantime.
+         */
+
+        event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+
+        /*
+         * We must enable userspace access *before* advertising in the user page
+         * that it is possible to do so to avoid any race.
+         * And we must notify all cpus here because threads that currently run
+         * on other cpus will try to directly access the counter too without
+         * calling pmu_sbi_ctr_start.
+         */
+        if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+                on_each_cpu_mask(mm_cpumask(mm),
+                                 pmu_sbi_set_scounteren, (void *)event, 1);
+}
+
+static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+        if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+                return;
+
+        if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+                if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+                    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+                        return;
+                }
+        }
+
+        /*
+         * Here we can directly remove user access since the user does not have
+         * access to the user page anymore so we avoid the racy window where the
+         * user could have read cap_user_rdpmc to true right before we disable
+         * it.
+         */
+        event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+
+        if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+                on_each_cpu_mask(mm_cpumask(mm),
+                                 pmu_sbi_reset_scounteren, (void *)event, 1);
+}
+
+static void riscv_pmu_update_counter_access(void *info)
+{
+        if (sysctl_perf_user_access == SYSCTL_LEGACY)
+                csr_write(CSR_SCOUNTEREN, 0x7);
+        else
+                csr_write(CSR_SCOUNTEREN, 0x2);
+}
+
+static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
+                                              int write, void *buffer,
+                                              size_t *lenp, loff_t *ppos)
+{
+        int prev = sysctl_perf_user_access;
+        int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+        /*
+         * Test against the previous value since we clear SCOUNTEREN when
+         * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should
+         * not do that if that was already the case.
+         */
+        if (ret || !write || prev == sysctl_perf_user_access)
+                return ret;
+
+        on_each_cpu(riscv_pmu_update_counter_access, NULL, 1);
+
+        return 0;
+}
+
+static struct ctl_table sbi_pmu_sysctl_table[] = {
+        {
+                .procname       = "perf_user_access",
+                .data           = &sysctl_perf_user_access,
+                .maxlen         = sizeof(unsigned int),
+                .mode           = 0644,
+                .proc_handler   = riscv_pmu_proc_user_access_handler,
+                .extra1         = SYSCTL_ZERO,
+                .extra2         = SYSCTL_TWO,
+        },
+        { }
+};
+
 static int pmu_sbi_device_probe(struct platform_device *pdev)
 {
         struct riscv_pmu *pmu = NULL;
@@ -881,6 +1055,10 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
         pmu->ctr_get_width = pmu_sbi_ctr_get_width;
         pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
         pmu->ctr_read = pmu_sbi_ctr_read;
+        pmu->event_init = pmu_sbi_event_init;
+        pmu->event_mapped = pmu_sbi_event_mapped;
+        pmu->event_unmapped = pmu_sbi_event_unmapped;
+        pmu->csr_index = pmu_sbi_csr_index;

         ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
         if (ret)
@@ -894,6 +1072,8 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
         if (ret)
                 goto out_unregister;

+        register_sysctl("kernel", sbi_pmu_sysctl_table);
+
         return 0;

 out_unregister:
@@ -907,7 +1087,7 @@ out_free:
 static struct platform_driver pmu_sbi_driver = {
         .probe          = pmu_sbi_device_probe,
         .driver         = {
-                .name   = RISCV_PMU_PDEV_NAME,
+                .name   = RISCV_PMU_SBI_PDEV_NAME,
         },
 };

@@ -934,7 +1114,7 @@ static int __init pmu_sbi_devinit(void)
         if (ret)
                 return ret;

-        pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0);
+        pdev = platform_device_register_simple(RISCV_PMU_SBI_PDEV_NAME, -1, NULL, 0);
         if (IS_ERR(pdev)) {
                 platform_driver_unregister(&pmu_sbi_driver);
                 return PTR_ERR(pdev);
@@ -6,8 +6,8 @@
  *
  */

-#ifndef _ASM_RISCV_PERF_EVENT_H
-#define _ASM_RISCV_PERF_EVENT_H
+#ifndef _RISCV_PMU_H
+#define _RISCV_PMU_H

 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
@@ -21,7 +21,7 @@

 #define RISCV_MAX_COUNTERS      64
 #define RISCV_OP_UNSUPP         (-EOPNOTSUPP)
-#define RISCV_PMU_PDEV_NAME     "riscv-pmu"
+#define RISCV_PMU_SBI_PDEV_NAME "riscv-pmu-sbi"
 #define RISCV_PMU_LEGACY_PDEV_NAME      "riscv-pmu-legacy"

 #define RISCV_PMU_STOP_FLAG_RESET       1
@@ -55,6 +55,10 @@ struct riscv_pmu {
         void            (*ctr_start)(struct perf_event *event, u64 init_val);
         void            (*ctr_stop)(struct perf_event *event, unsigned long flag);
         int             (*event_map)(struct perf_event *event, u64 *config);
+        void            (*event_init)(struct perf_event *event);
+        void            (*event_mapped)(struct perf_event *event, struct mm_struct *mm);
+        void            (*event_unmapped)(struct perf_event *event, struct mm_struct *mm);
+        uint8_t         (*csr_index)(struct perf_event *event);

         struct cpu_hw_events    __percpu *hw_events;
         struct hlist_node       node;
@@ -81,4 +85,4 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr);

 #endif /* CONFIG_RISCV_PMU */

-#endif /* _ASM_RISCV_PERF_EVENT_H */
+#endif /* _RISCV_PMU_H */
@@ -445,7 +445,8 @@ struct pmu {

         /*
          * Will return the value for perf_event_mmap_page::index for this event,
-         * if no implementation is provided it will default to: event->hw.idx + 1.
+         * if no implementation is provided it will default to 0 (see
+         * perf_event_idx_default).
          */
         int (*event_idx)                (struct perf_event *event); /*optional */

@@ -392,6 +392,72 @@ static u64 read_perf_counter(unsigned int counter)

 static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }

+/* __riscv_xlen contains the width of the native base integer, here 64-bit */
+#elif defined(__riscv) && __riscv_xlen == 64
+
+/* TODO: implement rv32 support */
+
+#define CSR_CYCLE       0xc00
+#define CSR_TIME        0xc01
+
+#define csr_read(csr)                                           \
+({                                                              \
+        register unsigned long __v;                             \
+        __asm__ __volatile__ ("csrr %0, %1"                     \
+                              : "=r" (__v)                      \
+                              : "i" (csr) : );                  \
+        __v;                                                    \
+})
+
+static unsigned long csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val)           {\
+        case __csr_num:                                 \
+                __val = csr_read(__csr_num);            \
+                break; }
+#define switchcase_csr_read_2(__csr_num, __val)         {\
+        switchcase_csr_read(__csr_num + 0, __val)       \
+        switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val)         {\
+        switchcase_csr_read_2(__csr_num + 0, __val)     \
+        switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val)         {\
+        switchcase_csr_read_4(__csr_num + 0, __val)     \
+        switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val)        {\
+        switchcase_csr_read_8(__csr_num + 0, __val)     \
+        switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val)        {\
+        switchcase_csr_read_16(__csr_num + 0, __val)    \
+        switchcase_csr_read_16(__csr_num + 16, __val)}
+
+        unsigned long ret = 0;
+
+        switch (csr_num) {
+        switchcase_csr_read_32(CSR_CYCLE, ret)
+        default:
+                break;
+        }
+
+        return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+static u64 read_perf_counter(unsigned int counter)
+{
+        return csr_read_num(CSR_CYCLE + counter);
+}
+
+static u64 read_timestamp(void)
+{
+        return csr_read_num(CSR_TIME);
+}
+
 #else
 static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
 static u64 read_timestamp(void) { return 0; }
@@ -284,7 +284,8 @@ static struct test_case tests__basic_mmap[] = {
                          "permissions"),
         TEST_CASE_REASON("User space counter reading of instructions",
                          mmap_user_read_instr,
-#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \
+        (defined(__riscv) && __riscv_xlen == 64)
                          "permissions"
 #else
                          "unsupported"
@@ -292,7 +293,8 @@ static struct test_case tests__basic_mmap[] = {
                          ),
         TEST_CASE_REASON("User space counter reading of cycles",
                          mmap_user_read_cycles,
-#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \
+        (defined(__riscv) && __riscv_xlen == 64)
                          "permissions"
 #else
                          "unsupported"