2008-12-03 17:39:53 +08:00
|
|
|
/*
|
|
|
|
* Performance counter x86 architecture code
|
|
|
|
*
|
|
|
|
* Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
|
|
|
|
* Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
|
2009-02-27 20:39:09 +08:00
|
|
|
* Copyright(C) 2009 Jaswinder Singh Rajput
|
2008-12-03 17:39:53 +08:00
|
|
|
*
|
|
|
|
* For licencing details see kernel-base/COPYING
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/perf_counter.h>
|
|
|
|
#include <linux/capability.h>
|
|
|
|
#include <linux/notifier.h>
|
|
|
|
#include <linux/hardirq.h>
|
|
|
|
#include <linux/kprobes.h>
|
2008-12-10 04:43:39 +08:00
|
|
|
#include <linux/module.h>
|
2008-12-03 17:39:53 +08:00
|
|
|
#include <linux/kdebug.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
|
2008-12-17 16:02:19 +08:00
|
|
|
#include <asm/perf_counter.h>
|
2008-12-03 17:39:53 +08:00
|
|
|
#include <asm/apic.h>
|
|
|
|
|
|
|
|
/*
 * Gate for the hw_perf_*() entry points and __hw_perf_counter_init():
 * each of them bails out early while this is false.  It is set outside
 * the part of the file shown here - presumably once PMU setup succeeded
 * (TODO confirm).
 */
static bool perf_counters_initialized __read_mostly;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Number of (generic) HW counters:
|
|
|
|
*/
|
2008-12-17 20:09:20 +08:00
|
|
|
/* Loop bound used by the pmc_amd_*() helpers when walking the generic counters. */
static int nr_counters_generic __read_mostly;
|
|
|
|
/* NOTE(review): presumably a bitmask of the usable counter indices - confirm at the init site. */
static u64 perf_counter_mask __read_mostly;
|
2008-12-22 18:10:42 +08:00
|
|
|
/* NOTE(review): presumably the value mask that corresponds to counter_value_bits - confirm. */
static u64 counter_value_mask __read_mostly;
|
2009-03-06 01:08:27 +08:00
|
|
|
/*
 * Width of a hardware counter value in bits; pmc_amd_get_status() shifts
 * by (64 - counter_value_bits) to bring the counter's top bit into the
 * sign position.
 */
static int counter_value_bits __read_mostly;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-17 20:09:20 +08:00
|
|
|
/* NOTE(review): presumably the number of fixed-function counters - confirm at the init site. */
static int nr_counters_fixed __read_mostly;
|
2008-12-17 17:51:15 +08:00
|
|
|
|
2008-12-03 17:39:53 +08:00
|
|
|
struct cpu_hw_counters {
|
2008-12-17 20:09:20 +08:00
|
|
|
struct perf_counter *counters[X86_PMC_IDX_MAX];
|
|
|
|
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
2009-01-23 21:36:16 +08:00
|
|
|
unsigned long interrupts;
|
2009-03-06 01:08:27 +08:00
|
|
|
u64 throttle_ctrl;
|
|
|
|
u64 active_mask;
|
|
|
|
int enabled;
|
2008-12-03 17:39:53 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
2009-02-27 20:39:09 +08:00
|
|
|
* struct pmc_x86_ops - performance counter x86 ops
|
2008-12-03 17:39:53 +08:00
|
|
|
*/
|
2009-02-27 20:39:09 +08:00
|
|
|
struct pmc_x86_ops {
|
2009-02-28 21:07:49 +08:00
|
|
|
u64 (*save_disable_all)(void);
|
2009-03-06 01:08:27 +08:00
|
|
|
void (*restore_all)(u64);
|
|
|
|
u64 (*get_status)(u64);
|
|
|
|
void (*ack_status)(u64);
|
|
|
|
void (*enable)(int, u64);
|
|
|
|
void (*disable)(int, u64);
|
2009-02-28 21:07:49 +08:00
|
|
|
unsigned eventsel;
|
|
|
|
unsigned perfctr;
|
2009-03-06 01:08:27 +08:00
|
|
|
u64 (*event_map)(int);
|
|
|
|
u64 (*raw_event)(u64);
|
2009-02-28 21:07:49 +08:00
|
|
|
int max_events;
|
2009-02-27 20:39:09 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Ops table the hw_perf_*() wrappers dispatch through.  It is assigned
 * outside the part of the file shown here; the wrappers only dereference
 * it after checking perf_counters_initialized.
 */
static struct pmc_x86_ops *pmc_ops;
|
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
/* Per-CPU counter state; every CPU starts out with 'enabled' set. */
static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1 };
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-02-27 20:39:09 +08:00
|
|
|
/*
|
|
|
|
* Intel PerfMon v3. Used on Core2 and later.
|
|
|
|
*/
|
2009-03-06 01:08:27 +08:00
|
|
|
static const u64 intel_perfmon_event_map[] =
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
2008-12-23 19:17:29 +08:00
|
|
|
[PERF_COUNT_CPU_CYCLES] = 0x003c,
|
2008-12-03 17:39:53 +08:00
|
|
|
[PERF_COUNT_INSTRUCTIONS] = 0x00c0,
|
|
|
|
[PERF_COUNT_CACHE_REFERENCES] = 0x4f2e,
|
|
|
|
[PERF_COUNT_CACHE_MISSES] = 0x412e,
|
|
|
|
[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
|
|
|
|
[PERF_COUNT_BRANCH_MISSES] = 0x00c5,
|
2008-12-23 19:17:29 +08:00
|
|
|
[PERF_COUNT_BUS_CYCLES] = 0x013c,
|
2008-12-03 17:39:53 +08:00
|
|
|
};
|
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
static u64 pmc_intel_event_map(int event)
|
2009-02-27 20:39:09 +08:00
|
|
|
{
|
|
|
|
return intel_perfmon_event_map[event];
|
|
|
|
}
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
/* Bits of a raw event that a user may set on Intel: event, unit mask, counter mask. */
#define CORE_EVNTSEL_EVENT_MASK		0x000000FF
#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00
#define CORE_EVNTSEL_COUNTER_MASK	0xFF000000

#define CORE_EVNTSEL_MASK		\
	(CORE_EVNTSEL_EVENT_MASK |	\
	 CORE_EVNTSEL_UNIT_MASK  |	\
	 CORE_EVNTSEL_COUNTER_MASK)

/*
 * Strip everything from a raw event except the event-select, unit-mask
 * and counter-mask fields, and return the result.
 */
static u64 pmc_intel_raw_event(u64 event)
{
	u64 allowed = CORE_EVNTSEL_MASK;

	return event & allowed;
}
|
|
|
|
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
/*
|
|
|
|
* AMD Performance Monitor K7 and later.
|
|
|
|
*/
|
2009-03-06 01:08:27 +08:00
|
|
|
static const u64 amd_perfmon_event_map[] =
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
{
|
|
|
|
[PERF_COUNT_CPU_CYCLES] = 0x0076,
|
|
|
|
[PERF_COUNT_INSTRUCTIONS] = 0x00c0,
|
|
|
|
[PERF_COUNT_CACHE_REFERENCES] = 0x0080,
|
|
|
|
[PERF_COUNT_CACHE_MISSES] = 0x0081,
|
|
|
|
[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
|
|
|
|
[PERF_COUNT_BRANCH_MISSES] = 0x00c5,
|
|
|
|
};
|
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
static u64 pmc_amd_event_map(int event)
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
{
|
|
|
|
return amd_perfmon_event_map[event];
|
|
|
|
}
|
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
/*
 * Bits of a raw event that a user may set on AMD.  Note that the event
 * mask also covers bits 32-34 in addition to the low byte.
 */
#define K7_EVNTSEL_EVENT_MASK	0x7000000FF
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00
#define K7_EVNTSEL_COUNTER_MASK	0x0FF000000

#define K7_EVNTSEL_MASK			\
	(K7_EVNTSEL_EVENT_MASK |	\
	 K7_EVNTSEL_UNIT_MASK  |	\
	 K7_EVNTSEL_COUNTER_MASK)

/*
 * Strip everything from a raw event except the event-select, unit-mask
 * and counter-mask fields, and return the result.
 */
static u64 pmc_amd_raw_event(u64 event)
{
	u64 allowed = K7_EVNTSEL_MASK;

	return event & allowed;
}
|
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
/*
|
|
|
|
* Propagate counter elapsed time into the generic counter.
|
|
|
|
* Can only be executed on the CPU where the counter is active.
|
|
|
|
* Returns the delta events processed.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
x86_perf_counter_update(struct perf_counter *counter,
|
|
|
|
struct hw_perf_counter *hwc, int idx)
|
|
|
|
{
|
|
|
|
u64 prev_raw_count, new_raw_count, delta;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Careful: an NMI might modify the previous counter value.
|
|
|
|
*
|
|
|
|
* Our tactic to handle this is to first atomically read and
|
|
|
|
* exchange a new raw count - then add that new-prev delta
|
|
|
|
* count to the generic counter atomically:
|
|
|
|
*/
|
|
|
|
again:
|
|
|
|
prev_raw_count = atomic64_read(&hwc->prev_count);
|
|
|
|
rdmsrl(hwc->counter_base + idx, new_raw_count);
|
|
|
|
|
|
|
|
if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
|
|
|
new_raw_count) != prev_raw_count)
|
|
|
|
goto again;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now we have the new raw value and have updated the prev
|
|
|
|
* timestamp already. We can now calculate the elapsed delta
|
|
|
|
* (counter-)time and add that to the generic counter.
|
|
|
|
*
|
|
|
|
* Careful, not all hw sign-extends above the physical width
|
|
|
|
* of the count, so we do that by clipping the delta to 32 bits:
|
|
|
|
*/
|
|
|
|
delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
|
|
|
|
|
|
|
|
atomic64_add(delta, &counter->count);
|
|
|
|
atomic64_sub(delta, &hwc->period_left);
|
|
|
|
}
|
|
|
|
|
2008-12-03 17:39:53 +08:00
|
|
|
/*
|
|
|
|
* Setup the hardware configuration for a given hw_event_type
|
|
|
|
*/
|
2008-12-11 19:46:46 +08:00
|
|
|
static int __hw_perf_counter_init(struct perf_counter *counter)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
2008-12-10 19:33:23 +08:00
|
|
|
struct perf_counter_hw_event *hw_event = &counter->hw_event;
|
2008-12-03 17:39:53 +08:00
|
|
|
struct hw_perf_counter *hwc = &counter->hw;
|
|
|
|
|
|
|
|
if (unlikely(!perf_counters_initialized))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/*
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 11:35:35 +08:00
|
|
|
* Generate PMC IRQs:
|
2008-12-03 17:39:53 +08:00
|
|
|
* (keep 'enabled' bit clear for now)
|
|
|
|
*/
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 11:35:35 +08:00
|
|
|
hwc->config = ARCH_PERFMON_EVENTSEL_INT;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
/*
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 11:35:35 +08:00
|
|
|
* Count user and OS events unless requested not to.
|
2008-12-03 17:39:53 +08:00
|
|
|
*/
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 11:35:35 +08:00
|
|
|
if (!hw_event->exclude_user)
|
|
|
|
hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
|
|
|
|
if (!hw_event->exclude_kernel)
|
2008-12-03 17:39:53 +08:00
|
|
|
hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 11:35:35 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If privileged enough, allow NMI events:
|
|
|
|
*/
|
|
|
|
hwc->nmi = 0;
|
|
|
|
if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
|
|
|
|
hwc->nmi = 1;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-10 19:33:23 +08:00
|
|
|
hwc->irq_period = hw_event->irq_period;
|
2008-12-03 17:39:53 +08:00
|
|
|
/*
|
|
|
|
* Intel PMCs cannot be accessed sanely above 32 bit width,
|
|
|
|
* so we install an artificial 1<<31 period regardless of
|
|
|
|
* the generic counter period:
|
|
|
|
*/
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
|
|
|
|
if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
|
|
|
|
hwc->irq_period = 0x7FFFFFFF;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
atomic64_set(&hwc->period_left, hwc->irq_period);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
/*
|
2008-12-09 02:35:37 +08:00
|
|
|
* Raw event type provide the config in the event structure
|
2008-12-03 17:39:53 +08:00
|
|
|
*/
|
2008-12-10 19:33:23 +08:00
|
|
|
if (hw_event->raw) {
|
2009-03-06 01:08:27 +08:00
|
|
|
hwc->config |= pmc_ops->raw_event(hw_event->type);
|
2008-12-03 17:39:53 +08:00
|
|
|
} else {
|
2009-02-27 20:39:09 +08:00
|
|
|
if (hw_event->type >= pmc_ops->max_events)
|
2008-12-03 17:39:53 +08:00
|
|
|
return -EINVAL;
|
|
|
|
/*
|
|
|
|
* The generic map:
|
|
|
|
*/
|
2009-02-27 20:39:09 +08:00
|
|
|
hwc->config |= pmc_ops->event_map(hw_event->type);
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
counter->wakeup_pending = 0;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-02-27 20:39:09 +08:00
|
|
|
static u64 pmc_intel_save_disable_all(void)
|
2008-12-10 04:43:39 +08:00
|
|
|
{
|
|
|
|
u64 ctrl;
|
|
|
|
|
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
|
2008-12-17 20:09:20 +08:00
|
|
|
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
|
2008-12-15 01:36:30 +08:00
|
|
|
|
2008-12-10 04:43:39 +08:00
|
|
|
return ctrl;
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
2009-02-27 20:39:09 +08:00
|
|
|
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
static u64 pmc_amd_save_disable_all(void)
|
|
|
|
{
|
2009-03-06 01:08:27 +08:00
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
int enabled, idx;
|
|
|
|
|
|
|
|
enabled = cpuc->enabled;
|
|
|
|
cpuc->enabled = 0;
|
|
|
|
barrier();
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
|
|
|
|
for (idx = 0; idx < nr_counters_generic; idx++) {
|
2009-03-06 01:08:27 +08:00
|
|
|
u64 val;
|
|
|
|
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
|
2009-03-06 01:08:27 +08:00
|
|
|
if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
|
|
|
|
val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
|
|
|
|
wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
|
|
|
|
}
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
}
|
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
return enabled;
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
}
|
|
|
|
|
2009-02-27 20:39:09 +08:00
|
|
|
u64 hw_perf_save_disable(void)
|
|
|
|
{
|
|
|
|
if (unlikely(!perf_counters_initialized))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return pmc_ops->save_disable_all();
|
|
|
|
}
|
2009-03-06 01:08:27 +08:00
|
|
|
/*
|
|
|
|
* Exported because of ACPI idle
|
|
|
|
*/
|
2008-12-11 20:45:51 +08:00
|
|
|
EXPORT_SYMBOL_GPL(hw_perf_save_disable);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-02-27 20:39:09 +08:00
|
|
|
/*
 * Intel: write 'ctrl' (as returned by pmc_intel_save_disable_all())
 * back into MSR_CORE_PERF_GLOBAL_CTRL.
 */
static void pmc_intel_restore_all(u64 ctrl)
{
	u64 global_ctrl = ctrl;

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl);
}
|
|
|
|
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
/*
 * AMD: store 'ctrl' into the per-CPU software 'enabled' flag and, when
 * it is non-zero, set the enable bit in the event-select MSR of every
 * generic counter whose bit is set in cpuc->active_mask.
 */
static void pmc_amd_restore_all(u64 ctrl)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	int i;

	cpuc->enabled = ctrl;
	/* Compiler barrier: keep the flag update ahead of the MSR accesses. */
	barrier();
	if (!ctrl)
		return;

	for (i = 0; i < nr_counters_generic; i++) {
		u64 evntsel;

		if (!test_bit(i, (unsigned long *)&cpuc->active_mask))
			continue;

		rdmsrl(MSR_K7_EVNTSEL0 + i, evntsel);
		evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + i, evntsel);
	}
}
|
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
/*
 * Counterpart of hw_perf_save_disable(): hand 'ctrl' to the vendor
 * restore_all op.  Does nothing while the PMU is not initialized.
 */
void hw_perf_restore(u64 ctrl)
{
	if (!unlikely(!perf_counters_initialized))
		pmc_ops->restore_all(ctrl);
}
/*
 * Exported because of ACPI idle
 */
EXPORT_SYMBOL_GPL(hw_perf_restore);
|
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
/*
 * Intel: return the contents of MSR_CORE_PERF_GLOBAL_STATUS.
 * 'mask' is not used by this implementation.
 */
static u64 pmc_intel_get_status(u64 mask)
{
	u64 global_status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, global_status);

	return global_status;
}
|
|
|
|
|
|
|
|
/*
 * AMD: build a status bitmask by inspecting the counters themselves.
 *
 * For every generic counter selected in 'mask', the counter value is
 * shifted so that its top implemented bit (bit counter_value_bits - 1)
 * lands in the sign position.  A counter whose top bit is clear gets its
 * bit set in the returned status.  (Presumably counters are programmed
 * with a negative start value, so a clear top bit means the counter has
 * wrapped - confirm against the set_period code.)
 *
 * The per-counter bit is computed as a 64-bit value: 'mask' and 'status'
 * are u64, and an int-typed "1 << idx" would sign-extend at idx 31 and is
 * undefined beyond that.
 */
static u64 pmc_amd_get_status(u64 mask)
{
	u64 status = 0;
	int idx;

	for (idx = 0; idx < nr_counters_generic; idx++) {
		u64 bit = 1ULL << idx;
		s64 val;

		if (!(mask & bit))
			continue;

		rdmsrl(MSR_K7_PERFCTR0 + idx, val);
		val <<= (64 - counter_value_bits);
		if (val >= 0)
			status |= bit;
	}

	return status;
}
|
|
|
|
|
|
|
|
/*
 * Query counter status through the vendor ops.
 * Returns 0 while the PMU is not initialized.
 */
static u64 hw_perf_get_status(u64 mask)
{
	return unlikely(!perf_counters_initialized) ?
		0 : pmc_ops->get_status(mask);
}
|
|
|
|
|
|
|
|
/* Intel: acknowledge status bits by writing them to MSR_CORE_PERF_GLOBAL_OVF_CTRL. */
static void pmc_intel_ack_status(u64 ack)
{
	u64 ovf_ctrl = ack;

	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ovf_ctrl);
}
|
|
|
|
|
|
|
|
/* AMD: intentionally empty - this implementation has nothing to acknowledge. */
static void pmc_amd_ack_status(u64 ack)
{
	/* nothing to do */
}
|
|
|
|
|
|
|
|
/*
 * Acknowledge status bits through the vendor ops.
 * Does nothing while the PMU is not initialized.
 */
static void hw_perf_ack_status(u64 ack)
{
	if (!unlikely(!perf_counters_initialized))
		pmc_ops->ack_status(ack);
}
|
|
|
|
|
|
|
|
/*
 * Intel: program event-select register 'idx' with 'config' plus the
 * enable bit.
 */
static void pmc_intel_enable(int idx, u64 config)
{
	u64 evntsel = config | ARCH_PERFMON_EVENTSEL0_ENABLE;

	wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, evntsel);
}
|
|
|
|
|
|
|
|
/*
 * AMD: mark counter 'idx' active and program its event-select register.
 *
 * The hardware enable bit is only added while the per-CPU software
 * 'enabled' flag is set; otherwise the config is written without it and
 * pmc_amd_restore_all() sets the bit later for all active counters.
 */
static void pmc_amd_enable(int idx, u64 config)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	u64 evntsel = config;

	set_bit(idx, (unsigned long *)&cpuc->active_mask);

	if (cpuc->enabled)
		evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;

	wrmsrl(MSR_K7_EVNTSEL0 + idx, evntsel);
}
|
|
|
|
|
|
|
|
/*
 * Enable counter 'idx' with 'config' through the vendor ops.
 * Does nothing while the PMU is not initialized.
 */
static void hw_perf_enable(int idx, u64 config)
{
	if (!unlikely(!perf_counters_initialized))
		pmc_ops->enable(idx, config);
}
|
|
|
|
|
|
|
|
/*
 * Intel: write @config to event-select register @idx as-is, i.e.
 * without OR-ing in the enable bit the way pmc_intel_enable() does.
 */
static void pmc_intel_disable(int idx, u64 config)
{
	const unsigned int msr = MSR_ARCH_PERFMON_EVENTSEL0 + idx;

	wrmsrl(msr, config);
}
|
|
|
|
|
|
|
|
/*
 * AMD: drop counter @idx from this CPU's active mask and write @config
 * to its event-select register without adding the enable bit.
 */
static void pmc_amd_disable(int idx, u64 config)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	unsigned long *active = (unsigned long *)&cpuc->active_mask;

	clear_bit(idx, active);
	wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
}
|
|
|
|
|
|
|
|
/*
 * Vendor-neutral counter disable: forwards to the active pmc_ops
 * backend. Does nothing while perf counters are uninitialized.
 */
static void hw_perf_disable(int idx, u64 config)
{
	if (likely(perf_counters_initialized))
		pmc_ops->disable(idx, config);
}
|
|
|
|
|
2008-12-22 18:10:42 +08:00
|
|
|
static inline void
|
|
|
|
__pmc_fixed_disable(struct perf_counter *counter,
|
|
|
|
struct hw_perf_counter *hwc, unsigned int __idx)
|
|
|
|
{
|
|
|
|
int idx = __idx - X86_PMC_IDX_FIXED;
|
|
|
|
u64 ctrl_val, mask;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
mask = 0xfULL << (idx * 4);
|
|
|
|
|
|
|
|
rdmsrl(hwc->config_base, ctrl_val);
|
|
|
|
ctrl_val &= ~mask;
|
|
|
|
err = checking_wrmsrl(hwc->config_base, ctrl_val);
|
|
|
|
}
|
|
|
|
|
2008-12-09 18:40:46 +08:00
|
|
|
static inline void
|
2008-12-17 16:09:13 +08:00
|
|
|
__pmc_generic_disable(struct perf_counter *counter,
|
2008-12-13 16:00:03 +08:00
|
|
|
struct hw_perf_counter *hwc, unsigned int idx)
|
2008-12-09 18:40:46 +08:00
|
|
|
{
|
2008-12-22 18:10:42 +08:00
|
|
|
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
|
2008-12-27 21:45:43 +08:00
|
|
|
__pmc_fixed_disable(counter, hwc, idx);
|
|
|
|
else
|
2009-03-06 01:08:27 +08:00
|
|
|
hw_perf_disable(idx, hwc->config);
|
2008-12-09 18:40:46 +08:00
|
|
|
}
|
|
|
|
|
2008-12-22 18:10:42 +08:00
|
|
|
/*
 * Per-CPU record, one entry per counter slot, of the most recent
 * "left" value programmed by __hw_perf_counter_set_period(). It is
 * dumped by perf_counter_print_debug().
 */
static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
/*
|
|
|
|
* Set the next IRQ period, based on the hwc->period_left value.
|
|
|
|
* To be called with the counter disabled in hw:
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
__hw_perf_counter_set_period(struct perf_counter *counter,
|
|
|
|
struct hw_perf_counter *hwc, int idx)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
2008-12-22 18:10:42 +08:00
|
|
|
s64 left = atomic64_read(&hwc->period_left);
|
2008-12-13 16:00:03 +08:00
|
|
|
s32 period = hwc->irq_period;
|
2008-12-22 18:10:42 +08:00
|
|
|
int err;
|
2008-12-13 16:00:03 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we are way outside a reasoable range then just skip forward:
|
|
|
|
*/
|
|
|
|
if (unlikely(left <= -period)) {
|
|
|
|
left = period;
|
|
|
|
atomic64_set(&hwc->period_left, left);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(left <= 0)) {
|
|
|
|
left += period;
|
|
|
|
atomic64_set(&hwc->period_left, left);
|
|
|
|
}
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
per_cpu(prev_left[idx], smp_processor_id()) = left;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The hw counter starts counting from this counter offset,
|
|
|
|
* mark it to be able to extra future deltas:
|
|
|
|
*/
|
2008-12-22 18:10:42 +08:00
|
|
|
atomic64_set(&hwc->prev_count, (u64)-left);
|
2008-12-13 16:00:03 +08:00
|
|
|
|
2008-12-22 18:10:42 +08:00
|
|
|
err = checking_wrmsrl(hwc->counter_base + idx,
|
|
|
|
(u64)(-left) & counter_value_mask);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
__pmc_fixed_enable(struct perf_counter *counter,
|
|
|
|
struct hw_perf_counter *hwc, unsigned int __idx)
|
|
|
|
{
|
|
|
|
int idx = __idx - X86_PMC_IDX_FIXED;
|
|
|
|
u64 ctrl_val, bits, mask;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/*
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 11:35:35 +08:00
|
|
|
* Enable IRQ generation (0x8),
|
|
|
|
* and enable ring-3 counting (0x2) and ring-0 counting (0x1)
|
|
|
|
* if requested:
|
2008-12-22 18:10:42 +08:00
|
|
|
*/
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 11:35:35 +08:00
|
|
|
bits = 0x8ULL;
|
|
|
|
if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
|
|
|
|
bits |= 0x2;
|
2008-12-22 18:10:42 +08:00
|
|
|
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
|
|
|
|
bits |= 0x1;
|
|
|
|
bits <<= (idx * 4);
|
|
|
|
mask = 0xfULL << (idx * 4);
|
|
|
|
|
|
|
|
rdmsrl(hwc->config_base, ctrl_val);
|
|
|
|
ctrl_val &= ~mask;
|
|
|
|
ctrl_val |= bits;
|
|
|
|
err = checking_wrmsrl(hwc->config_base, ctrl_val);
|
2008-12-09 18:40:46 +08:00
|
|
|
}
|
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
static void
|
2008-12-17 16:09:13 +08:00
|
|
|
__pmc_generic_enable(struct perf_counter *counter,
|
2008-12-13 16:00:03 +08:00
|
|
|
struct hw_perf_counter *hwc, int idx)
|
2008-12-09 18:40:46 +08:00
|
|
|
{
|
2008-12-22 18:10:42 +08:00
|
|
|
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
|
2008-12-27 21:45:43 +08:00
|
|
|
__pmc_fixed_enable(counter, hwc, idx);
|
|
|
|
else
|
2009-03-06 01:08:27 +08:00
|
|
|
hw_perf_enable(idx, hwc->config);
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
2008-12-22 18:10:42 +08:00
|
|
|
static int
|
|
|
|
fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
|
2008-12-17 20:09:20 +08:00
|
|
|
{
|
2008-12-22 18:10:42 +08:00
|
|
|
unsigned int event;
|
|
|
|
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
|
|
|
|
return -1;
|
|
|
|
|
2008-12-22 18:10:42 +08:00
|
|
|
if (unlikely(hwc->nmi))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
event = hwc->config & ARCH_PERFMON_EVENT_MASK;
|
|
|
|
|
2009-02-27 20:39:09 +08:00
|
|
|
if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
|
2008-12-22 18:10:42 +08:00
|
|
|
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
|
2009-02-27 20:39:09 +08:00
|
|
|
if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
|
2008-12-22 18:10:42 +08:00
|
|
|
return X86_PMC_IDX_FIXED_CPU_CYCLES;
|
2009-02-27 20:39:09 +08:00
|
|
|
if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
|
2008-12-22 18:10:42 +08:00
|
|
|
return X86_PMC_IDX_FIXED_BUS_CYCLES;
|
|
|
|
|
2008-12-17 20:09:20 +08:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
/*
|
|
|
|
* Find a PMC slot for the freshly enabled / scheduled in counter:
|
|
|
|
*/
|
2008-12-21 20:50:42 +08:00
|
|
|
static int pmc_generic_enable(struct perf_counter *counter)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
struct hw_perf_counter *hwc = &counter->hw;
|
2008-12-22 18:10:42 +08:00
|
|
|
int idx;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-22 18:10:42 +08:00
|
|
|
idx = fixed_mode_idx(counter, hwc);
|
|
|
|
if (idx >= 0) {
|
|
|
|
/*
|
|
|
|
* Try to get the fixed counter, if that is already taken
|
|
|
|
* then try to get a generic counter:
|
|
|
|
*/
|
|
|
|
if (test_and_set_bit(idx, cpuc->used))
|
|
|
|
goto try_generic;
|
2008-12-23 19:28:12 +08:00
|
|
|
|
2008-12-22 18:10:42 +08:00
|
|
|
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
|
|
|
|
/*
|
|
|
|
* We set it so that counter_base + idx in wrmsr/rdmsr maps to
|
|
|
|
* MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
|
|
|
|
*/
|
|
|
|
hwc->counter_base =
|
|
|
|
MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
|
2008-12-03 17:39:53 +08:00
|
|
|
hwc->idx = idx;
|
2008-12-22 18:10:42 +08:00
|
|
|
} else {
|
|
|
|
idx = hwc->idx;
|
|
|
|
/* Try to get the previous generic counter again */
|
|
|
|
if (test_and_set_bit(idx, cpuc->used)) {
|
|
|
|
try_generic:
|
|
|
|
idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
|
|
|
|
if (idx == nr_counters_generic)
|
|
|
|
return -EAGAIN;
|
|
|
|
|
|
|
|
set_bit(idx, cpuc->used);
|
|
|
|
hwc->idx = idx;
|
|
|
|
}
|
2009-02-27 20:39:09 +08:00
|
|
|
hwc->config_base = pmc_ops->eventsel;
|
|
|
|
hwc->counter_base = pmc_ops->perfctr;
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
perf_counters_lapic_init(hwc->nmi);
|
|
|
|
|
2008-12-17 16:09:13 +08:00
|
|
|
__pmc_generic_disable(counter, hwc, idx);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-17 20:09:20 +08:00
|
|
|
cpuc->counters[idx] = counter;
|
2008-12-22 18:10:42 +08:00
|
|
|
/*
|
|
|
|
* Make it visible before enabling the hw:
|
|
|
|
*/
|
|
|
|
smp_wmb();
|
2008-12-09 18:40:46 +08:00
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
__hw_perf_counter_set_period(counter, hwc, idx);
|
2008-12-17 16:09:13 +08:00
|
|
|
__pmc_generic_enable(counter, hwc, idx);
|
2008-12-21 20:50:42 +08:00
|
|
|
|
|
|
|
return 0;
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void perf_counter_print_debug(void)
|
|
|
|
{
|
2008-12-22 18:10:42 +08:00
|
|
|
u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
|
2008-12-23 19:28:12 +08:00
|
|
|
struct cpu_hw_counters *cpuc;
|
2008-12-09 19:18:18 +08:00
|
|
|
int cpu, idx;
|
|
|
|
|
2008-12-17 20:09:20 +08:00
|
|
|
if (!nr_counters_generic)
|
2008-12-09 19:18:18 +08:00
|
|
|
return;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
local_irq_disable();
|
|
|
|
|
|
|
|
cpu = smp_processor_id();
|
2008-12-23 19:28:12 +08:00
|
|
|
cpuc = &per_cpu(cpu_hw_counters, cpu);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
|
2009-02-28 21:15:39 +08:00
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
|
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
|
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
|
|
|
|
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
|
|
|
|
|
|
|
|
pr_info("\n");
|
|
|
|
pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
|
|
|
|
pr_info("CPU#%d: status: %016llx\n", cpu, status);
|
|
|
|
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
|
|
|
|
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
}
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-17 20:09:20 +08:00
|
|
|
for (idx = 0; idx < nr_counters_generic; idx++) {
|
2009-02-27 20:39:09 +08:00
|
|
|
rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
|
|
|
|
rdmsrl(pmc_ops->perfctr + idx, pmc_count);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
prev_left = per_cpu(prev_left[idx], cpu);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
|
2008-12-03 17:39:53 +08:00
|
|
|
cpu, idx, pmc_ctrl);
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
|
2008-12-03 17:39:53 +08:00
|
|
|
cpu, idx, pmc_count);
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
|
2008-12-13 16:00:03 +08:00
|
|
|
cpu, idx, prev_left);
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
2008-12-22 18:10:42 +08:00
|
|
|
for (idx = 0; idx < nr_counters_fixed; idx++) {
|
|
|
|
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
|
|
|
|
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
|
2008-12-22 18:10:42 +08:00
|
|
|
cpu, idx, pmc_count);
|
|
|
|
}
|
2008-12-03 17:39:53 +08:00
|
|
|
local_irq_enable();
|
|
|
|
}
|
|
|
|
|
2008-12-17 16:09:13 +08:00
|
|
|
static void pmc_generic_disable(struct perf_counter *counter)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
struct hw_perf_counter *hwc = &counter->hw;
|
|
|
|
unsigned int idx = hwc->idx;
|
|
|
|
|
2008-12-17 16:09:13 +08:00
|
|
|
__pmc_generic_disable(counter, hwc, idx);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
clear_bit(idx, cpuc->used);
|
2008-12-17 20:09:20 +08:00
|
|
|
cpuc->counters[idx] = NULL;
|
2008-12-22 18:10:42 +08:00
|
|
|
/*
|
|
|
|
* Make sure the cleared pointer becomes visible before we
|
|
|
|
* (potentially) free the counter:
|
|
|
|
*/
|
|
|
|
smp_wmb();
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
/*
|
|
|
|
* Drain the remaining delta count out of a counter
|
|
|
|
* that we are disabling:
|
|
|
|
*/
|
|
|
|
x86_perf_counter_update(counter, hwc, idx);
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Append one u64 @data word to the counter's irqdata buffer. When fewer
 * than sizeof(u64) bytes remain below PERF_DATA_BUFLEN the word is
 * dropped and the overrun count is bumped instead.
 */
static void perf_store_irq_data(struct perf_counter *counter, u64 data)
{
	struct perf_data *irqdata = counter->irqdata;

	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
		irqdata->overrun++;
		return;
	}

	*(u64 *)&irqdata->data[irqdata->len] = data;
	irqdata->len += sizeof(u64);
}
|
|
|
|
|
2008-12-09 18:40:46 +08:00
|
|
|
/*
|
2008-12-13 16:00:03 +08:00
|
|
|
* Save and restart an expired counter. Called by NMI contexts,
|
|
|
|
* so it has to be careful about preempting normal counter ops:
|
2008-12-09 18:40:46 +08:00
|
|
|
*/
|
2008-12-03 17:39:53 +08:00
|
|
|
static void perf_save_and_restart(struct perf_counter *counter)
|
|
|
|
{
|
|
|
|
struct hw_perf_counter *hwc = &counter->hw;
|
|
|
|
int idx = hwc->idx;
|
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
x86_perf_counter_update(counter, hwc, idx);
|
|
|
|
__hw_perf_counter_set_period(counter, hwc, idx);
|
2008-12-09 18:40:46 +08:00
|
|
|
|
2008-12-22 18:10:42 +08:00
|
|
|
if (counter->state == PERF_COUNTER_STATE_ACTIVE)
|
2008-12-17 16:09:13 +08:00
|
|
|
__pmc_generic_enable(counter, hwc, idx);
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2008-12-11 15:38:42 +08:00
|
|
|
perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
2008-12-11 15:38:42 +08:00
|
|
|
struct perf_counter *counter, *group_leader = sibling->group_leader;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-11 15:38:42 +08:00
|
|
|
/*
|
2008-12-13 16:00:03 +08:00
|
|
|
* Store sibling timestamps (if any):
|
2008-12-11 15:38:42 +08:00
|
|
|
*/
|
|
|
|
list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
|
2008-12-22 18:10:42 +08:00
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
|
2008-12-11 15:38:42 +08:00
|
|
|
perf_store_irq_data(sibling, counter->hw_event.type);
|
2008-12-13 16:00:03 +08:00
|
|
|
perf_store_irq_data(sibling, atomic64_read(&counter->count));
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-01-23 21:36:16 +08:00
|
|
|
/*
 * Maximum interrupt frequency of 100KHz per CPU, expressed as a budget
 * of 100000/HZ interrupts. The per-CPU interrupt count is compared
 * against this budget in the interrupt handler and reset by
 * perf_counter_unthrottle().
 */
#define PERFMON_MAX_INTERRUPTS (100000/HZ)
|
2009-01-23 21:36:16 +08:00
|
|
|
|
2008-12-03 17:39:53 +08:00
|
|
|
/*
|
|
|
|
* This handler is triggered by the local APIC, so the APIC IRQ handling
|
|
|
|
* rules apply:
|
|
|
|
*/
|
2009-03-06 01:08:27 +08:00
|
|
|
static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
|
|
|
int bit, cpu = smp_processor_id();
|
2009-01-23 21:36:16 +08:00
|
|
|
u64 ack, status;
|
2009-01-23 17:13:01 +08:00
|
|
|
struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
|
2009-03-06 01:08:27 +08:00
|
|
|
int ret = 0;
|
2008-12-09 19:23:59 +08:00
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
cpuc->throttle_ctrl = hw_perf_save_disable();
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
status = hw_perf_get_status(cpuc->throttle_ctrl);
|
2008-12-08 21:20:16 +08:00
|
|
|
if (!status)
|
|
|
|
goto out;
|
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
ret = 1;
|
2008-12-03 17:39:53 +08:00
|
|
|
again:
|
2009-02-09 14:38:50 +08:00
|
|
|
inc_irq_stat(apic_perf_irqs);
|
2008-12-03 17:39:53 +08:00
|
|
|
ack = status;
|
2008-12-22 18:10:42 +08:00
|
|
|
for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
|
2008-12-17 20:09:20 +08:00
|
|
|
struct perf_counter *counter = cpuc->counters[bit];
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
clear_bit(bit, (unsigned long *) &status);
|
|
|
|
if (!counter)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
perf_save_and_restart(counter);
|
|
|
|
|
2008-12-10 19:33:23 +08:00
|
|
|
switch (counter->hw_event.record_type) {
|
2008-12-03 17:39:53 +08:00
|
|
|
case PERF_RECORD_SIMPLE:
|
|
|
|
continue;
|
|
|
|
case PERF_RECORD_IRQ:
|
|
|
|
perf_store_irq_data(counter, instruction_pointer(regs));
|
|
|
|
break;
|
|
|
|
case PERF_RECORD_GROUP:
|
|
|
|
perf_handle_group(counter, &status, &ack);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* From NMI context we cannot call into the scheduler to
|
2008-12-17 16:09:13 +08:00
|
|
|
* do a task wakeup - but we mark these generic as
|
2008-12-03 17:39:53 +08:00
|
|
|
* wakeup_pending and initate a wakeup callback:
|
|
|
|
*/
|
|
|
|
if (nmi) {
|
|
|
|
counter->wakeup_pending = 1;
|
|
|
|
set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
|
|
|
|
} else {
|
|
|
|
wake_up(&counter->waitq);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
hw_perf_ack_status(ack);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Repeat if there is more work to be done:
|
|
|
|
*/
|
2009-03-06 01:08:27 +08:00
|
|
|
status = hw_perf_get_status(cpuc->throttle_ctrl);
|
2008-12-03 17:39:53 +08:00
|
|
|
if (status)
|
|
|
|
goto again;
|
2008-12-08 21:20:16 +08:00
|
|
|
out:
|
2008-12-03 17:39:53 +08:00
|
|
|
/*
|
2009-01-23 17:13:01 +08:00
|
|
|
* Restore - do not reenable when global enable is off or throttled:
|
2008-12-03 17:39:53 +08:00
|
|
|
*/
|
2009-01-23 21:36:16 +08:00
|
|
|
if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
|
2009-03-06 01:08:27 +08:00
|
|
|
hw_perf_restore(cpuc->throttle_ctrl);
|
|
|
|
|
|
|
|
return ret;
|
2009-01-23 17:13:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void perf_counter_unthrottle(void)
|
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc;
|
|
|
|
|
|
|
|
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (unlikely(!perf_counters_initialized))
|
|
|
|
return;
|
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
cpuc = &__get_cpu_var(cpu_hw_counters);
|
2009-01-23 21:36:16 +08:00
|
|
|
if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
|
2009-01-23 17:13:01 +08:00
|
|
|
if (printk_ratelimit())
|
2009-01-23 21:36:16 +08:00
|
|
|
printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
|
2009-03-06 01:08:27 +08:00
|
|
|
hw_perf_restore(cpuc->throttle_ctrl);
|
2009-01-23 17:13:01 +08:00
|
|
|
}
|
2009-01-23 21:36:16 +08:00
|
|
|
cpuc->interrupts = 0;
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void smp_perf_counter_interrupt(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
irq_enter();
|
|
|
|
apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
|
2009-03-06 01:08:27 +08:00
|
|
|
ack_APIC_irq();
|
2008-12-03 17:39:53 +08:00
|
|
|
__smp_perf_counter_interrupt(regs, 0);
|
|
|
|
irq_exit();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This handler is triggered by NMI contexts:
|
|
|
|
*/
|
|
|
|
void perf_counter_notify(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc;
|
|
|
|
unsigned long flags;
|
|
|
|
int bit, cpu;
|
|
|
|
|
|
|
|
local_irq_save(flags);
|
|
|
|
cpu = smp_processor_id();
|
|
|
|
cpuc = &per_cpu(cpu_hw_counters, cpu);
|
|
|
|
|
2008-12-17 20:09:20 +08:00
|
|
|
for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
|
|
|
|
struct perf_counter *counter = cpuc->counters[bit];
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
if (!counter)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (counter->wakeup_pending) {
|
|
|
|
counter->wakeup_pending = 0;
|
|
|
|
wake_up(&counter->waitq);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
local_irq_restore(flags);
|
|
|
|
}
|
|
|
|
|
2009-01-23 21:16:53 +08:00
|
|
|
void perf_counters_lapic_init(int nmi)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
|
|
|
u32 apic_val;
|
|
|
|
|
|
|
|
if (!perf_counters_initialized)
|
|
|
|
return;
|
|
|
|
/*
|
|
|
|
* Enable the performance counter vector in the APIC LVT:
|
|
|
|
*/
|
|
|
|
apic_val = apic_read(APIC_LVTERR);
|
|
|
|
|
|
|
|
apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
|
|
|
|
if (nmi)
|
|
|
|
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
|
|
|
else
|
|
|
|
apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
|
|
|
|
apic_write(APIC_LVTERR, apic_val);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __kprobes
|
|
|
|
perf_counter_nmi_handler(struct notifier_block *self,
|
|
|
|
unsigned long cmd, void *__args)
|
|
|
|
{
|
|
|
|
struct die_args *args = __args;
|
|
|
|
struct pt_regs *regs;
|
2009-03-06 01:08:27 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
switch (cmd) {
|
|
|
|
case DIE_NMI:
|
|
|
|
case DIE_NMI_IPI:
|
|
|
|
break;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
default:
|
2008-12-03 17:39:53 +08:00
|
|
|
return NOTIFY_DONE;
|
2009-03-06 01:08:27 +08:00
|
|
|
}
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
regs = args->regs;
|
|
|
|
|
|
|
|
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
2009-03-06 01:08:27 +08:00
|
|
|
ret = __smp_perf_counter_interrupt(regs, 1);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
return ret ? NOTIFY_STOP : NOTIFY_OK;
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
|
2009-02-05 00:11:34 +08:00
|
|
|
.notifier_call = perf_counter_nmi_handler,
|
|
|
|
.next = NULL,
|
|
|
|
.priority = 1
|
2008-12-03 17:39:53 +08:00
|
|
|
};
|
|
|
|
|
2009-02-27 20:39:09 +08:00
|
|
|
static struct pmc_x86_ops pmc_intel_ops = {
|
|
|
|
.save_disable_all = pmc_intel_save_disable_all,
|
|
|
|
.restore_all = pmc_intel_restore_all,
|
2009-03-06 01:08:27 +08:00
|
|
|
.get_status = pmc_intel_get_status,
|
|
|
|
.ack_status = pmc_intel_ack_status,
|
|
|
|
.enable = pmc_intel_enable,
|
|
|
|
.disable = pmc_intel_disable,
|
2009-02-27 20:39:09 +08:00
|
|
|
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
|
|
|
|
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
|
|
|
|
.event_map = pmc_intel_event_map,
|
2009-03-06 01:08:27 +08:00
|
|
|
.raw_event = pmc_intel_raw_event,
|
2009-02-27 20:39:09 +08:00
|
|
|
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
|
|
|
|
};
|
|
|
|
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
static struct pmc_x86_ops pmc_amd_ops = {
|
|
|
|
.save_disable_all = pmc_amd_save_disable_all,
|
|
|
|
.restore_all = pmc_amd_restore_all,
|
2009-03-06 01:08:27 +08:00
|
|
|
.get_status = pmc_amd_get_status,
|
|
|
|
.ack_status = pmc_amd_ack_status,
|
|
|
|
.enable = pmc_amd_enable,
|
|
|
|
.disable = pmc_amd_disable,
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
.eventsel = MSR_K7_EVNTSEL0,
|
|
|
|
.perfctr = MSR_K7_PERFCTR0,
|
|
|
|
.event_map = pmc_amd_event_map,
|
2009-03-06 01:08:27 +08:00
|
|
|
.raw_event = pmc_amd_raw_event,
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
|
|
|
|
};
|
|
|
|
|
2009-02-27 20:39:09 +08:00
|
|
|
static struct pmc_x86_ops *pmc_intel_init(void)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
|
|
|
union cpuid10_eax eax;
|
|
|
|
unsigned int ebx;
|
2008-12-17 17:51:15 +08:00
|
|
|
unsigned int unused;
|
|
|
|
union cpuid10_edx edx;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Check whether the Architectural PerfMon supports
|
|
|
|
* Branch Misses Retired Event or not.
|
|
|
|
*/
|
2008-12-17 17:51:15 +08:00
|
|
|
cpuid(10, &eax.full, &ebx, &unused, &edx.full);
|
2008-12-03 17:39:53 +08:00
|
|
|
if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
|
2009-02-27 20:39:09 +08:00
|
|
|
return NULL;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("Intel Performance Monitoring support detected.\n");
|
|
|
|
pr_info("... version: %d\n", eax.split.version_id);
|
|
|
|
pr_info("... bit width: %d\n", eax.split.bit_width);
|
|
|
|
pr_info("... mask length: %d\n", eax.split.mask_length);
|
2009-02-27 20:39:09 +08:00
|
|
|
|
2008-12-17 20:09:20 +08:00
|
|
|
nr_counters_generic = eax.split.num_counters;
|
2009-02-27 20:39:09 +08:00
|
|
|
nr_counters_fixed = edx.split.num_counters_fixed;
|
|
|
|
counter_value_mask = (1ULL << eax.split.bit_width) - 1;
|
|
|
|
|
|
|
|
return &pmc_intel_ops;
|
|
|
|
}
|
|
|
|
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
static struct pmc_x86_ops *pmc_amd_init(void)
|
|
|
|
{
|
2009-03-06 01:08:27 +08:00
|
|
|
u64 old;
|
|
|
|
int bits;
|
|
|
|
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
nr_counters_generic = 4;
|
|
|
|
nr_counters_fixed = 0;
|
2009-03-06 01:08:27 +08:00
|
|
|
counter_value_mask = ~0ULL;
|
|
|
|
|
|
|
|
rdmsrl(MSR_K7_PERFCTR0, old);
|
|
|
|
wrmsrl(MSR_K7_PERFCTR0, counter_value_mask);
|
|
|
|
/*
|
|
|
|
* read the truncated mask
|
|
|
|
*/
|
|
|
|
rdmsrl(MSR_K7_PERFCTR0, counter_value_mask);
|
|
|
|
wrmsrl(MSR_K7_PERFCTR0, old);
|
|
|
|
|
|
|
|
bits = 32 + fls(counter_value_mask >> 32);
|
|
|
|
if (bits == 32)
|
|
|
|
bits = fls((u32)counter_value_mask);
|
|
|
|
counter_value_bits = bits;
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("AMD Performance Monitoring support detected.\n");
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
|
|
|
|
return &pmc_amd_ops;
|
|
|
|
}
|
|
|
|
|
2009-02-27 20:39:09 +08:00
|
|
|
/*
 * Boot-time setup of the x86 hardware performance counters.
 *
 * Selects the vendor ops through pmc_intel_init()/pmc_amd_init(), clips
 * the detected generic and fixed counter counts to X86_PMC_MAX_GENERIC
 * and X86_PMC_MAX_FIXED, builds perf_counter_mask from them and then
 * marks the counters as initialized.
 *
 * Returns early, without marking the counters initialized, when the boot
 * CPU lacks X86_FEATURE_ARCH_PERFMON or when no ops were obtained for
 * its vendor.
 */
void __init init_hw_perf_counters(void)
{
	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
		return;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		pmc_ops = pmc_intel_init();
		break;
	case X86_VENDOR_AMD:
		pmc_ops = pmc_amd_init();
		break;
	}
	if (!pmc_ops)
		return;

	pr_info("... num counters:    %d\n", nr_counters_generic);
	if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
		/*
		 * Warn before clipping so that the message reports the
		 * value that was actually detected, not the clipped one.
		 */
		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
			nr_counters_generic, X86_PMC_MAX_GENERIC);
		nr_counters_generic = X86_PMC_MAX_GENERIC;
	}
	/* One bit per generic counter, starting at bit 0. */
	perf_counter_mask = (1 << nr_counters_generic) - 1;
	perf_max_counters = nr_counters_generic;

	pr_info("... value mask:      %016Lx\n", counter_value_mask);

	if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
		/* Same ordering as above: report first, then clip. */
		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
			nr_counters_fixed, X86_PMC_MAX_FIXED);
		nr_counters_fixed = X86_PMC_MAX_FIXED;
	}
	pr_info("... fixed counters:  %d\n", nr_counters_fixed);

	/* Fixed counters occupy the bits starting at X86_PMC_IDX_FIXED. */
	perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;

	pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
	perf_counters_initialized = true;

	perf_counters_lapic_init(0);
	register_die_notifier(&perf_counter_nmi_notifier);
}
|
2008-12-11 19:46:46 +08:00
|
|
|
|
2008-12-17 16:09:13 +08:00
|
|
|
static void pmc_generic_read(struct perf_counter *counter)
|
2008-12-13 16:00:03 +08:00
|
|
|
{
|
|
|
|
x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
|
|
|
|
}
|
|
|
|
|
2008-12-11 20:21:10 +08:00
|
|
|
static const struct hw_perf_counter_ops x86_perf_counter_ops = {
|
2008-12-17 21:20:28 +08:00
|
|
|
.enable = pmc_generic_enable,
|
|
|
|
.disable = pmc_generic_disable,
|
|
|
|
.read = pmc_generic_read,
|
2008-12-11 19:46:46 +08:00
|
|
|
};
|
|
|
|
|
2008-12-11 20:21:10 +08:00
|
|
|
const struct hw_perf_counter_ops *
|
|
|
|
hw_perf_counter_init(struct perf_counter *counter)
|
2008-12-11 19:46:46 +08:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = __hw_perf_counter_init(counter);
|
|
|
|
if (err)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return &x86_perf_counter_ops;
|
|
|
|
}
|