2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-24 21:24:00 +08:00

perf, x86: Add a key to simplify template lookup in Pentium-4 PMU

Currently, we use opcode (Event and Event-Selector) + emask to
look up the template in p4_templates.

But cache events (L1-dcache-load-misses, LLC-load-misses, etc.)
use the same event (P4_REPLAY_EVENT) to do the counting, i.e., they
have the same opcode and emask. So we cannot use the current lookup
mechanism to find the template for cache events.

This patch introduces a "key", which is the index into
p4_templates. The low 12 bits of CCCR are reserved, so we can
hide the "key" in the low 12 bits of hwc->config.

We extract the key from hwc->config and then quickly find the
template.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1268908387.13901.127.camel@minggr.sh.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Lin Ming 2010-03-18 18:33:07 +08:00 committed by Ingo Molnar
parent 55632770d7
commit f34edbc1cd
2 changed files with 38 additions and 53 deletions

View File

@ -65,6 +65,7 @@
#define P4_CCCR_THREAD_SINGLE 0x00010000U #define P4_CCCR_THREAD_SINGLE 0x00010000U
#define P4_CCCR_THREAD_BOTH 0x00020000U #define P4_CCCR_THREAD_BOTH 0x00020000U
#define P4_CCCR_THREAD_ANY 0x00030000U #define P4_CCCR_THREAD_ANY 0x00030000U
#define P4_CCCR_RESERVED 0x00000fffU
/* Non HT mask */ /* Non HT mask */
#define P4_CCCR_MASK \ #define P4_CCCR_MASK \
@ -116,7 +117,7 @@
#define p4_config_pack_escr(v) (((u64)(v)) << 32) #define p4_config_pack_escr(v) (((u64)(v)) << 32)
#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL)
#define p4_config_unpack_escr(v) (((u64)(v)) >> 32) #define p4_config_unpack_escr(v) (((u64)(v)) >> 32)
#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) #define p4_config_unpack_cccr(v) (((u64)(v)) & 0xfffff000ULL)
#define p4_config_unpack_emask(v) \ #define p4_config_unpack_emask(v) \
({ \ ({ \
@ -126,6 +127,8 @@
t; \ t; \
}) })
#define p4_config_unpack_key(v) (((u64)(v)) & P4_CCCR_RESERVED)
#define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT_SHIFT 63
#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)

View File

@ -18,6 +18,7 @@ struct p4_event_template {
u32 opcode; /* ESCR event + CCCR selector */ u32 opcode; /* ESCR event + CCCR selector */
u64 config; /* packed predefined bits */ u64 config; /* packed predefined bits */
int dep; /* upstream dependency event index */ int dep; /* upstream dependency event index */
int key; /* index into p4_templates */
unsigned int emask; /* ESCR EventMask */ unsigned int emask; /* ESCR EventMask */
unsigned int escr_msr[2]; /* ESCR MSR for this event */ unsigned int escr_msr[2]; /* ESCR MSR for this event */
unsigned int cntr[2]; /* counter index (offset) */ unsigned int cntr[2]; /* counter index (offset) */
@ -39,38 +40,31 @@ static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config);
*/ */
struct p4_event_template p4_templates[] = { struct p4_event_template p4_templates[] = {
[0] = { [0] = {
.opcode = P4_UOP_TYPE,
.config = 0,
.dep = -1,
.emask =
P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) |
P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
.cntr = { 16, 17 },
},
[1] = {
.opcode = P4_GLOBAL_POWER_EVENTS, .opcode = P4_GLOBAL_POWER_EVENTS,
.config = 0, .config = 0,
.dep = -1, .dep = -1,
.key = 0,
.emask = .emask =
P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING), P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING),
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
.cntr = { 0, 2 }, .cntr = { 0, 2 },
}, },
[2] = { [1] = {
.opcode = P4_INSTR_RETIRED, .opcode = P4_INSTR_RETIRED,
.config = 0, .config = 0,
.dep = -1, /* needs front-end tagging */ .dep = -1, /* needs front-end tagging */
.key = 1,
.emask = .emask =
P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) |
P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG), P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG),
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
.cntr = { 12, 14 }, .cntr = { 12, 14 },
}, },
[3] = { [2] = {
.opcode = P4_BSQ_CACHE_REFERENCE, .opcode = P4_BSQ_CACHE_REFERENCE,
.config = 0, .config = 0,
.dep = -1, .dep = -1,
.key = 2,
.emask = .emask =
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
@ -81,10 +75,11 @@ struct p4_event_template p4_templates[] = {
.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
.cntr = { 0, 2 }, .cntr = { 0, 2 },
}, },
[4] = { [3] = {
.opcode = P4_BSQ_CACHE_REFERENCE, .opcode = P4_BSQ_CACHE_REFERENCE,
.config = 0, .config = 0,
.dep = -1, .dep = -1,
.key = 3,
.emask = .emask =
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
@ -92,10 +87,11 @@ struct p4_event_template p4_templates[] = {
.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
.cntr = { 0, 3 }, .cntr = { 0, 3 },
}, },
[5] = { [4] = {
.opcode = P4_RETIRED_BRANCH_TYPE, .opcode = P4_RETIRED_BRANCH_TYPE,
.config = 0, .config = 0,
.dep = -1, .dep = -1,
.key = 4,
.emask = .emask =
P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) |
P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) |
@ -104,48 +100,38 @@ struct p4_event_template p4_templates[] = {
.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
.cntr = { 4, 6 }, .cntr = { 4, 6 },
}, },
[6] = { [5] = {
.opcode = P4_MISPRED_BRANCH_RETIRED, .opcode = P4_MISPRED_BRANCH_RETIRED,
.config = 0, .config = 0,
.dep = -1, .dep = -1,
.key = 5,
.emask = .emask =
P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS), P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS),
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
.cntr = { 12, 14 }, .cntr = { 12, 14 },
}, },
[7] = { [6] = {
.opcode = P4_FSB_DATA_ACTIVITY, .opcode = P4_FSB_DATA_ACTIVITY,
.config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
.dep = -1, .dep = -1,
.key = 6,
.emask = .emask =
P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) | P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) |
P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN), P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN),
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
.cntr = { 0, 2 }, .cntr = { 0, 2 },
}, },
}; [7] = {
.opcode = P4_UOP_TYPE,
static struct p4_event_template *p4_event_map[PERF_COUNT_HW_MAX] = { .config = 0,
/* non-halted CPU clocks */ .dep = -1,
[PERF_COUNT_HW_CPU_CYCLES] = &p4_templates[1], .key = 7,
.emask =
/* retired instructions: dep on tagging the FSB */ P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) |
[PERF_COUNT_HW_INSTRUCTIONS] = &p4_templates[2], P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
/* cache hits */ .cntr = { 16, 17 },
[PERF_COUNT_HW_CACHE_REFERENCES] = &p4_templates[3], },
/* cache misses */
[PERF_COUNT_HW_CACHE_MISSES] = &p4_templates[4],
/* branch instructions retired */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = &p4_templates[5],
/* mispredicted branches retired */
[PERF_COUNT_HW_BRANCH_MISSES] = &p4_templates[6],
/* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
[PERF_COUNT_HW_BUS_CYCLES] = &p4_templates[7],
}; };
static u64 p4_pmu_event_map(int hw_event) static u64 p4_pmu_event_map(int hw_event)
@ -153,11 +139,11 @@ static u64 p4_pmu_event_map(int hw_event)
struct p4_event_template *tpl; struct p4_event_template *tpl;
u64 config; u64 config;
if (hw_event > ARRAY_SIZE(p4_event_map)) { if (hw_event > ARRAY_SIZE(p4_templates)) {
printk_once(KERN_ERR "PMU: Incorrect event index\n"); printk_once(KERN_ERR "PMU: Incorrect event index\n");
return 0; return 0;
} }
tpl = p4_event_map[hw_event]; tpl = &p4_templates[hw_event];
/* /*
* fill config up according to * fill config up according to
@ -167,6 +153,7 @@ static u64 p4_pmu_event_map(int hw_event)
config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT); config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT);
config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT); config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT);
config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT);
config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED);
/* on HT machine we need a special bit */ /* on HT machine we need a special bit */
if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id())) if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id()))
@ -187,16 +174,11 @@ static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src)
static struct p4_event_template *p4_pmu_template_lookup(u64 config) static struct p4_event_template *p4_pmu_template_lookup(u64 config)
{ {
u32 opcode = p4_config_unpack_opcode(config); int key = p4_config_unpack_key(config);
unsigned int emask = p4_config_unpack_emask(config);
unsigned int i;
for (i = 0; i < ARRAY_SIZE(p4_templates); i++) {
if (opcode == p4_templates[i].opcode &&
p4_pmu_emask_match(emask, p4_templates[i].emask))
return &p4_templates[i];
}
if (key < ARRAY_SIZE(p4_templates))
return &p4_templates[key];
else
return NULL; return NULL;
} }
@ -564,7 +546,7 @@ static __initconst struct x86_pmu p4_pmu = {
.perfctr = MSR_P4_BPU_PERFCTR0, .perfctr = MSR_P4_BPU_PERFCTR0,
.event_map = p4_pmu_event_map, .event_map = p4_pmu_event_map,
.raw_event = p4_pmu_raw_event, .raw_event = p4_pmu_raw_event,
.max_events = ARRAY_SIZE(p4_event_map), .max_events = ARRAY_SIZE(p4_templates),
.get_event_constraints = x86_get_event_constraints, .get_event_constraints = x86_get_event_constraints,
/* /*
* IF HT disabled we may need to use all * IF HT disabled we may need to use all