gprofng: add hardware counters for AMD Zen3

Historically, we have used several APIs (perfctr, libcpc, perf_event_open) for profiling.
For each hardware platform, we have several tables of hardware counters.
Some information is duplicated in these tables.
Some of the information is no longer used.
I did not touch the existing hwc tables.
I added a new hwc table for an AMD Zen3 machine.
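
The new table entries are raw PMU events. Below is a minimal sketch, not code from this commit, of how one of them (bp_l1_tlb_fetch_hit.if1g: event 0x94, umask 0x4) would reach the kernel; it assumes only the event | (umask << 8) packing used by the I() macro in src/hwc_amd_zen3.h, and the attr flags chosen here are illustrative.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Sketch: open one Zen3 raw counter for the calling thread.
   The config packing mirrors the I() macro in src/hwc_amd_zen3.h;
   disabled/exclude_kernel are illustrative choices, not gprofng's.  */
int
open_zen3_counter (void)
{
  struct perf_event_attr pe;
  memset (&pe, 0, sizeof (pe));
  pe.type = PERF_TYPE_RAW;
  pe.size = sizeof (pe);
  pe.config = 0x94 | (0x4 << 8);   /* bp_l1_tlb_fetch_hit.if1g */
  pe.disabled = 1;
  pe.exclude_kernel = 1;
  return syscall (__NR_perf_event_open, &pe, 0, -1, -1, 0);
}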

ChangeLog
2024-05-16  Vladimir Mezentsev  <vladimir.mezentsev@oracle.com>

	PR gprofng/31123
	* common/core_pcbe.c (core_pcbe_get_events): Add new argument.
	* common/hwc_cpus.h: New constants for AMD hardware.
	* common/hwcdrv.c: Add new argument to hwcdrv_get_descriptions.
	Clean up the code.
	* common/hwcdrv.h: Likewise.
	* common/hwcfuncs.c (hwcdrv_get_descriptions): Add new argument.
	* common/hwctable.c: Add the hwc table for AMD Zen3.
	* src/hwc_amd_zen3.h: New file.
	* common/opteron_pcbe.c: Add new argument to opt_pcbe_get_events.
	* src/collctrl.cc: Remove unused variable.
	* src/collctrl.h: Likewise.
Vladimir Mezentsev 2024-05-16 21:00:51 -07:00
parent 663741df74
commit ee7af0e710
10 changed files with 820 additions and 126 deletions

gprofng/common/core_pcbe.c

@@ -2734,13 +2734,6 @@ core_pcbe_init (void)
{
switch (cpuid_getvendor ())
{
case X86_VENDOR_AMD:
snprintf (core_impl_name, sizeof (core_impl_name), "%s", X86_VENDORSTR_AMD);
events_table = events_generic;
num_gpc = 4;
num_ffc = 0;
total_pmc = num_gpc + num_ffc;
return 0;
case ARM_CPU_IMP_ARM:
case ARM_CPU_IMP_BRCM:
case ARM_CPU_IMP_CAVIUM:
@@ -2948,7 +2941,7 @@ core_pcbe_cpuref (void)
}
static int
core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb, Hwcentry *raw_hwc_tbl)
{
int count = 0;
const struct events_table_t *pevent;
@@ -2966,6 +2959,14 @@ core_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
count++;
}
/* add generic events here */
if (raw_hwc_tbl)
for (Hwcentry *h = raw_hwc_tbl; h->name; h++)
if (h->use_perf_event_type)
for (int jj = 0; jj < num_gpc; jj++)
{
hwc_cb (jj, h->name);
count++;
}
return count;
}

gprofng/common/hwc_cpus.h

@@ -34,8 +34,16 @@ typedef struct
char *cpu_modelstr;
} cpu_info_t;
#ifdef __cplusplus
extern "C"
{
#endif
extern cpu_info_t *read_cpuinfo();
#ifdef __cplusplus
}
#endif
#define MAX_PICS 20 /* Max # of HW ctrs that can be enabled simultaneously */
/* type for specifying CPU register number */
@@ -105,6 +113,8 @@ extern cpu_info_t *read_cpuinfo();
#define CPC_AMD_FAM_11H 2502 /* Griffin... */
#define CPC_AMD_FAM_15H 2503
#define CPC_AMD_Authentic 2504
#define CPC_AMD_FAM_19H_ZEN3 2505
#define CPC_AMD_FAM_19H_ZEN4 2506
#define CPC_KPROF 3003 // OBSOLETE (To support 12.3 and earlier)
#define CPC_FOX 3004 /* pseudo-chip */
@@ -117,7 +127,32 @@ extern cpu_info_t *read_cpuinfo();
#define CPC_SPARC64_X 4006 /* Athena */
#define CPC_SPARC64_XII 4010 /* Athena++ */
// aarch64. Constants from tools/arch/arm64/include/asm/cputype.h
#define AMD_FAM_19H_ZEN3_NAME "AMD Family 19h (Zen3)"
#define AMD_FAM_19H_ZEN4_NAME "AMD Family 19h (Zen4)"
enum Amd_famaly
{
AMD_ZEN_FAMILY = 0x17,
AMD_ZEN3_FAMILY = 0x19
};
enum Amd_model
{
AMD_ZEN_RYZEN = 0x1,
AMD_ZENPLUS_RYZEN = 0x8,
AMD_ZENPLUS_RYZEN2 = 0x18,
AMD_ZEN2_RYZEN = 0x31,
AMD_ZEN2_RYZEN2 = 0x71,
AMD_ZEN2_RYZEN3 = 0x60,
AMD_ZEN3_RYZEN = 0x1,
AMD_ZEN3_RYZEN2 = 0x21,
AMD_ZEN3_RYZEN3 = 0x50,
AMD_ZEN3_EPYC_TRENTO = 0x30,
AMD_ZEN4_RYZEN = 0x61,
AMD_ZEN4_EPYC = 0x11
};
// aarch64. Constants from tools/arch/arm64/include/asm/cputype.h
// in https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
enum {
ARM_CPU_IMP_ARM = 0x41,
@@ -147,6 +182,8 @@ enum {
{CPC_AMD_FAM_15H , "AMD Family 15h Model 01h"}, \
{CPC_AMD_FAM_15H , "AMD Family 15h Model 02h"},/*future*/ \
{CPC_AMD_FAM_15H , "AMD Family 15h Model 03h"},/*future*/ \
{CPC_AMD_FAM_19H_ZEN3 , AMD_FAM_19H_ZEN3_NAME}, \
{CPC_AMD_FAM_19H_ZEN4 , AMD_FAM_19H_ZEN4_NAME}, \
{CPC_PENTIUM_4_HT , "Pentium 4 with HyperThreading"}, \
{CPC_PENTIUM_4 , "Pentium 4"}, \
{CPC_PENTIUM_PRO_MMX , "Pentium Pro with MMX, Pentium II"}, \
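
The family/model constants above are decoded from CPUID leaf 1. For reference, here is the conventional x86 decoding, a sketch that is not necessarily the exact code in cpuid.c:

#include <cpuid.h>

/* Standard CPUID leaf-1 decoding: the extended family is added when
   the base family is 0xf, and the extended model is prepended from
   family 6 on.  Zen3 reports base family 0xf + extended family 0xa,
   i.e. 0x19, matching AMD_ZEN3_FAMILY above.  */
static void
get_family_model (unsigned *family, unsigned *model)
{
  unsigned eax, ebx, ecx, edx;
  __get_cpuid (1, &eax, &ebx, &ecx, &edx);
  unsigned base_fam = (eax >> 8) & 0xf;
  unsigned base_mod = (eax >> 4) & 0xf;
  *family = base_fam + (base_fam == 0xf ? ((eax >> 20) & 0xff) : 0);
  *model = base_fam >= 6 ? ((((eax >> 16) & 0xf) << 4) | base_mod) : base_mod;
}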

gprofng/common/hwcdrv.c

@@ -34,6 +34,7 @@
#include "cpuid.c" /* ftns for identifying a chip */
static hdrv_pcbe_api_t *pcbe_driver = NULL;
static hdrv_pcbe_api_t hdrv_pcbe_core_api;
static hdrv_pcbe_api_t hdrv_pcbe_opteron_api;
static hdrv_pcbe_api_t *hdrv_pcbe_drivers[] = {
@@ -94,8 +95,6 @@ hwcdrv_lookup_cpuver (const char * cpcN_cciname)
* For M8, a 4-bit mask of supported PICs is stored in bits [23:20].
*/
IS_GLOBAL hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum = 0;
static const attr_info_t perfctr_sparc_attrs[] = {
{NTXT ("user"), 0, 0x01, 16}, //usr
{NTXT ("system"), 0, 0x01, 17}, //os
@@ -132,8 +131,9 @@ myperfctr_get_x86_eventnum (const char *eventname, uint_t pmc,
eventsel_t *eventsel, eventsel_t *valid_umask,
uint_t *pmc_sel)
{
if (hwcdrv_get_x86_eventnum &&
!hwcdrv_get_x86_eventnum (eventname, pmc, eventsel, valid_umask, pmc_sel))
if (pcbe_driver && pcbe_driver->hdrv_pcbe_get_eventnum &&
!pcbe_driver->hdrv_pcbe_get_eventnum (eventname, pmc, eventsel,
valid_umask, pmc_sel))
return 0;
/* check for numerically-specified counters */
@@ -214,7 +214,7 @@ set_x86_attr_bits (eventsel_t *result_mask, eventsel_t evnt_valid_umask,
return 0;
}
IS_GLOBAL int
static int
hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name,
eventsel_t *return_event, uint_t *return_pmc_sel)
{
@@ -287,6 +287,7 @@ perf_event_open (struct perf_event_attr *hw_event_uptr, pid_t pid,
rc = syscall (__NR_perf_event_open, hw_event_uptr, pid, cpu, group_fd, flags);
if (rc != -1)
return rc;
TprintfT (0, "perf_event_open %d: errno=%d %s\n", retry, errno, strerror(errno));
}
return rc;
}
@@ -375,7 +376,6 @@ static struct
int internal_open_called;
hwcfuncs_tsd_get_fn_t find_vpc_ctx;
unsigned hwcdef_cnt; /* number of *active* hardware counters */
hwcdrv_get_events_fn_t *get_events;
} hdrv_pcl_state;
static hwcdrv_about_t hdrv_pcl_about = {.cpcN_cpuver = CPUVER_UNDEFINED};
@@ -813,14 +813,13 @@ hdrv_pcl_internal_open ()
hdrv_pcbe_api_t *ppcbe = hdrv_pcbe_drivers[ii];
if (!ppcbe->hdrv_pcbe_init ())
{
pcbe_driver = ppcbe;
hdrv_pcl_about.cpcN_cciname = ppcbe->hdrv_pcbe_impl_name ();
hdrv_pcl_about.cpcN_cpuver = hwcdrv_lookup_cpuver (hdrv_pcl_about.cpcN_cciname);
if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED)
goto internal_open_error;
hdrv_pcl_about.cpcN_npics = ppcbe->hdrv_pcbe_ncounters ();
hdrv_pcl_about.cpcN_docref = ppcbe->hdrv_pcbe_cpuref ();
hdrv_pcl_state.get_events = ppcbe->hdrv_pcbe_get_events;
hwcdrv_get_x86_eventnum = ppcbe->hdrv_pcbe_get_eventnum;
break;
}
}
@@ -894,11 +893,12 @@ hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn)
}
HWCDRV_API int
hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb)
hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb,
Hwcentry *raw_hwc_tbl)
{
int count = 0;
if (hwc_cb && hdrv_pcl_state.get_events)
count = hdrv_pcl_state.get_events (hwc_cb);
if (hwc_cb && pcbe_driver && pcbe_driver->hdrv_pcbe_get_events)
count = pcbe_driver->hdrv_pcbe_get_events (hwc_cb, raw_hwc_tbl);
if (attr_cb)
for (int ii = 0; perfctr_attrs_table && perfctr_attrs_table[ii].attrname; ii++)
attr_cb (perfctr_attrs_table[ii].attrname);

gprofng/common/hwcdrv.h

@@ -126,11 +126,13 @@ extern "C"
*/
int (*hwcdrv_get_descriptions)(hwcf_hwc_cb_t *hwc_find_action,
hwcf_attr_cb_t *attr_find_action);
/* Initiate callbacks with all available HWC names and and HWC attributes.
hwcf_attr_cb_t *attr_find_action,
Hwcentry *raw_hwc_tbl);
/* Initiate callbacks with all available HWC names and HWC attributes.
Input:
<hwc_find_action>: if not NULL, will be called once for each HWC
<attr_find_action>: if not NULL, will be called once for each attribute
<raw_hwc_tbl>: counter definitions.
Return: 0 if successful
or a cpc return code upon error
*/
@@ -260,15 +262,6 @@ extern "C"
( (((eventsel_t)(evnum) & 0x0f00ULL) << 24) | ((eventsel_t)(evnum) & ~0x0f00ULL) )
typedef uint64_t eventsel_t;
extern int hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name,
eventsel_t *return_event, uint_t *return_pmc_sel);
typedef int (hwcdrv_get_events_fn_t) (hwcf_hwc_cb_t *hwc_cb);
typedef int (hwcdrv_get_eventnum_fn_t) (const char *eventname, uint_t pmc,
eventsel_t *eventnum,
eventsel_t *valid_umask, uint_t *pmc_sel);
extern hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum;
typedef struct
{
const char * attrname; // user-visible name of attribute
@@ -285,7 +278,7 @@ extern "C"
uint_t (*hdrv_pcbe_ncounters)(void);
const char *(*hdrv_pcbe_impl_name)(void);
const char *(*hdrv_pcbe_cpuref)(void);
int (*hdrv_pcbe_get_events)(hwcf_hwc_cb_t *hwc_cb);
int (*hdrv_pcbe_get_events)(hwcf_hwc_cb_t *hwc_cb, Hwcentry *raw_hwc_tbl);
int (*hdrv_pcbe_get_eventnum)(const char * eventname, uint_t pmc,
eventsel_t *eventnum, eventsel_t *valid_umask,
uint_t *pmc_sel);
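
A minimal consumer of this interface could look like the sketch below. The callback signatures are inferred from the calls in this diff (hwc_cb (jj, h->name); attr_cb (attrname)), so treat them as an assumption:

#include <stdio.h>

/* Hypothetical callbacks that just list what the PCBE driver reports.
   Signatures are assumed from the call sites shown above.  */
static void
print_hwc (unsigned regno, const char *name)
{
  printf ("PIC %u: %s\n", regno, name);
}

static void
print_attr (const char *attrname)
{
  printf ("attribute: %s\n", attrname);
}

/* ...then, on a Zen3 machine:
   hwcdrv_get_descriptions (print_hwc, print_attr, amd_zen3_list);  */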

gprofng/common/hwcfuncs.c

@@ -63,7 +63,7 @@ hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn)
HWCDRV_API int
hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_find_action,
hwcf_attr_cb_t *attr_find_action)
hwcf_attr_cb_t *attr_find_action, Hwcentry *hwcdef)
{
return 0;
}

gprofng/common/hwctable.c

@@ -2369,82 +2369,86 @@ static Hwcentry amd_15h[] = {
#define HWCE(nm, mtr, id, op, res) \
INIT_HWC(nm, mtr, (id) | ((op) << 8) | ((res) << 16), PERF_TYPE_HW_CACHE)
static Hwcentry generic_list[] = {
// Hardware event:
{ HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,
.int_name = "cycles" },
{ HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,
.int_name = "cycles~system=1~user=0" },
{ HWE("branch-instructions", STXT("Branch-instructions"),
PERF_COUNT_HW_BRANCH_INSTRUCTIONS) },
{ HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) },
{ HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES),
.timecvt = 1 },
{ HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) },
{ HWE("cache-references", STXT("Cache-references"),
PERF_COUNT_HW_CACHE_REFERENCES) },
{ HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 },
{ HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS),
.int_name = "instructions" },
{ HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES),
.timecvt = 1 },
{ HWE("stalled-cycles-backend", STXT("Stalled Cycles during issue."),
PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 },
{ HWE("stalled-cycles-frontend", STXT("Stalled Cycles during retirement."),
PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 },
// Software event:
{ SWE("alignment-faults", STXT("Alignment Faults"),
PERF_COUNT_SW_ALIGNMENT_FAULTS) },
{ SWE("context-switches", STXT("Context Switches"),
PERF_COUNT_SW_CONTEXT_SWITCHES) },
{ SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK),
.timecvt = 1 },
{ SWE("cpu-migrations", STXT("CPU Migrations"),
PERF_COUNT_SW_CPU_MIGRATIONS) },
{ SWE("emulation-faults", STXT("Emulation Faults"),
PERF_COUNT_SW_EMULATION_FAULTS) },
{ SWE("major-faults", STXT("Major Page Faults"),
PERF_COUNT_SW_PAGE_FAULTS_MAJ) },
{ SWE("minor-faults", STXT("Minor Page Faults"),
PERF_COUNT_SW_PAGE_FAULTS_MIN) },
{ SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) },
{ SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK),
.timecvt = 1 },
// Hardware cache event
{ HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"),
PERF_COUNT_HW_CACHE_L1D,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
{ HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"),
PERF_COUNT_HW_CACHE_L1D,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
{ HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"),
PERF_COUNT_HW_CACHE_L1D,
PERF_COUNT_HW_CACHE_RESULT_MISS, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
{ HWCE("L1-dcache-stores", STXT("L1 D-cache Store Stores"),
PERF_COUNT_HW_CACHE_L1D,
PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
{ HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"),
PERF_COUNT_HW_CACHE_L1I,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
{ HWCE("L1-icache-load-misses", STXT("L1 Instructions Loads"),
PERF_COUNT_HW_CACHE_L1I,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
{ HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"),
PERF_COUNT_HW_CACHE_DTLB,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
{ HWCE("dTLB-loads", STXT("D-TLB Loads"),
PERF_COUNT_HW_CACHE_DTLB,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
{ HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"),
PERF_COUNT_HW_CACHE_ITLB,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
{ HWCE("iTLB-loads", STXT("The Instruction TLB Loads"),
PERF_COUNT_HW_CACHE_ITLB,
#define HWC_GENERIC \
/* Hardware event: */\
{ HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,\
.int_name = "cycles" },\
{ HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,\
.int_name = "cycles~system=1~user=0" },\
{ HWE("branch-instructions", STXT("Branch-instructions"),\
PERF_COUNT_HW_BRANCH_INSTRUCTIONS) },\
{ HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) },\
{ HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES),\
.timecvt = 1 },\
{ HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) },\
{ HWE("cache-references", STXT("Cache-references"),\
PERF_COUNT_HW_CACHE_REFERENCES) },\
{ HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 },\
{ HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS),\
.int_name = "instructions" },\
{ HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES),\
.timecvt = 1 },\
{ HWE("stalled-cycles-backend", STXT("Stalled Cycles during issue."),\
PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 },\
{ HWE("stalled-cycles-frontend", STXT("Stalled Cycles during retirement."),\
PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 },\
/* Software event: */\
{ SWE("alignment-faults", STXT("Alignment Faults"),\
PERF_COUNT_SW_ALIGNMENT_FAULTS) },\
{ SWE("context-switches", STXT("Context Switches"),\
PERF_COUNT_SW_CONTEXT_SWITCHES) },\
{ SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK),\
.timecvt = 1 },\
{ SWE("cpu-migrations", STXT("CPU Migrations"),\
PERF_COUNT_SW_CPU_MIGRATIONS) },\
{ SWE("emulation-faults", STXT("Emulation Faults"),\
PERF_COUNT_SW_EMULATION_FAULTS) },\
{ SWE("major-faults", STXT("Major Page Faults"),\
PERF_COUNT_SW_PAGE_FAULTS_MAJ) },\
{ SWE("minor-faults", STXT("Minor Page Faults"),\
PERF_COUNT_SW_PAGE_FAULTS_MIN) },\
{ SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) },\
{ SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK),\
.timecvt = 1 },\
/* Hardware cache event: */\
{ HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"),\
PERF_COUNT_HW_CACHE_L1D,\
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
{ HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"),\
PERF_COUNT_HW_CACHE_L1D,\
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
{ HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"),\
PERF_COUNT_HW_CACHE_L1D,\
PERF_COUNT_HW_CACHE_RESULT_MISS, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
{ HWCE("L1-dcache-stores", STXT("L1 D-cache Store Stores"),\
PERF_COUNT_HW_CACHE_L1D,\
PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
{ HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"),\
PERF_COUNT_HW_CACHE_L1I,\
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
{ HWCE("L1-icache-load-misses", STXT("L1 Instructions Loads"),\
PERF_COUNT_HW_CACHE_L1I,\
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
{ HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"),\
PERF_COUNT_HW_CACHE_DTLB,\
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
{ HWCE("dTLB-loads", STXT("D-TLB Loads"),\
PERF_COUNT_HW_CACHE_DTLB,\
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },\
{ HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"),\
PERF_COUNT_HW_CACHE_ITLB,\
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },\
{ HWCE("iTLB-loads", STXT("The Instruction TLB Loads"),\
PERF_COUNT_HW_CACHE_ITLB,\
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
static Hwcentry generic_list[] = {
HWC_GENERIC
{NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
};
#include "hwc_amd_zen3.h"
/* structure defining the counters for a CPU type */
typedef struct
{
@@ -2516,6 +2520,7 @@ static cpu_list_t cputabs[] = {
{CPC_KPROF, kproflist, {NULL}}, // OBSOLETE (To support 12.3 and earlier, TBR)
{ARM_CPU_IMP_APM, generic_list, {"insts,,cycles", 0}},
{CPC_AMD_Authentic, generic_list, {"insts,,cycles", 0}},
{CPC_AMD_FAM_19H_ZEN3, amd_zen3_list, {"insts,,cycles", 0}},
{0, generic_list, {"insts,,cycles", 0}},
};
@@ -3033,7 +3038,7 @@ setup_cpc_general (int skip_hwc_test)
valid_cpu_tables[1] = papi_generic_list;
Tprintf (DBG_LT2, "hwctable: setup_cpc(): getting descriptions \n");
// populate cpcx_raw and cpcx_attr
hwcdrv->hwcdrv_get_descriptions (hwc_cb, attrs_cb);
hwcdrv->hwcdrv_get_descriptions (hwc_cb, attrs_cb, cputabs_entry->stdlist_table);
for (int kk = 0; kk < 2; kk++)
{ // collect and er_kernel
hwc_process_raw_ctrs (kk, &cpcx_std[kk], &cpcx_raw[kk], &cpcx_hidden[kk],
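
As a worked example of the HWCE packing used by generic_list, which is the PERF_TYPE_HW_CACHE layout documented in perf_event_open(2):

#include <linux/perf_event.h>

/* L1-dcache-load-misses: config = id | (op << 8) | (result << 16).  */
unsigned long long l1d_load_miss_config =
  PERF_COUNT_HW_CACHE_L1D                      /* 0       */
  | (PERF_COUNT_HW_CACHE_OP_READ << 8)         /* 0 << 8  */
  | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);   /* 1 << 16, so 0x10000 */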

gprofng/common/opteron_pcbe.c

@@ -304,6 +304,8 @@ static amd_generic_event_t family_10h_generic_events[] = {
};
static amd_event_t *amd_events = NULL;
static const char *amd_impl_name = "";
static const char *amd_cpuref = "";
static uint_t amd_family;
static amd_generic_event_t *amd_generic_events = NULL;
@@ -318,19 +320,39 @@ opt_pcbe_init (void)
if (cpuid_getvendor () != X86_VENDOR_AMD)
return -1;
/*
* Figure out processor revision here and assign appropriate
* event configuration.
*/
amd_impl_name = GTXT ("Unknown AMD processor");
switch (amd_family)
{
case OPTERON_FAMILY:
amd_events = opt_events_rev_E;
amd_generic_events = opt_generic_events;
amd_impl_name = "AMD Opteron & Athlon64";
amd_cpuref = GTXT ("See Chapter 10 of the \"BIOS and Kernel Developer's"
" Guide for the AMD Athlon 64 and AMD Opteron Processors,\"\n"
"AMD publication #26094");
break;
case AMD_FAMILY_10H:
amd_events = family_10h_events;
amd_generic_events = family_10h_generic_events;
amd_impl_name = "AMD Family 10h";
amd_cpuref = GTXT ("See section 3.15 of the \"BIOS and Kernel Developer's"
" Guide (BKDG) For AMD Family 10h Processors,\"\n"
"AMD publication #31116");
break;
case AMD_ZEN3_FAMILY:
switch (cpuid_getmodel ())
{
case AMD_ZEN3_RYZEN:
case AMD_ZEN3_RYZEN2:
case AMD_ZEN3_RYZEN3:
case AMD_ZEN3_EPYC_TRENTO:
amd_impl_name = AMD_FAM_19H_ZEN3_NAME;
break;
case AMD_ZEN4_RYZEN:
case AMD_ZEN4_EPYC:
amd_impl_name = AMD_FAM_19H_ZEN4_NAME;
break;
}
break;
}
return 0;
@@ -345,27 +367,17 @@ opt_pcbe_ncounters (void)
static const char *
opt_pcbe_impl_name (void)
{
if (amd_family == OPTERON_FAMILY)
return ("AMD Opteron & Athlon64");
else if (amd_family == AMD_FAMILY_10H)
return ("AMD Family 10h");
else
return ("Unknown AMD processor");
return amd_impl_name;
}
static const char *
opt_pcbe_cpuref (void)
{
if (amd_family == OPTERON_FAMILY)
return GTXT ("See Chapter 10 of the \"BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD Opteron Processors,\"\nAMD publication #26094");
else if (amd_family == AMD_FAMILY_10H)
return GTXT ("See section 3.15 of the \"BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h Processors,\"\nAMD publication #31116");
else
return GTXT ("Unknown AMD processor");
return amd_cpuref;
}
static int
opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb, Hwcentry *raw_hwc_tbl)
{
int count = 0;
for (uint_t kk = 0; amd_events && amd_events[kk].name; kk++)
@@ -380,6 +392,14 @@ opt_pcbe_get_events (hwcf_hwc_cb_t *hwc_cb)
hwc_cb (jj, amd_generic_events[kk].name);
count++;
}
if (raw_hwc_tbl)
for (Hwcentry *h = raw_hwc_tbl; h->name; h++)
if (h->use_perf_event_type)
for (uint_t jj = 0; jj < opt_pcbe_ncounters (); jj++)
{
hwc_cb (jj, h->name);
count++;
}
return count;
}
@@ -392,6 +412,12 @@ opt_pcbe_get_eventnum (const char *eventname, uint_t pmc, eventsel_t *eventsel,
*eventsel = (eventsel_t) - 1;
*event_valid_umask = 0x0;
if (amd_events == NULL && amd_generic_events == NULL)
{ // These tables are created only for old hardware.
*eventsel = 0;
return 0;
}
/* search table */
for (kk = 0; amd_events && amd_events[kk].name; kk++)
{

gprofng/src/collctrl.cc

@@ -131,6 +131,10 @@ read_cpuinfo()
}
fclose (procf);
}
if (cpu_info.cpu_vendorstr == NULL)
cpu_info.cpu_vendorstr = GTXT ("Unknown processor");
if (cpu_info.cpu_modelstr == NULL)
cpu_info.cpu_modelstr = GTXT ("Unknown cpu model");
return &cpu_info;
}
@@ -176,7 +180,6 @@ Coll_Ctrl::Coll_Ctrl (int _interactive, bool _defHWC, bool _kernelHWC)
/* set default clock parameters */
hwcprof_enabled_cnt = 0; // must be set before calling determine_profile_params();
determine_profile_params (); // inits clk_params which is used by clock profiling AND HWCs
cpc_cpuver = CPUVER_UNDEFINED;
/* set default control values */
debug_mode = 0;
@@ -271,7 +274,6 @@ Coll_Ctrl::Coll_Ctrl (Coll_Ctrl * cc)
cpu_clk_freq = cc->cpu_clk_freq;
npages = cc->npages;
page_size = cc->page_size;
cpc_cpuver = cc->cpc_cpuver;
debug_mode = cc->debug_mode;
java_mode = cc->java_mode;
java_default = cc->java_default;
@@ -1470,7 +1472,6 @@ Coll_Ctrl::add_hwcstring (const char *string, char **warnmsg)
prev_cnt = 0;
/* look up the CPU version */
cpc_cpuver = hwc_get_cpc_cpuver ();
if (string && *string)
{
/* lookup counters */

gprofng/src/collctrl.h

@@ -251,7 +251,6 @@ public:
char *get_node_name () { return node_name; };
long get_ncpus () { return ncpus; };
int get_cpu_clk_freq () { return cpu_clk_freq; };
int get_cpc_cpuver () { return cpc_cpuver; };
/* disable warning about non-local filesystems */
void set_nofswarn () { nofswarn = 1; };

gprofng/src/hwc_amd_zen3.h (new file, 632 lines)

@@ -0,0 +1,632 @@
/* Copyright (C) 2024 Free Software Foundation, Inc.
Contributed by Oracle.
This file is part of GNU Binutils.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 51 Franklin Street - Fifth Floor, Boston,
MA 02110-1301, USA. */
#define I(nm, event, umask, mtr) INIT_HWC(nm, mtr, (event) | ((umask) << 8), PERF_TYPE_RAW)
static Hwcentry amd_zen3_list[] = {
HWC_GENERIC
/* branch: */
{ I("bp_de_redirect", 0x91, 0, STXT("Decode Redirects")) },
{ I("bp_dyn_ind_pred", 0x8e, 0, STXT("Dynamic Indirect Predictions")) },
{ I("bp_l1_btb_correct", 0x8a, 0,
STXT("L1 Branch Prediction Overrides Existing Prediction (speculative)")) },
{ I("bp_l1_tlb_fetch_hit", 0x94, 0xff,
STXT("The number of instruction fetches that hit in the L1 ITLB")) },
{ I("bp_l1_tlb_fetch_hit.if1g", 0x94, 0x4,
STXT("The number of instruction fetches that hit in the L1 ITLB. L1"
"Instruction TLB hit (1G page size)")) },
{ I("bp_l1_tlb_fetch_hit.if2m", 0x94, 0x2,
STXT("The number of instruction fetches that hit in the L1 ITLB. L1"
"Instruction TLB hit (2M page size)")) },
{ I("bp_l1_tlb_fetch_hit.if4k", 0x94, 0x1,
STXT("The number of instruction fetches that hit in the L1 ITLB. L1"
"Instrcution TLB hit (4K or 16K page size)")) },
{ I("bp_l2_btb_correct", 0x8b, 0,
STXT("L2 Branch Prediction Overrides Existing Prediction (speculative)")) },
{ I("bp_tlb_rel", 0x99, 0, STXT("The number of ITLB reload requests")) },
/* cache: */
{ I("bp_l1_tlb_miss_l2_tlb_hit", 0x84, 0,
STXT("L1 ITLB Miss, L2 ITLB Hit. The number of instruction fetches that miss"
"in the L1 ITLB but hit in the L2 ITLB")) },
{ I("bp_l1_tlb_miss_l2_tlb_miss", 0x85, 0xff,
STXT("The number of instruction fetches that miss in both the L1 and L2 TLBs")) },
{ I("bp_l1_tlb_miss_l2_tlb_miss.coalesced_4k", 0x85, 0x8,
STXT("The number of valid fills into the ITLB originating from the LS"
"Page-Table Walker. Tablewalk requests are issued for L1-ITLB and"
"L2-ITLB misses. Walk for >4K Coalesced page")) },
{ I("bp_l1_tlb_miss_l2_tlb_miss.if1g", 0x85, 0x4,
STXT("The number of valid fills into the ITLB originating from the LS"
"Page-Table Walker. Tablewalk requests are issued for L1-ITLB and"
"L2-ITLB misses. Walk for 1G page")) },
{ I("bp_l1_tlb_miss_l2_tlb_miss.if2m", 0x85, 0x2,
STXT("The number of valid fills into the ITLB originating from the LS"
"Page-Table Walker. Tablewalk requests are issued for L1-ITLB and"
"L2-ITLB misses. Walk for 2M page")) },
{ I("bp_l1_tlb_miss_l2_tlb_miss.if4k", 0x85, 0x1,
STXT("The number of valid fills into the ITLB originating from the LS"
"Page-Table Walker. Tablewalk requests are issued for L1-ITLB and"
"L2-ITLB misses. Walk to 4K page")) },
{ I("bp_snp_re_sync", 0x86, 0,
STXT("The number of pipeline restarts caused by invalidating probes that hit"
"on the instruction stream currently being executed. This would happen"
"if the active instruction stream was being modified by another"
"processor in an MP system - typically a highly unlikely event")) },
{ I("ic_cache_fill_l2", 0x82, 0,
STXT("Instruction Cache Refills from L2. The number of 64 byte instruction"
"cache line was fulfilled from the L2 cache")) },
{ I("ic_cache_fill_sys", 0x83, 0,
STXT("Instruction Cache Refills from System. The number of 64 byte"
"instruction cache line fulfilled from system memory or another cache")) },
{ I("ic_cache_inval.fill_invalidated", 0x8c, 0x1,
STXT("IC line invalidated due to overwriting fill response. The number of"
"instruction cache lines invalidated. A non-SMC event is CMC (cross"
"modifying code), either from the other thread of the core or another"
"core")) },
{ I("ic_cache_inval.l2_invalidating_probe", 0x8c, 0x2,
STXT("IC line invalidated due to L2 invalidating probe (external or LS). The"
"number of instruction cache lines invalidated. A non-SMC event is CMC"
"(cross modifying code), either from the other thread of the core or"
"another core")) },
{ I("ic_fetch_stall.ic_stall_any", 0x87, 0x4,
STXT("Instruction Pipe Stall. IC pipe was stalled during this clock cycle"
"for any reason (nothing valid in pipe ICM1)")) },
{ I("ic_fetch_stall.ic_stall_back_pressure", 0x87, 0x1,
STXT("Instruction Pipe Stall. IC pipe was stalled during this clock cycle"
"(including IC to OC fetches) due to back-pressure")) },
{ I("ic_fetch_stall.ic_stall_dq_empty", 0x87, 0x2,
STXT("Instruction Pipe Stall. IC pipe was stalled during this clock cycle"
"(including IC to OC fetches) due to DQ empty")) },
{ I("ic_fw32", 0x80, 0,
STXT("The number of 32B fetch windows transferred from IC pipe to DE"
"instruction decoder (includes non-cacheable and cacheable fill"
"responses)")) },
{ I("ic_fw32_miss", 0x81, 0,
STXT("The number of 32B fetch windows tried to read the L1 IC and missed in"
"the full tag")) },
{ I("ic_oc_mode_switch.ic_oc_mode_switch", 0x28a, 0x1,
STXT("OC Mode Switch. IC to OC mode switch")) },
{ I("ic_oc_mode_switch.oc_ic_mode_switch", 0x28a, 0x2,
STXT("OC Mode Switch. OC to IC mode switch")) },
{ I("ic_tag_hit_miss.all_instruction_cache_accesses", 0x18e, 0x1f,
STXT("All Instruction Cache Accesses. Counts various IC tag related hit and"
"miss events")) },
{ I("ic_tag_hit_miss.instruction_cache_hit", 0x18e, 0x7,
STXT("Instruction Cache Hit. Counts various IC tag related hit and miss"
"events")) },
{ I("ic_tag_hit_miss.instruction_cache_miss", 0x18e, 0x18,
STXT("Instruction Cache Miss. Counts various IC tag related hit and miss"
"events")) },
{ I("l2_cache_req_stat.ic_access_in_l2", 0x64, 0x7,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Instruction cache requests in L2")) },
{ I("l2_cache_req_stat.ic_dc_hit_in_l2", 0x64, 0xf6,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Instruction cache request hit in L2 and Data cache request"
"hit in L2 (all types)")) },
{ I("l2_cache_req_stat.ic_dc_miss_in_l2", 0x64, 0x9,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Instruction cache request miss in L2 and Data cache request"
"miss in L2 (all types)")) },
{ I("l2_cache_req_stat.ic_fill_hit_s", 0x64, 0x2,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Instruction cache hit non-modifiable line in L2")) },
{ I("l2_cache_req_stat.ic_fill_hit_x", 0x64, 0x4,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Instruction cache hit modifiable line in L2")) },
{ I("l2_cache_req_stat.ic_fill_miss", 0x64, 0x1,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Instruction cache request miss in L2. Use"
"l2_cache_misses_from_ic_miss instead")) },
{ I("l2_cache_req_stat.ls_rd_blk_c", 0x64, 0x8,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Data cache request miss in L2 (all types). Use"
"l2_cache_misses_from_dc_misses instead")) },
{ I("l2_cache_req_stat.ls_rd_blk_cs", 0x64, 0x80,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Data cache shared read hit in L2")) },
{ I("l2_cache_req_stat.ls_rd_blk_l_hit_s", 0x64, 0x20,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Data cache read hit non-modifiable line in L2")) },
{ I("l2_cache_req_stat.ls_rd_blk_l_hit_x", 0x64, 0x40,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Data cache read hit in L2. Modifiable")) },
{ I("l2_cache_req_stat.ls_rd_blk_x", 0x64, 0x10,
STXT("Core to L2 cacheable request access status (not including L2"
"Prefetch). Data cache store or state change hit in L2")) },
{ I("l2_fill_pending.l2_fill_busy", 0x6d, 0x1,
STXT("Cycles with fill pending from L2. Total cycles spent with one or more"
"fill requests in flight from L2")) },
{ I("l2_latency.l2_cycles_waiting_on_fills", 0x62, 0x1,
STXT("Total cycles spent waiting for L2 fills to complete from L3 or memory,"
"divided by four. Event counts are for both threads. To calculate"
"average latency, the number of fills from both threads must be used")) },
{ I("l2_pf_hit_l2", 0x70, 0xff,
STXT("L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead")) },
{ I("l2_pf_miss_l2_hit_l3", 0x71, 0xff,
STXT("L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2"
"pipeline which miss the L2 cache and hit the L3")) },
{ I("l2_pf_miss_l2_l3", 0x72, 0xff,
STXT("L2 prefetcher misses in L3. Counts all L2 prefetches accepted by the"
"L2 pipeline which miss the L2 and the L3 caches")) },
{ I("l2_request_g1.all_no_prefetch", 0x60, 0xf9, STXT("(null)")) },
{ I("l2_request_g1.cacheable_ic_read", 0x60, 0x10,
STXT("All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads")) },
{ I("l2_request_g1.change_to_x", 0x60, 0x8,
STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache state change"
"requests. Request change to writable, check L2 for current state")) },
{ I("l2_request_g1.group2", 0x60, 0x1,
STXT("Miscellaneous events covered in more detail by l2_request_g2 (PMCx061)")) },
{ I("l2_request_g1.l2_hw_pf", 0x60, 0x2,
STXT("All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All"
"prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2"
"hit/miss broken out in a separate perfmon event")) },
{ I("l2_request_g1.ls_rd_blk_c_s", 0x60, 0x20,
STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads")) },
{ I("l2_request_g1.prefetch_l2_cmd", 0x60, 0x4,
STXT("All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd")) },
{ I("l2_request_g1.rd_blk_l", 0x60, 0x80,
STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache reads"
"(including hardware and software prefetch)")) },
{ I("l2_request_g1.rd_blk_x", 0x60, 0x40,
STXT("All L2 Cache Requests (Breakdown 1 - Common). Data cache stores")) },
{ I("l2_request_g2.bus_locks_originator", 0x61, 0x2,
STXT("All L2 Cache Requests (Breakdown 2 - Rare). Bus locks")) },
{ I("l2_request_g2.bus_locks_responses", 0x61, 0x1,
STXT("All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response")) },
{ I("l2_request_g2.group1", 0x61, 0x80,
STXT("Miscellaneous events covered in more detail by l2_request_g1 (PMCx060)")) },
{ I("l2_request_g2.ic_rd_sized", 0x61, 0x10,
STXT("All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read"
"sized")) },
{ I("l2_request_g2.ic_rd_sized_nc", 0x61, 0x8,
STXT("All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read"
"sized non-cacheable")) },
{ I("l2_request_g2.ls_rd_sized", 0x61, 0x40,
STXT("All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized")) },
{ I("l2_request_g2.ls_rd_sized_nc", 0x61, 0x20,
STXT("All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized"
"non-cacheable")) },
{ I("l2_request_g2.smc_inval", 0x61, 0x4,
STXT("All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code"
"invalidates")) },
{ I("l2_wcb_req.cl_zero", 0x63, 0x1,
STXT("LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2"
"WCB (Write Combining Buffer) cache line zeroing requests")) },
{ I("l2_wcb_req.wcb_close", 0x63, 0x20,
STXT("LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write"
"Combining Buffer) close requests")) },
{ I("l2_wcb_req.wcb_write", 0x63, 0x40,
STXT("LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write"
"Combining Buffer) write requests")) },
{ I("l2_wcb_req.zero_byte_store", 0x63, 0x4,
STXT("LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB"
"(Write Combining Buffer) zero byte store requests")) },
{ I("op_cache_hit_miss.all_op_cache_accesses", 0x28f, 0x7,
STXT("All Op Cache accesses. Counts Op Cache micro-tag hit/miss events")) },
{ I("op_cache_hit_miss.op_cache_hit", 0x28f, 0x3,
STXT("Op Cache Hit. Counts Op Cache micro-tag hit/miss events")) },
{ I("op_cache_hit_miss.op_cache_miss", 0x28f, 0x4,
STXT("Op Cache Miss. Counts Op Cache micro-tag hit/miss events")) },
/* core: */
{ I("ex_div_busy", 0xd3, 0, STXT("Div Cycles Busy count")) },
{ I("ex_div_count", 0xd4, 0, STXT("Div Op Count")) },
{ I("ex_ret_brn", 0xc2, 0, STXT("Retired Branch Instructions")) },
{ I("ex_ret_brn_far", 0xc6, 0, STXT("Retired Far Control Transfers")) },
{ I("ex_ret_brn_ind_misp", 0xca, 0,
STXT("Retired Indirect Branch Instructions Mispredicted")) },
{ I("ex_ret_brn_misp", 0xc3, 0,
STXT("Retired Branch Instructions Mispredicted")) },
{ I("ex_ret_brn_resync", 0xc7, 0, STXT("Retired Branch Resyncs")) },
{ I("ex_ret_brn_tkn", 0xc4, 0, STXT("Retired Taken Branch Instructions")) },
{ I("ex_ret_brn_tkn_misp", 0xc5, 0,
STXT("Retired Taken Branch Instructions Mispredicted")) },
{ I("ex_ret_cond", 0xd1, 0,
STXT("Retired Conditional Branch Instructions")) },
{ I("ex_ret_fused_instr", 0x1d0, 0,
STXT("Counts retired Fused Instructions")) },
{ I("ex_ret_ind_brch_instr", 0xcc, 0,
STXT("Retired Indirect Branch Instructions. The number of indirect branches"
"retired")) },
{ I("ex_ret_instr", 0xc0, 0, STXT("Retired Instructions")) },
{ I("ex_ret_mmx_fp_instr.mmx_instr", 0xcb, 0x2, STXT("MMX instructions")) },
{ I("ex_ret_mmx_fp_instr.sse_instr", 0xcb, 0x4,
STXT("SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX)")) },
{ I("ex_ret_mmx_fp_instr.x87_instr", 0xcb, 0x1, STXT("x87 instructions")) },
{ I("ex_ret_msprd_brnch_instr_dir_msmtch", 0x1c7, 0,
STXT("Retired Mispredicted Branch Instructions due to Direction Mismatch")) },
{ I("ex_ret_near_ret", 0xc8, 0, STXT("Retired Near Returns")) },
{ I("ex_ret_near_ret_mispred", 0xc9, 0,
STXT("Retired Near Returns Mispredicted")) },
{ I("ex_ret_ops", 0xc1, 0,
STXT("Retired Ops. Use macro_ops_retired instead")) },
{ I("ex_tagged_ibs_ops.ibs_count_rollover", 0x1cf, 0x4,
STXT("Tagged IBS Ops. Number of times an op could not be tagged by IBS"
"because of a previous tagged op that has not retired")) },
{ I("ex_tagged_ibs_ops.ibs_tagged_ops", 0x1cf, 0x1,
STXT("Tagged IBS Ops. Number of Ops tagged by IBS")) },
{ I("ex_tagged_ibs_ops.ibs_tagged_ops_ret", 0x1cf, 0x2,
STXT("Tagged IBS Ops. Number of Ops tagged by IBS that retired")) },
/* floating point: */
{ I("fp_disp_faults.x87_fill_fault", 0xe, 0x1,
STXT("Floating Point Dispatch Faults. x87 fill fault")) },
{ I("fp_disp_faults.xmm_fill_fault", 0xe, 0x2,
STXT("Floating Point Dispatch Faults. XMM fill fault")) },
{ I("fp_disp_faults.ymm_fill_fault", 0xe, 0x4,
STXT("Floating Point Dispatch Faults. YMM fill fault")) },
{ I("fp_disp_faults.ymm_spill_fault", 0xe, 0x8,
STXT("Floating Point Dispatch Faults. YMM spill fault")) },
{ I("fp_num_mov_elim_scal_op.opt_potential", 0x4, 0x4,
STXT("Number of Ops that are candidates for optimization (have Z-bit either"
"set or pass). This is a dispatch based speculative event, and is"
"useful for measuring the effectiveness of the Move elimination and"
"Scalar code optimization schemes")) },
{ I("fp_num_mov_elim_scal_op.optimized", 0x4, 0x8,
STXT("Number of Scalar Ops optimized. This is a dispatch based speculative"
"event, and is useful for measuring the effectiveness of the Move"
"elimination and Scalar code optimization schemes")) },
{ I("fp_num_mov_elim_scal_op.sse_mov_ops", 0x4, 0x1,
STXT("Number of SSE Move Ops. This is a dispatch based speculative event,"
"and is useful for measuring the effectiveness of the Move elimination"
"and Scalar code optimization schemes")) },
{ I("fp_num_mov_elim_scal_op.sse_mov_ops_elim", 0x4, 0x2,
STXT("Number of SSE Move Ops eliminated. This is a dispatch based"
"speculative event, and is useful for measuring the effectiveness of"
"the Move elimination and Scalar code optimization schemes")) },
{ I("fp_ret_sse_avx_ops.add_sub_flops", 0x3, 0x1,
STXT("Add/subtract FLOPs. This is a retire-based event. The number of"
"retired SSE/AVX FLOPs. The number of events logged per cycle can vary"
"from 0 to 64. This event requires the use of the MergeEvent since it"
"can count above 15 events per cycle. See 2.1.17.3 [Large Increment per"
"Cycle Events]. It does not provide a useful count without the use of"
"the MergeEvent")) },
{ I("fp_ret_sse_avx_ops.all", 0x3, 0xff,
STXT("All FLOPS. This is a retire-based event. The number of retired SSE/AVX"
"FLOPS. The number of events logged per cycle can vary from 0 to 64."
"This event can count above 15")) },
{ I("fp_ret_sse_avx_ops.div_flops", 0x3, 0x4,
STXT("Divide/square root FLOPs. This is a retire-based event. The number of"
"retired SSE/AVX FLOPs. The number of events logged per cycle can vary"
"from 0 to 64. This event requires the use of the MergeEvent since it"
"can count above 15 events per cycle. See 2.1.17.3 [Large Increment per"
"Cycle Events]. It does not provide a useful count without the use of"
"the MergeEvent")) },
{ I("fp_ret_sse_avx_ops.mac_flops", 0x3, 0x8,
STXT("Multiply-Accumulate FLOPs. Each MAC operation is counted as 2 FLOPS."
"This is a retire-based event. The number of retired SSE/AVX FLOPs. The"
"number of events logged per cycle can vary from 0 to 64. This event"
"requires the use of the MergeEvent since it can count above 15 events"
"per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does"
"not provide a useful count without the use of the MergeEvent")) },
{ I("fp_ret_sse_avx_ops.mult_flops", 0x3, 0x2,
STXT("Multiply FLOPs. This is a retire-based event. The number of retired"
"SSE/AVX FLOPs. The number of events logged per cycle can vary from 0"
"to 64. This event requires the use of the MergeEvent since it can"
"count above 15 events per cycle. See 2.1.17.3 [Large Increment per"
"Cycle Events]. It does not provide a useful count without the use of"
"the MergeEvent")) },
{ I("fp_retired_ser_ops.sse_bot_ret", 0x5, 0x8,
STXT("SSE/AVX bottom-executing ops retired. The number of serializing Ops"
"retired")) },
{ I("fp_retired_ser_ops.sse_ctrl_ret", 0x5, 0x4,
STXT("SSE/AVX control word mispredict traps. The number of serializing Ops"
"retired")) },
{ I("fp_retired_ser_ops.x87_bot_ret", 0x5, 0x2,
STXT("x87 bottom-executing ops retired. The number of serializing Ops"
"retired")) },
{ I("fp_retired_ser_ops.x87_ctrl_ret", 0x5, 0x1,
STXT("x87 control word mispredict traps due to mispredictions in RC or PC,"
"or changes in mask bits. The number of serializing Ops retired")) },
{ I("fpu_pipe_assignment.total", 0, 0xf, STXT("Total number of fp uOps")) },
{ I("fpu_pipe_assignment.total0", 0, 0x1,
STXT("Total number of fp uOps on pipe 0")) },
{ I("fpu_pipe_assignment.total1", 0, 0x2,
STXT("Total number uOps assigned to pipe 1")) },
{ I("fpu_pipe_assignment.total2", 0, 0x4,
STXT("Total number uOps assigned to pipe 2")) },
{ I("fpu_pipe_assignment.total3", 0, 0x8,
STXT("Total number uOps assigned to pipe 3")) },
/* memory: */
{ I("ls_alloc_mab_count", 0x5f, 0, STXT("Count of Allocated Mabs")) },
{ I("ls_any_fills_from_sys.ext_cache_local", 0x44, 0x4,
STXT("Any Data Cache Fills by Data Source. From cache of different CCX in"
"same node")) },
{ I("ls_any_fills_from_sys.ext_cache_remote", 0x44, 0x10,
STXT("Any Data Cache Fills by Data Source. From CCX Cache in different Node")) },
{ I("ls_any_fills_from_sys.int_cache", 0x44, 0x2,
STXT("Any Data Cache Fills by Data Source. From L3 or different L2 in same"
"CCX")) },
{ I("ls_any_fills_from_sys.lcl_l2", 0x44, 0x1,
STXT("Any Data Cache Fills by Data Source. From Local L2 to the core")) },
{ I("ls_any_fills_from_sys.mem_io_local", 0x44, 0x8,
STXT("Any Data Cache Fills by Data Source. From DRAM or IO connected in same"
"node")) },
{ I("ls_any_fills_from_sys.mem_io_remote", 0x44, 0x40,
STXT("Any Data Cache Fills by Data Source. From DRAM or IO connected in"
"different Node")) },
{ I("ls_bad_status2.stli_other", 0x24, 0x2,
STXT("Non-forwardable conflict; used to reduce STLI's via software. All"
"reasons. Store To Load Interlock (STLI) are loads that were unable to"
"complete because of a possible match with an older store, and the"
"older store could not do STLF for some reason")) },
{ I("ls_dc_accesses", 0x40, 0,
STXT("Number of accesses to the dcache for load/store references")) },
{ I("ls_dispatch.ld_dispatch", 0x29, 0x1,
STXT("Dispatch of a single op that performs a memory load. Counts the number"
"of operations dispatched to the LS unit. Unit Masks ADDed")) },
{ I("ls_dispatch.ld_st_dispatch", 0x29, 0x4,
STXT("Load-op-Store Dispatch. Dispatch of a single op that performs a load"
"from and store to the same memory address. Counts the number of"
"operations dispatched to the LS unit. Unit Masks ADDed")) },
{ I("ls_dispatch.store_dispatch", 0x29, 0x2,
STXT("Dispatch of a single op that performs a memory store. Counts the"
"number of operations dispatched to the LS unit. Unit Masks ADDed")) },
{ I("ls_dmnd_fills_from_sys.ext_cache_local", 0x43, 0x4,
STXT("Demand Data Cache Fills by Data Source. From cache of different CCX in"
"same node")) },
{ I("ls_dmnd_fills_from_sys.ext_cache_remote", 0x43, 0x10,
STXT("Demand Data Cache Fills by Data Source. From CCX Cache in different"
"Node")) },
{ I("ls_dmnd_fills_from_sys.int_cache", 0x43, 0x2,
STXT("Demand Data Cache Fills by Data Source. From L3 or different L2 in"
"same CCX")) },
{ I("ls_dmnd_fills_from_sys.lcl_l2", 0x43, 0x1,
STXT("Demand Data Cache Fills by Data Source. From Local L2 to the core")) },
{ I("ls_dmnd_fills_from_sys.mem_io_local", 0x43, 0x8,
STXT("Demand Data Cache Fills by Data Source. From DRAM or IO connected in"
"same node")) },
{ I("ls_dmnd_fills_from_sys.mem_io_remote", 0x43, 0x40,
STXT("Demand Data Cache Fills by Data Source. From DRAM or IO connected in"
"different Node")) },
{ I("ls_hw_pf_dc_fills.ext_cache_local", 0x5a, 0x4,
STXT("Hardware Prefetch Data Cache Fills by Data Source. From cache of"
"different CCX in same node")) },
{ I("ls_hw_pf_dc_fills.ext_cache_remote", 0x5a, 0x10,
STXT("Hardware Prefetch Data Cache Fills by Data Source. From CCX Cache in"
"different Node")) },
{ I("ls_hw_pf_dc_fills.int_cache", 0x5a, 0x2,
STXT("Hardware Prefetch Data Cache Fills by Data Source. From L3 or"
"different L2 in same CCX")) },
{ I("ls_hw_pf_dc_fills.lcl_l2", 0x5a, 0x1,
STXT("Hardware Prefetch Data Cache Fills by Data Source. From Local L2 to"
"the core")) },
{ I("ls_hw_pf_dc_fills.mem_io_local", 0x5a, 0x8,
STXT("Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO"
"connected in same node")) },
{ I("ls_hw_pf_dc_fills.mem_io_remote", 0x5a, 0x40,
STXT("Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO"
"connected in different Node")) },
{ I("ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", 0x52, 0x1,
STXT("The number of software prefetches that did not fetch data outside of"
"the processor core. Software PREFETCH instruction saw a DC hit")) },
{ I("ls_inef_sw_pref.mab_mch_cnt", 0x52, 0x2,
STXT("The number of software prefetches that did not fetch data outside of"
"the processor core. Software PREFETCH instruction saw a match on an"
"already-allocated miss request buffer")) },
{ I("ls_int_taken", 0x2c, 0,
STXT("Counts the number of interrupts taken")) },
{ I("ls_l1_d_tlb_miss.all", 0x45, 0xff,
STXT("All L1 DTLB Misses or Reloads. Use l1_dtlb_misses instead")) },
{ I("ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", 0x45, 0x8,
STXT("L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB")) },
{ I("ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", 0x45, 0x80,
STXT("L1 DTLB Miss. DTLB reload to a 1G page that also missed in the L2 TLB")) },
{ I("ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", 0x45, 0x4,
STXT("L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB")) },
{ I("ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", 0x45, 0x40,
STXT("L1 DTLB Miss. DTLB reload to a 2M page that also missed in the L2 TLB")) },
{ I("ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", 0x45, 0x1,
STXT("L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB")) },
{ I("ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", 0x45, 0x10,
STXT("L1 DTLB Miss. DTLB reload to a 4K page that missed the L2 TLB")) },
{ I("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", 0x45, 0x2,
STXT("L1 DTLB Miss. DTLB reload to a coalesced page that hit in the L2 TLB")) },
{ I("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss", 0x45, 0x20,
STXT("L1 DTLB Miss. DTLB reload coalesced page that also missed in the L2"
"TLB")) },
{ I("ls_locks.bus_lock", 0x25, 0x1,
STXT("Retired lock instructions. Comparable to legacy bus lock")) },
{ I("ls_locks.non_spec_lock", 0x25, 0x2,
STXT("Retired lock instructions. Non-speculative lock succeeded")) },
{ I("ls_locks.spec_lock_hi_spec", 0x25, 0x8,
STXT("Retired lock instructions. High speculative cacheable lock speculation"
"succeeded")) },
{ I("ls_locks.spec_lock_lo_spec", 0x25, 0x4,
STXT("Retired lock instructions. Low speculative cacheable lock speculation"
"succeeded")) },
{ I("ls_mab_alloc.all_allocations", 0x41, 0x7f,
STXT("All Allocations. Counts when a LS pipe allocates a MAB entry")) },
{ I("ls_mab_alloc.dc_prefetcher", 0x41, 0x8,
STXT("LS MAB Allocates by Type. DC prefetcher")) },
{ I("ls_mab_alloc.hardware_prefetcher_allocations", 0x41, 0x40,
STXT("Hardware Prefetcher Allocations. Counts when a LS pipe allocates a MAB"
"entry")) },
{ I("ls_mab_alloc.load_store_allocations", 0x41, 0x3f,
STXT("Load Store Allocations. Counts when a LS pipe allocates a MAB entry")) },
{ I("ls_mab_alloc.loads", 0x41, 0x1,
STXT("LS MAB Allocates by Type. Loads")) },
{ I("ls_mab_alloc.stores", 0x41, 0x2,
STXT("LS MAB Allocates by Type. Stores")) },
{ I("ls_misal_loads.ma4k", 0x47, 0x2,
STXT("The number of 4KB misaligned (i.e., page crossing) loads")) },
{ I("ls_misal_loads.ma64", 0x47, 0x1,
STXT("The number of 64B misaligned (i.e., cacheline crossing) loads")) },
{ I("ls_not_halted_cyc", 0x76, 0, STXT("Cycles not in Halt")) },
{ I("ls_pref_instr_disp", 0x4b, 0xff,
STXT("Software Prefetch Instructions Dispatched (Speculative)")) },
{ I("ls_pref_instr_disp.prefetch", 0x4b, 0x1,
STXT("Software Prefetch Instructions Dispatched (Speculative). PrefetchT0,"
"T1 and T2 instructions. See docAPM3 PREFETCHlevel")) },
{ I("ls_pref_instr_disp.prefetch_nta", 0x4b, 0x4,
STXT("Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA"
"instruction. See docAPM3 PREFETCHlevel")) },
{ I("ls_pref_instr_disp.prefetch_w", 0x4b, 0x2,
STXT("Software Prefetch Instructions Dispatched (Speculative). PrefetchW"
"instruction. See docAPM3 PREFETCHW")) },
{ I("ls_rdtsc", 0x2d, 0,
STXT("Number of reads of the TSC (RDTSC instructions). The count is"
"speculative")) },
{ I("ls_ret_cl_flush", 0x26, 0,
STXT("The number of retired CLFLUSH instructions. This is a non-speculative"
"event")) },
{ I("ls_ret_cpuid", 0x27, 0,
STXT("The number of CPUID instructions retired")) },
{ I("ls_smi_rx", 0x2b, 0, STXT("Counts the number of SMIs received")) },
{ I("ls_st_commit_cancel2.st_commit_cancel_wcb_full", 0x37, 0x1,
STXT("A non-cacheable store and the non-cacheable commit buffer is full")) },
{ I("ls_stlf", 0x35, 0, STXT("Number of STLF hits")) },
{ I("ls_sw_pf_dc_fills.ext_cache_local", 0x59, 0x4,
STXT("Software Prefetch Data Cache Fills by Data Source. From cache of"
"different CCX in same node")) },
{ I("ls_sw_pf_dc_fills.ext_cache_remote", 0x59, 0x10,
STXT("Software Prefetch Data Cache Fills by Data Source. From CCX Cache in"
"different Node")) },
{ I("ls_sw_pf_dc_fills.int_cache", 0x59, 0x2,
STXT("Software Prefetch Data Cache Fills by Data Source. From L3 or"
"different L2 in same CCX")) },
{ I("ls_sw_pf_dc_fills.lcl_l2", 0x59, 0x1,
STXT("Software Prefetch Data Cache Fills by Data Source. From Local L2 to"
"the core")) },
{ I("ls_sw_pf_dc_fills.mem_io_local", 0x59, 0x8,
STXT("Software Prefetch Data Cache Fills by Data Source. From DRAM or IO"
"connected in same node")) },
{ I("ls_sw_pf_dc_fills.mem_io_remote", 0x59, 0x40,
STXT("Software Prefetch Data Cache Fills by Data Source. From DRAM or IO"
"connected in different Node")) },
{ I("ls_tablewalker.dc_type0", 0x46, 0x1,
STXT("Total Page Table Walks DC Type 0")) },
{ I("ls_tablewalker.dc_type1", 0x46, 0x2,
STXT("Total Page Table Walks DC Type 1")) },
{ I("ls_tablewalker.dside", 0x46, 0x3,
STXT("Total Page Table Walks on D-side")) },
{ I("ls_tablewalker.ic_type0", 0x46, 0x4,
STXT("Total Page Table Walks IC Type 0")) },
{ I("ls_tablewalker.ic_type1", 0x46, 0x8,
STXT("Total Page Table Walks IC Type 1")) },
{ I("ls_tablewalker.iside", 0x46, 0xc,
STXT("Total Page Table Walks on I-side")) },
{ I("ls_tlb_flush.all_tlb_flushes", 0x78, 0xff,
STXT("All TLB Flushes. Requires unit mask 0xFF to engage event for counting."
"Use all_tlbs_flushed instead")) },
/* other: */
{ I("de_dis_cops_from_decoder.disp_op_type.any_fp_dispatch", 0xab, 0x4,
STXT("Any FP dispatch. Types of Oops Dispatched from Decoder")) },
{ I("de_dis_cops_from_decoder.disp_op_type.any_integer_dispatch", 0xab, 0x8,
STXT("Any Integer dispatch. Types of Oops Dispatched from Decoder")) },
{ I("de_dis_dispatch_token_stalls1.fp_flush_recovery_stall", 0xae, 0x80,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a Token Stall. Also counts cycles when the thread is not selected"
"to dispatch but would have been stalled due to a Token Stall. FP Flush"
"recovery stall")) },
{ I("de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall", 0xae, 0x20,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a Token Stall. Also counts cycles when the thread is not selected"
"to dispatch but would have been stalled due to a Token Stall. Floating"
"point register file resource stall. Applies to all FP ops that have a"
"destination register")) },
{ I("de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall", 0xae, 0x40,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a Token Stall. Also counts cycles when the thread is not selected"
"to dispatch but would have been stalled due to a Token Stall. FP"
"scheduler resource stall. Applies to ops that use the FP scheduler")) },
{ I("de_dis_dispatch_token_stalls1.int_phy_reg_file_rsrc_stall", 0xae, 0x1,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a Token Stall. Also counts cycles when the thread is not selected"
"to dispatch but would have been stalled due to a Token Stall. Integer"
"Physical Register File resource stall. Integer Physical Register File,"
"applies to all ops that have an integer destination register")) },
{ I("de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", 0xae, 0x8,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a token stall. Integer Scheduler miscellaneous resource stall")) },
{ I("de_dis_dispatch_token_stalls1.load_queue_rsrc_stall", 0xae, 0x2,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a Token Stall. Also counts cycles when the thread is not selected"
"to dispatch but would have been stalled due to a Token Stall. Load"
"Queue resource stall. Applies to all ops with load semantics")) },
{ I("de_dis_dispatch_token_stalls1.store_queue_rsrc_stall", 0xae, 0x4,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a Token Stall. Also counts cycles when the thread is not selected"
"to dispatch but would have been stalled due to a Token Stall. Store"
"Queue resource stall. Applies to all ops with store semantics")) },
{ I("de_dis_dispatch_token_stalls1.taken_brnch_buffer_rsrc", 0xae, 0x10,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a Token Stall. Also counts cycles when the thread is not selected"
"to dispatch but would have been stalled due to a Token Stall. Taken"
"branch buffer resource stall")) },
{ I("de_dis_dispatch_token_stalls2.agsq_token_stall", 0xaf, 0x10,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a token stall. AGSQ Tokens unavailable")) },
{ I("de_dis_dispatch_token_stalls2.int_sch0_token_stall", 0xaf, 0x1,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a token stall. No tokens for Integer Scheduler Queue 0 available")) },
{ I("de_dis_dispatch_token_stalls2.int_sch1_token_stall", 0xaf, 0x2,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a token stall. No tokens for Integer Scheduler Queue 1 available")) },
{ I("de_dis_dispatch_token_stalls2.int_sch2_token_stall", 0xaf, 0x4,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a token stall. No tokens for Integer Scheduler Queue 2 available")) },
{ I("de_dis_dispatch_token_stalls2.int_sch3_token_stall", 0xaf, 0x8,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a token stall. No tokens for Integer Scheduler Queue 3 available")) },
{ I("de_dis_dispatch_token_stalls2.retire_token_stall", 0xaf, 0x20,
STXT("Cycles where a dispatch group is valid but does not get dispatched due"
"to a token stall. Insufficient Retire Queue tokens available")) },
{ I("de_dis_uop_queue_empty_di0", 0xa9, 0,
STXT("Cycles where the Micro-Op Queue is empty")) },
/* recommended: */
{ I("all_data_cache_accesses", 0x29, 0x7,
STXT("All L1 Data Cache Accesses")) },
{ I("all_tlbs_flushed", 0x78, 0xff, STXT("All TLBs Flushed")) },
{ I("l1_data_cache_fills_all", 0x44, 0xff,
STXT("L1 Data Cache Fills: All")) },
{ I("l1_data_cache_fills_from_external_ccx_cache", 0x44, 0x14,
STXT("L1 Data Cache Fills: From External CCX Cache")) },
{ I("l1_data_cache_fills_from_memory", 0x44, 0x48,
STXT("L1 Data Cache Fills: From Memory")) },
{ I("l1_data_cache_fills_from_remote_node", 0x44, 0x50,
STXT("L1 Data Cache Fills: From Remote Node")) },
{ I("l1_data_cache_fills_from_within_same_ccx", 0x44, 0x3,
STXT("L1 Data Cache Fills: From within same CCX")) },
{ I("l1_dtlb_misses", 0x45, 0xff, STXT("L1 DTLB Misses")) },
{ I("l2_cache_accesses_from_dc_misses", 0x60, 0xe8,
STXT("L2 Cache Accesses from L1 Data Cache Misses (including prefetch)")) },
{ I("l2_cache_accesses_from_ic_misses", 0x60, 0x10,
STXT("L2 Cache Accesses from L1 Instruction Cache Misses (including"
"prefetch)")) },
{ I("l2_cache_hits_from_dc_misses", 0x64, 0xf0,
STXT("L2 Cache Hits from L1 Data Cache Misses")) },
{ I("l2_cache_hits_from_ic_misses", 0x64, 0x6,
STXT("L2 Cache Hits from L1 Instruction Cache Misses")) },
{ I("l2_cache_hits_from_l2_hwpf", 0x70, 0xff,
STXT("L2 Cache Hits from L2 Cache HWPF")) },
{ I("l2_cache_misses_from_dc_misses", 0x64, 0x8,
STXT("L2 Cache Misses from L1 Data Cache Misses")) },
{ I("l2_cache_misses_from_ic_miss", 0x64, 0x1,
STXT("L2 Cache Misses from L1 Instruction Cache Misses")) },
{ I("l2_dtlb_misses", 0x45, 0xf0,
STXT("L2 DTLB Misses & Data page walks")) },
{ I("l2_itlb_misses", 0x85, 0x7,
STXT("L2 ITLB Misses & Instruction page walks")) },
{ I("macro_ops_retired", 0xc1, 0, STXT("Macro-ops Retired")) },
{ I("sse_avx_stalls", 0xe, 0xe, STXT("Mixed SSE/AVX Stalls")) },
{ NULL, NULL, 0, NULL }
};
#undef I
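
Usage sketch: on a machine where this table is selected, the counters above can be requested by name with gprofng collect's -h option. The bare counter name without an explicit rate is an assumption here; defaults would apply:

$ gprofng collect app -h ex_ret_instr ./a.out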