mirror of
https://github.com/edk2-porting/linux-next.git
synced 2025-01-04 11:43:54 +08:00
fdfbbd07e9
Add a generic qualifier for transaction events, as a new sample type that returns a flag word. This is particularly useful for qualifying aborts: to distinguish aborts which happen due to asynchronous events (like conflicts caused by another CPU) versus instructions that lead to an abort. The tuning strategies are very different for those cases, so it's important to distinguish them easily and early. Since it's inconvenient and inflexible to filter for this in the kernel we report all the events out and allow some post processing in user space. The flags are based on the Intel TSX events, but should be fairly generic and mostly applicable to other HTM architectures too. In addition to various flag words there's also reserved space to report a program-supplied abort code. For TSX this is used to distinguish specific classes of aborts, like a lock busy abort when doing lock elision. Flags: Elision and generic transactions (ELISION vs TRANSACTION) (HLE vs RTM on TSX; IBM etc. would likely only use TRANSACTION) Aborts caused by current thread vs aborts caused by others (SYNC vs ASYNC) Retryable transaction (RETRY) Conflicts with other threads (CONFLICT) Transaction write capacity overflow (CAPACITY WRITE) Transaction read capacity overflow (CAPACITY READ) Transactions implicitly aborted can also return an abort code. This can be used to signal specific events to the profiler. A common case is abort on lock busy in an RTM eliding library (code 0xff). To handle this case we include the TSX abort code. Common example aborts in TSX would be: - Data conflict with another thread on memory read. Flags: TRANSACTION|ASYNC|CONFLICT - executing a WRMSR in a transaction. Flags: TRANSACTION|SYNC - HLE transaction in user space is too large. Flags: ELISION|SYNC|CAPACITY-WRITE The only flag that is somewhat TSX specific is ELISION. This adds the perf core glue needed for reporting the new flag word out. 
v2: Add MEM/MISC v3: Move transaction to the end v4: Separate capacity-read/write and remove misc v5: Remove _SAMPLE. Move abort flags to 32bit. Rename transaction to txn Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1379688044-14173-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
814 lines
23 KiB
C
814 lines
23 KiB
C
/*
|
|
* Performance events:
|
|
*
|
|
* Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
|
|
* Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
|
|
* Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
|
|
*
|
|
* Data type definitions, declarations, prototypes.
|
|
*
|
|
* Started by: Thomas Gleixner and Ingo Molnar
|
|
*
|
|
* For licencing details see kernel-base/COPYING
|
|
*/
|
|
#ifndef _UAPI_LINUX_PERF_EVENT_H
|
|
#define _UAPI_LINUX_PERF_EVENT_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/ioctl.h>
|
|
#include <asm/byteorder.h>
|
|
|
|
/*
|
|
* User-space ABI bits:
|
|
*/
|
|
|
|
/*
 * attr.type
 *
 * Major event class.  Every value except the trailing _MAX marker is
 * spelled out explicitly; _MAX is marked non-ABI and simply follows the
 * last real entry.
 */
enum perf_type_id {
	PERF_TYPE_HARDWARE			= 0,
	PERF_TYPE_SOFTWARE			= 1,
	PERF_TYPE_TRACEPOINT			= 2,
	PERF_TYPE_HW_CACHE			= 3,
	PERF_TYPE_RAW				= 4,
	PERF_TYPE_BREAKPOINT			= 5,

	PERF_TYPE_MAX,				/* non-ABI */
};
|
|
|
|
/*
 * Generalized performance event event_id types, used by the
 * attr.event_id parameter of the sys_perf_event_open()
 * syscall:
 *
 * NOTE(review): struct perf_event_attr in this header has no member named
 * event_id; presumably these values are passed in attr.config -- confirm.
 */
enum perf_hw_id {
	/*
	 * Common hardware events, generalized by the kernel:
	 */
	PERF_COUNT_HW_CPU_CYCLES		= 0,
	PERF_COUNT_HW_INSTRUCTIONS		= 1,
	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
	PERF_COUNT_HW_CACHE_MISSES		= 3,
	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
	PERF_COUNT_HW_BRANCH_MISSES		= 5,
	PERF_COUNT_HW_BUS_CYCLES		= 6,
	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	= 7,
	PERF_COUNT_HW_STALLED_CYCLES_BACKEND	= 8,
	PERF_COUNT_HW_REF_CPU_CYCLES		= 9,

	PERF_COUNT_HW_MAX,			/* non-ABI */
};
|
|
|
|
/*
 * Generalized hardware cache events:
 *
 *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
 *       { read, write, prefetch } x
 *       { accesses, misses }
 *
 * This enum is the first axis of that product (which cache/unit).
 */
enum perf_hw_cache_id {
	PERF_COUNT_HW_CACHE_L1D			= 0,
	PERF_COUNT_HW_CACHE_L1I			= 1,
	PERF_COUNT_HW_CACHE_LL			= 2,
	PERF_COUNT_HW_CACHE_DTLB		= 3,
	PERF_COUNT_HW_CACHE_ITLB		= 4,
	PERF_COUNT_HW_CACHE_BPU			= 5,
	PERF_COUNT_HW_CACHE_NODE		= 6,

	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
};
|
|
|
|
/*
 * Operation axis of the generalized hardware cache events:
 * { read, write, prefetch }.
 */
enum perf_hw_cache_op_id {
	PERF_COUNT_HW_CACHE_OP_READ		= 0,
	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,

	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
};
|
|
|
|
/*
 * Result axis of the generalized hardware cache events:
 * { accesses, misses }.
 */
enum perf_hw_cache_op_result_id {
	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,

	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
};
|
|
|
|
/*
 * Special "software" events provided by the kernel, even if the hardware
 * does not support performance events. These events measure various
 * physical and sw events of the kernel (and allow the profiling of them as
 * well):
 */
enum perf_sw_ids {
	PERF_COUNT_SW_CPU_CLOCK			= 0,
	PERF_COUNT_SW_TASK_CLOCK		= 1,
	PERF_COUNT_SW_PAGE_FAULTS		= 2,
	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
	PERF_COUNT_SW_DUMMY			= 9,

	PERF_COUNT_SW_MAX,			/* non-ABI */
};
|
|
|
|
/*
 * Bits that can be set in attr.sample_type to request information
 * in the overflow packets.
 *
 * Each flag occupies its own bit, so flags can be OR-ed together.
 * PERF_SAMPLE_MAX is one bit above the highest defined flag and is not
 * part of the ABI.
 */
enum perf_event_sample_format {
	PERF_SAMPLE_IP				= 1U << 0,
	PERF_SAMPLE_TID				= 1U << 1,
	PERF_SAMPLE_TIME			= 1U << 2,
	PERF_SAMPLE_ADDR			= 1U << 3,
	PERF_SAMPLE_READ			= 1U << 4,
	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
	PERF_SAMPLE_ID				= 1U << 6,
	PERF_SAMPLE_CPU				= 1U << 7,
	PERF_SAMPLE_PERIOD			= 1U << 8,
	PERF_SAMPLE_STREAM_ID			= 1U << 9,
	PERF_SAMPLE_RAW				= 1U << 10,
	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
	PERF_SAMPLE_REGS_USER			= 1U << 12,
	PERF_SAMPLE_STACK_USER			= 1U << 13,
	PERF_SAMPLE_WEIGHT			= 1U << 14,
	PERF_SAMPLE_DATA_SRC			= 1U << 15,
	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
	PERF_SAMPLE_TRANSACTION			= 1U << 17,

	PERF_SAMPLE_MAX = 1U << 18,		/* non-ABI */
};
|
|
|
|
/*
 * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK
 * is set
 *
 * If the user does not pass priv level information via branch_sample_type,
 * the kernel uses the event's priv level. Branch and event priv levels do
 * not have to match. Branch priv level is checked for permissions.
 *
 * The branch types can be combined, however BRANCH_ANY covers all types
 * of branches and therefore it supersedes all the other types.
 */
enum perf_branch_sample_type {
	PERF_SAMPLE_BRANCH_USER		= 1U << 0, /* user branches */
	PERF_SAMPLE_BRANCH_KERNEL	= 1U << 1, /* kernel branches */
	PERF_SAMPLE_BRANCH_HV		= 1U << 2, /* hypervisor branches */

	PERF_SAMPLE_BRANCH_ANY		= 1U << 3, /* any branch types */
	PERF_SAMPLE_BRANCH_ANY_CALL	= 1U << 4, /* any call branch */
	PERF_SAMPLE_BRANCH_ANY_RETURN	= 1U << 5, /* any return branch */
	PERF_SAMPLE_BRANCH_IND_CALL	= 1U << 6, /* indirect calls */
	PERF_SAMPLE_BRANCH_ABORT_TX	= 1U << 7, /* transaction aborts */
	PERF_SAMPLE_BRANCH_IN_TX	= 1U << 8, /* in transaction */
	PERF_SAMPLE_BRANCH_NO_TX	= 1U << 9, /* not in transaction */

	PERF_SAMPLE_BRANCH_MAX		= 1U << 10, /* non-ABI */
};

/* All privilege-level selector bits (user, kernel, hypervisor) combined. */
#define PERF_SAMPLE_BRANCH_PLM_ALL \
	(PERF_SAMPLE_BRANCH_USER|\
	 PERF_SAMPLE_BRANCH_KERNEL|\
	 PERF_SAMPLE_BRANCH_HV)
|
|
|
|
/*
 * Values to determine ABI of the registers dump.
 */
enum perf_sample_regs_abi {
	PERF_SAMPLE_REGS_ABI_NONE	= 0,
	PERF_SAMPLE_REGS_ABI_32		= 1,
	PERF_SAMPLE_REGS_ABI_64		= 2,
};
|
|
|
|
/*
 * Values for the memory transaction event qualifier, mostly for
 * abort events. Multiple bits can be set.
 *
 * The qualifier is a 64-bit word: the flag bits below live in the low
 * half, the abort code in the high half.  To pull the abort code out:
 *
 *   code = (txn & PERF_TXN_ABORT_MASK) >> PERF_TXN_ABORT_SHIFT;
 *
 * Note that PERF_TXN_ABORT_MASK does not fit in an int, so this enum
 * relies on the compiler accepting enumerator values wider than int.
 */
enum {
	PERF_TXN_ELISION        = (1 << 0), /* From elision */
	PERF_TXN_TRANSACTION    = (1 << 1), /* From transaction */
	PERF_TXN_SYNC           = (1 << 2), /* Instruction is related */
	PERF_TXN_ASYNC          = (1 << 3), /* Instruction not related */
	PERF_TXN_RETRY          = (1 << 4), /* Retry possible */
	PERF_TXN_CONFLICT       = (1 << 5), /* Conflict abort */
	PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
	PERF_TXN_CAPACITY_READ  = (1 << 7), /* Capacity read abort */

	PERF_TXN_MAX	        = (1 << 8), /* non-ABI */

	/* bits 32..63 are reserved for the abort code */

	PERF_TXN_ABORT_MASK  = (0xffffffffULL << 32),
	PERF_TXN_ABORT_SHIFT = 32,
};
|
|
|
|
/*
 * The format of the data returned by read() on a perf event fd,
 * as specified by attr.read_format:
 *
 * struct read_format {
 *	{ u64		value;
 *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64		id;           } && PERF_FORMAT_ID
 *	} && !PERF_FORMAT_GROUP
 *
 *	{ u64		nr;
 *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64		value;
 *	    { u64	id;           } && PERF_FORMAT_ID
 *	  }		cntr[nr];
 *	} && PERF_FORMAT_GROUP
 * };
 *
 * In the notation above, "{ ... } && FLAG" means the braced fields are
 * present only when FLAG is set in attr.read_format.
 */
enum perf_event_read_format {
	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
	PERF_FORMAT_ID				= 1U << 2,
	PERF_FORMAT_GROUP			= 1U << 3,

	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
};
|
|
|
|
/*
 * Byte sizes of the successive published layouts of struct perf_event_attr;
 * each comment names the member(s) that version added.
 */
#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
#define PERF_ATTR_SIZE_VER1	72	/* add: config2 */
#define PERF_ATTR_SIZE_VER2	80	/* add: branch_sample_type */
#define PERF_ATTR_SIZE_VER3	96	/* add: sample_regs_user */
					/* add: sample_stack_user */
|
|
|
|
/*
 * Hardware event_id to monitor via a performance monitoring event:
 *
 * All members are fixed-width types and the flag bit-field fills exactly
 * one __u64 (24 named bits + 40 reserved), so the structure is 96 bytes
 * (PERF_ATTR_SIZE_VER3 in this header's versioning scheme).
 */
struct perf_event_attr {

	/*
	 * Major type: hardware/software/tracepoint/etc.
	 */
	__u32			type;

	/*
	 * Size of the attr structure, for fwd/bwd compat.
	 */
	__u32			size;

	/*
	 * Type specific configuration information.
	 */
	__u64			config;

	/* Selected by the 'freq' flag below: "use freq, not period". */
	union {
		__u64		sample_period;
		__u64		sample_freq;
	};

	__u64			sample_type;
	__u64			read_format;

	__u64			disabled       :  1, /* off by default        */
				inherit	       :  1, /* children inherit it   */
				pinned	       :  1, /* must always be on PMU */
				exclusive      :  1, /* only group on PMU     */
				exclude_user   :  1, /* don't count user      */
				exclude_kernel :  1, /* ditto kernel          */
				exclude_hv     :  1, /* ditto hypervisor      */
				exclude_idle   :  1, /* don't count when idle */
				mmap           :  1, /* include mmap data     */
				comm	       :  1, /* include comm data     */
				freq           :  1, /* use freq, not period  */
				inherit_stat   :  1, /* per task counts       */
				enable_on_exec :  1, /* next exec enables     */
				task           :  1, /* trace fork/exit       */
				watermark      :  1, /* wakeup_watermark      */
				/*
				 * precise_ip:
				 *
				 *  0 - SAMPLE_IP can have arbitrary skid
				 *  1 - SAMPLE_IP must have constant skid
				 *  2 - SAMPLE_IP requested to have 0 skid
				 *  3 - SAMPLE_IP must have 0 skid
				 *
				 *  See also PERF_RECORD_MISC_EXACT_IP
				 */
				precise_ip     :  2, /* skid constraint       */
				mmap_data      :  1, /* non-exec mmap data    */
				sample_id_all  :  1, /* sample_type all events */

				exclude_host   :  1, /* don't count in host   */
				exclude_guest  :  1, /* don't count in guest  */

				exclude_callchain_kernel : 1, /* exclude kernel callchains */
				exclude_callchain_user   : 1, /* exclude user callchains */
				mmap2          :  1, /* include mmap with inode data     */

				__reserved_1   : 40;

	/* Selected by the 'watermark' flag above. */
	union {
		__u32		wakeup_events;	  /* wakeup every n events */
		__u32		wakeup_watermark; /* bytes before wakeup   */
	};

	__u32			bp_type;
	union {
		__u64		bp_addr;
		__u64		config1; /* extension of config */
	};
	union {
		__u64		bp_len;
		__u64		config2; /* extension of config1 */
	};
	__u64	branch_sample_type; /* enum perf_branch_sample_type */

	/*
	 * Defines set of user regs to dump on samples.
	 * See asm/perf_regs.h for details.
	 */
	__u64	sample_regs_user;

	/*
	 * Defines size of the user stack to dump on samples.
	 */
	__u32	sample_stack_user;

	/* Align to u64. */
	__u32	__reserved_2;
};
|
|
|
|
/*
 * Yields (as an lvalue) the 64-bit word located immediately after
 * attr->read_format.  In struct perf_event_attr that is the storage of
 * the flag bit-fields (disabled, inherit, pinned, ...), so this gives
 * access to all of them as a single __u64.  'attr' must be a pointer.
 */
#define perf_flags(attr) (*(&(attr)->read_format + 1))
|
|
|
|
/*
 * Ioctls that can be done on a perf event fd:
 *
 * All requests use ioctl type '$' with sequence numbers 0..7.  The ones
 * built with _IOW/_IOR also encode the size of the named argument type.
 */
#define PERF_EVENT_IOC_ENABLE		_IO ('$', 0)
#define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
#define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
#define PERF_EVENT_IOC_RESET		_IO ('$', 3)
#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
#define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
#define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
|
|
|
|
/*
 * Flag bits for the perf event ioctls.
 * NOTE(review): presumably passed as the ioctl argument to make a request
 * act on the whole event group -- confirm against the kernel side.
 */
enum perf_event_ioc_flags {
	PERF_IOC_FLAG_GROUP		= 1U << 0,
};
|
|
|
|
/*
 * Structure of the page that can be mapped via mmap
 *
 * The self-monitoring header (everything up to and including __reserved[])
 * is padded to 1024 bytes; data_head/data_tail follow it.
 */
struct perf_event_mmap_page {
	__u32	version;		/* version number of this structure */
	__u32	compat_version;		/* lowest version this is compat with */

	/*
	 * Bits needed to read the hw events in user-space.
	 *
	 *   u32 seq, time_mult, time_shift, idx, width;
	 *   u64 count, enabled, running;
	 *   u64 cyc, time_offset;
	 *   s64 pmc = 0;
	 *
	 *   do {
	 *     seq = pc->lock;
	 *     barrier();
	 *
	 *     enabled = pc->time_enabled;
	 *     running = pc->time_running;
	 *
	 *     if (pc->cap_user_time && enabled != running) {
	 *       cyc = rdtsc();
	 *       time_offset = pc->time_offset;
	 *       time_mult   = pc->time_mult;
	 *       time_shift  = pc->time_shift;
	 *     }
	 *
	 *     idx = pc->index;
	 *     count = pc->offset;
	 *     if (pc->cap_user_rdpmc && idx) {
	 *       width = pc->pmc_width;
	 *       pmc = rdpmc(idx - 1);
	 *     }
	 *
	 *     barrier();
	 *   } while (pc->lock != seq);
	 *
	 * NOTE: for obvious reason this only works on self-monitoring
	 *       processes.
	 */
	__u32	lock;			/* seqlock for synchronization */
	__u32	index;			/* hardware event identifier */
	__s64	offset;			/* add to hardware event value */
	__u64	time_enabled;		/* time event active */
	__u64	time_running;		/* time event on cpu */
	union {
		__u64	capabilities;
		struct {
			__u64	cap_bit0		: 1, /* Always 0, deprecated, see commit 860f085b74e9 */
				cap_bit0_is_deprecated	: 1, /* Always 1, signals that bit 0 is zero */

				cap_user_rdpmc		: 1, /* The RDPMC instruction can be used to read counts */
				cap_user_time		: 1, /* The time_* fields are used */
				cap_user_time_zero	: 1, /* The time_zero field is used */
				cap_____res		: 59;
		};
	};

	/*
	 * If cap_user_rdpmc this field provides the bit-width of the value
	 * read using the rdpmc() or equivalent instruction. This can be used
	 * to sign extend the result like:
	 *
	 *   pmc <<= 64 - width;
	 *   pmc >>= 64 - width; // signed shift right
	 *   count += pmc;
	 */
	__u16	pmc_width;

	/*
	 * If cap_user_time the below fields can be used to compute the time
	 * delta since time_enabled (in ns) using rdtsc or similar.
	 *
	 *   u64 quot, rem;
	 *   u64 delta;
	 *
	 *   quot = (cyc >> time_shift);
	 *   rem = cyc & (((u64)1 << time_shift) - 1);
	 *   delta = time_offset + quot * time_mult +
	 *              ((rem * time_mult) >> time_shift);
	 *
	 * Where time_offset,time_mult,time_shift and cyc are read in the
	 * seqcount loop described above. This delta can then be added to
	 * enabled and possibly running (if idx), improving the scaling:
	 *
	 *   enabled += delta;
	 *   if (idx)
	 *     running += delta;
	 *
	 *   quot = count / running;
	 *   rem  = count % running;
	 *   count = quot * enabled + (rem * enabled) / running;
	 */
	__u16	time_shift;
	__u32	time_mult;
	__u64	time_offset;
	/*
	 * If cap_user_time_zero, the hardware clock (e.g. TSC) can be calculated
	 * from sample timestamps.
	 *
	 *   time = timestamp - time_zero;
	 *   quot = time / time_mult;
	 *   rem  = time % time_mult;
	 *   cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
	 *
	 * And vice versa:
	 *
	 *   quot = cyc >> time_shift;
	 *   rem  = cyc & (((u64)1 << time_shift) - 1);
	 *   timestamp = time_zero + quot * time_mult +
	 *               ((rem * time_mult) >> time_shift);
	 */
	__u64	time_zero;
	__u32	size;			/* Header size up to __reserved[] fields. */

		/*
		 * Hole for extension of the self monitor capabilities
		 */

	__u8	__reserved[118*8+4];	/* align to 1k. */

	/*
	 * Control data for the mmap() data buffer.
	 *
	 * User-space reading the @data_head value should issue an rmb(), on
	 * SMP capable platforms, after reading this value -- see
	 * perf_event_wakeup().
	 *
	 * When the mapping is PROT_WRITE the @data_tail value should be
	 * written by userspace to reflect the last read data. In this case
	 * the kernel will not over-write unread data.
	 */
	__u64   data_head;		/* head in the data section */
	__u64	data_tail;		/* user-space written tail */
};
|
|
|
|
/*
 * Bits of perf_event_header::misc.  The low three bits hold a CPU-mode
 * value (mask it with PERF_RECORD_MISC_CPUMODE_MASK before comparing);
 * the remaining definitions are independent single-bit flags.
 */
#define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
#define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
#define PERF_RECORD_MISC_KERNEL			(1 << 0)
#define PERF_RECORD_MISC_USER			(2 << 0)
#define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
#define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
#define PERF_RECORD_MISC_GUEST_USER		(5 << 0)

#define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
/*
 * Indicates that the content of PERF_SAMPLE_IP points to
 * the actual instruction that triggered the event. See also
 * perf_event_attr::precise_ip.
 */
#define PERF_RECORD_MISC_EXACT_IP		(1 << 14)
/*
 * Reserve the last bit to indicate some extended misc field
 */
#define PERF_RECORD_MISC_EXT_RESERVED		(1 << 15)
|
|
|
|
/*
 * Fixed 8-byte header; it is the first member of every record layout
 * documented under enum perf_event_type.
 */
struct perf_event_header {
	__u32	type;	/* presumably an enum perf_event_type value -- confirm */
	__u16	misc;	/* PERF_RECORD_MISC_* bits */
	__u16	size;	/* presumably the record size in bytes, header included -- confirm */
};
|
|
|
|
/*
 * Record types.  The comment above each enumerator documents the layout
 * of that record; "{ ... } && FLAG" means the braced fields are present
 * only when FLAG is set.
 */
enum perf_event_type {

	/*
	 * If perf_event_attr.sample_id_all is set then all event types will
	 * have the sample_type selected fields related to where/when
	 * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU,
	 * IDENTIFIER) described in PERF_RECORD_SAMPLE below, it will be stashed
	 * just after the perf_event_header and the fields already present for
	 * the existing fields, i.e. at the end of the payload. That way a newer
	 * perf.data file will be supported by older perf tools, with these new
	 * optional fields being ignored.
	 *
	 * struct sample_id {
	 * 	{ u32			pid, tid; } && PERF_SAMPLE_TID
	 * 	{ u64			time;     } && PERF_SAMPLE_TIME
	 * 	{ u64			id;       } && PERF_SAMPLE_ID
	 * 	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
	 * 	{ u32			cpu, res; } && PERF_SAMPLE_CPU
	 *	{ u64			id;	  } && PERF_SAMPLE_IDENTIFIER
	 * } && perf_event_attr::sample_id_all
	 *
	 * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.  The
	 * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
	 * relative to header.size.
	 */

	/*
	 * The MMAP events record the PROT_EXEC mappings so that we can
	 * correlate userspace IPs to code. They have the following structure:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	u32				pid, tid;
	 *	u64				addr;
	 *	u64				len;
	 *	u64				pgoff;
	 *	char				filename[];
	 * 	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_MMAP			= 1,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				id;
	 *	u64				lost;
	 * 	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_LOST			= 2,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	u32				pid, tid;
	 *	char				comm[];
	 * 	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_COMM			= 3,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, ppid;
	 *	u32				tid, ptid;
	 *	u64				time;
	 * 	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_EXIT			= 4,

	/*
	 * Shared by PERF_RECORD_THROTTLE and PERF_RECORD_UNTHROTTLE:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				time;
	 *	u64				id;
	 *	u64				stream_id;
	 * 	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_THROTTLE			= 5,
	PERF_RECORD_UNTHROTTLE			= 6,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, ppid;
	 *	u32				tid, ptid;
	 *	u64				time;
	 * 	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_FORK			= 7,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, tid;
	 *
	 *	struct read_format		values;
	 * 	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_READ			= 8,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	#
	 *	# Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
	 *	# The advantage of PERF_SAMPLE_IDENTIFIER is that its position
	 *	# is fixed relative to header.
	 *	#
	 *
	 *	{ u64			id;	  } && PERF_SAMPLE_IDENTIFIER
	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
	 *	{ u64			time;     } && PERF_SAMPLE_TIME
	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
	 *	{ u64			id;	  } && PERF_SAMPLE_ID
	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
	 *
	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
	 *
	 *	{ u64			nr,
	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
	 *
	 *	#
	 *	# The RAW record below is opaque data wrt the ABI
	 *	#
	 *	# That is, the ABI doesn't make any promises wrt to
	 *	# the stability of its content, it may vary depending
	 *	# on event, hardware, kernel version and phase of
	 *	# the moon.
	 *	#
	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
	 *	#
	 *
	 *	{ u32			size;
	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
	 *
	 *	{ u64                   nr;
	 *        { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
	 *
	 * 	{ u64			abi; # enum perf_sample_regs_abi
	 * 	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
	 *
	 * 	{ u64			size;
	 * 	  char			data[size];
	 * 	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
	 *
	 *	{ u64			weight;   } && PERF_SAMPLE_WEIGHT
	 *	{ u64                   data_src; } && PERF_SAMPLE_DATA_SRC
	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
	 * };
	 */
	PERF_RECORD_SAMPLE			= 9,

	/*
	 * The MMAP2 records are an augmented version of MMAP, they add
	 * maj, min, ino numbers to be used to uniquely identify each mapping
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	u32				pid, tid;
	 *	u64				addr;
	 *	u64				len;
	 *	u64				pgoff;
	 *	u32				maj;
	 *	u32				min;
	 *	u64				ino;
	 *	u64				ino_generation;
	 *	char				filename[];
	 * 	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_MMAP2			= 10,

	PERF_RECORD_MAX,			/* non-ABI */
};
|
|
|
|
/*
 * NOTE(review): presumably the upper bound on the number of entries in a
 * sampled callchain (ips[] of PERF_SAMPLE_CALLCHAIN) -- confirm.
 */
#define PERF_MAX_STACK_DEPTH 127
|
|
|
|
/*
 * Context marker values, each a small negative number converted to __u64
 * (i.e. a value at the very top of the 64-bit range).
 * NOTE(review): presumably these appear inline in the callchain ips[]
 * array to tag the entries that follow -- confirm.
 *
 * The values do not fit in an int, so this enum relies on the compiler
 * accepting enumerator values wider than int.
 */
enum perf_callchain_context {
	PERF_CONTEXT_HV			= (__u64)-32,
	PERF_CONTEXT_KERNEL		= (__u64)-128,
	PERF_CONTEXT_USER		= (__u64)-512,

	PERF_CONTEXT_GUEST		= (__u64)-2048,
	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,

	PERF_CONTEXT_MAX		= (__u64)-4095,
};
|
|
|
|
/*
 * Single-bit flags.
 * NOTE(review): presumably the 'flags' argument of sys_perf_event_open()
 * -- confirm.
 */
#define PERF_FLAG_FD_NO_GROUP		(1U << 0)
#define PERF_FLAG_FD_OUTPUT		(1U << 1)
#define PERF_FLAG_PID_CGROUP		(1U << 2) /* pid=cgroup id, per-cpu mode only */
|
|
|
|
/*
 * 64-bit memory access descriptor, viewable either as a raw word (val)
 * or as bit-fields.  The field widths add up to 64 bits
 * (5 + 14 + 5 + 2 + 7 + 31).
 */
union perf_mem_data_src {
	__u64 val;
	struct {
		__u64   mem_op:5,	/* type of opcode */
			mem_lvl:14,	/* memory hierarchy level */
			mem_snoop:5,	/* snoop mode */
			mem_lock:2,	/* lock instr */
			mem_dtlb:7,	/* tlb access */
			mem_rsvd:31;
	};
};
|
|
|
|
/*
 * Values for the bit-fields of union perf_mem_data_src.  Each group's
 * *_SHIFT is the bit offset of its field within the 64-bit word:
 * OP at 0, LVL at 5, SNOOP at 19, LOCK at 24, TLB at 26.
 */

/* type of opcode (load/store/prefetch,code) */
#define PERF_MEM_OP_NA		0x01 /* not available */
#define PERF_MEM_OP_LOAD	0x02 /* load instruction */
#define PERF_MEM_OP_STORE	0x04 /* store instruction */
#define PERF_MEM_OP_PFETCH	0x08 /* prefetch */
#define PERF_MEM_OP_EXEC	0x10 /* code (execution) */
#define PERF_MEM_OP_SHIFT	0

/* memory hierarchy (memory level, hit or miss) */
#define PERF_MEM_LVL_NA		0x01  /* not available */
#define PERF_MEM_LVL_HIT	0x02  /* hit level */
#define PERF_MEM_LVL_MISS	0x04  /* miss level  */
#define PERF_MEM_LVL_L1		0x08  /* L1 */
#define PERF_MEM_LVL_LFB	0x10  /* Line Fill Buffer */
#define PERF_MEM_LVL_L2		0x20  /* L2 */
#define PERF_MEM_LVL_L3		0x40  /* L3 */
#define PERF_MEM_LVL_LOC_RAM	0x80  /* Local DRAM */
#define PERF_MEM_LVL_REM_RAM1	0x100 /* Remote DRAM (1 hop) */
#define PERF_MEM_LVL_REM_RAM2	0x200 /* Remote DRAM (2 hops) */
#define PERF_MEM_LVL_REM_CCE1	0x400 /* Remote Cache (1 hop) */
#define PERF_MEM_LVL_REM_CCE2	0x800 /* Remote Cache (2 hops) */
#define PERF_MEM_LVL_IO		0x1000 /* I/O memory */
#define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
#define PERF_MEM_LVL_SHIFT	5

/* snoop mode */
#define PERF_MEM_SNOOP_NA	0x01 /* not available */
#define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
#define PERF_MEM_SNOOP_HIT	0x04 /* snoop hit */
#define PERF_MEM_SNOOP_MISS	0x08 /* snoop miss */
#define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
#define PERF_MEM_SNOOP_SHIFT	19

/* locked instruction */
#define PERF_MEM_LOCK_NA	0x01 /* not available */
#define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */
#define PERF_MEM_LOCK_SHIFT	24

/* TLB access */
#define PERF_MEM_TLB_NA		0x01 /* not available */
#define PERF_MEM_TLB_HIT	0x02 /* hit level */
#define PERF_MEM_TLB_MISS	0x04 /* miss level */
#define PERF_MEM_TLB_L1		0x08 /* L1 */
#define PERF_MEM_TLB_L2		0x10 /* L2 */
#define PERF_MEM_TLB_WK		0x20 /* Hardware Walker*/
#define PERF_MEM_TLB_OS		0x40 /* OS fault handler */
#define PERF_MEM_TLB_SHIFT	26

/*
 * PERF_MEM_S(a, s) - value PERF_MEM_<a>_<s> shifted to field <a>'s position.
 *
 * e.g. PERF_MEM_S(LVL, L1) == (__u64)PERF_MEM_LVL_L1 << PERF_MEM_LVL_SHIFT.
 *
 * The cast to a 64-bit type is required: the TLB values shifted by 26
 * exceed 32 bits.  __u64 (from <linux/types.h>) is used rather than u64
 * because this is a user-space visible header and u64 is not defined
 * there.
 */
#define PERF_MEM_S(a, s) \
	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
|
|
|
|
/*
 * single taken branch record layout:
 *
 *      from: source instruction (may not always be a branch insn)
 *        to: branch target
 *   mispred: branch target was mispredicted
 * predicted: branch target was predicted
 *
 * support for mispred, predicted is optional. In case it
 * is not supported mispred = predicted = 0.
 *
 *     in_tx: running in a hardware transaction
 *     abort: aborting a hardware transaction
 *
 * The record is three 64-bit words; the flag bit-fields (4 used bits +
 * 60 reserved) fill the third word.
 */
struct perf_branch_entry {
	__u64	from;
	__u64	to;
	__u64	mispred:1,  /* target mispredicted */
		predicted:1,/* target predicted */
		in_tx:1,    /* in transaction */
		abort:1,    /* transaction abort */
		reserved:60;
};
|
|
|
|
#endif /* _UAPI_LINUX_PERF_EVENT_H */
|