linux/tools/perf/util/lock-contention.h

173 lines
3.9 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#ifndef PERF_LOCK_CONTENTION_H
#define PERF_LOCK_CONTENTION_H
#include <linux/list.h>
#include <linux/rbtree.h>
perf lock contention: Add -Y/--type-filter option The -Y/--type-filter option is to filter the result for specific lock types only. It can accept comma-separated values. Note that it would accept type names like one in the output. spinlock, mutex, rwsem:R and so on. For RW-variant lock types, it converts the name to the both variants. In other words, "rwsem" is same as "rwsem:R,rwsem:W". Also note that "mutex" has two different encoding - one for sleeping wait, another for optimistic spinning. Add "mutex-spin" entry for the lock_type_table so that we can add it for "mutex" under the table. $ sudo ./perf lock record -a -- ./perf bench sched messaging $ sudo ./perf lock con -E 5 -Y spinlock contended total wait max wait avg wait type caller 802 1.26 ms 11.73 us 1.58 us spinlock __wake_up_common_lock+0x62 13 787.16 us 105.44 us 60.55 us spinlock remove_wait_queue+0x14 12 612.96 us 78.70 us 51.08 us spinlock prepare_to_wait+0x27 114 340.68 us 12.61 us 2.99 us spinlock try_to_wake_up+0x1f5 83 226.38 us 9.15 us 2.73 us spinlock folio_lruvec_lock_irqsave+0x5e Committer notes: Make get_type_flag() return UINT_MAX for error instad of -1UL, as that function returns 'unsigned int' and we store the value on a 'unsigned int' 'flags' variable which makes clang unhappy: 35 98.23 fedora:37 : FAIL clang version 15.0.6 (Fedora 15.0.6-1.fc37) builtin-lock.c:2012:14: error: result of comparison of constant 18446744073709551615 with expression of type 'unsigned int' is always true [-Werror,-Wtautological-constant-out-of-range-compare] if (flags != -1UL) { ~~~~~ ^ ~~~~ builtin-lock.c:2021:14: error: result of comparison of constant 18446744073709551615 with expression of type 'unsigned int' is always true [-Werror,-Wtautological-constant-out-of-range-compare] if (flags != -1UL) { ~~~~~ ^ ~~~~ builtin-lock.c:2037:14: error: result of comparison of constant 18446744073709551615 with expression of type 'unsigned int' is always true [-Werror,-Wtautological-constant-out-of-range-compare] if (flags != -1UL) { ~~~~~ ^ ~~~~ 3 errors generated. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Blake Jones <blakejones@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221219201732.460111-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-12-20 04:17:28 +08:00
struct lock_filter {
int nr_types;
perf lock contention: Add -L/--lock-filter option The -L/--lock-filter option is to filter only given locks. The locks can be specified by address or name (if exists). $ sudo ./perf lock record -a sleep 1 $ sudo ./perf lock con -l contended total wait max wait avg wait address symbol 57 1.11 ms 42.83 us 19.54 us ffff9f4140059000 15 280.88 us 23.51 us 18.73 us ffffffff9d007a40 jiffies_lock 1 20.49 us 20.49 us 20.49 us ffffffff9d0d50c0 rcu_state 1 9.02 us 9.02 us 9.02 us ffff9f41759e9ba0 $ sudo ./perf lock con -L jiffies_lock,rcu_state contended total wait max wait avg wait type caller 15 280.88 us 23.51 us 18.73 us spinlock tick_sched_do_timer+0x93 1 20.49 us 20.49 us 20.49 us spinlock __softirqentry_text_start+0xeb $ sudo ./perf lock con -L ffff9f4140059000 contended total wait max wait avg wait type caller 38 779.40 us 42.83 us 20.51 us spinlock worker_thread+0x50 11 216.30 us 39.87 us 19.66 us spinlock queue_work_on+0x39 8 118.13 us 20.51 us 14.77 us spinlock kthread+0xe5 Committer testing: # uname -a Linux quaco 6.0.12-200.fc36.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Dec 8 17:15:53 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux # perf lock record ^C[ perf record: Woken up 1 times to write data ] # perf lock con -L jiffies_lock,rcu_state contended total wait max wait avg wait type caller # perf lock con contended total wait max wait avg wait type caller 1 9.06 us 9.06 us 9.06 us spinlock call_timer_fn+0x24 # perf lock con -L call ignore unknown symbol: call contended total wait max wait avg wait type caller 1 9.06 us 9.06 us 9.06 us spinlock call_timer_fn+0x24 # Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Blake Jones <blakejones@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221219201732.460111-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-12-20 04:17:30 +08:00
int nr_addrs;
int nr_syms;
perf lock contention: Add -Y/--type-filter option The -Y/--type-filter option is to filter the result for specific lock types only. It can accept comma-separated values. Note that it would accept type names like one in the output. spinlock, mutex, rwsem:R and so on. For RW-variant lock types, it converts the name to the both variants. In other words, "rwsem" is same as "rwsem:R,rwsem:W". Also note that "mutex" has two different encoding - one for sleeping wait, another for optimistic spinning. Add "mutex-spin" entry for the lock_type_table so that we can add it for "mutex" under the table. $ sudo ./perf lock record -a -- ./perf bench sched messaging $ sudo ./perf lock con -E 5 -Y spinlock contended total wait max wait avg wait type caller 802 1.26 ms 11.73 us 1.58 us spinlock __wake_up_common_lock+0x62 13 787.16 us 105.44 us 60.55 us spinlock remove_wait_queue+0x14 12 612.96 us 78.70 us 51.08 us spinlock prepare_to_wait+0x27 114 340.68 us 12.61 us 2.99 us spinlock try_to_wake_up+0x1f5 83 226.38 us 9.15 us 2.73 us spinlock folio_lruvec_lock_irqsave+0x5e Committer notes: Make get_type_flag() return UINT_MAX for error instad of -1UL, as that function returns 'unsigned int' and we store the value on a 'unsigned int' 'flags' variable which makes clang unhappy: 35 98.23 fedora:37 : FAIL clang version 15.0.6 (Fedora 15.0.6-1.fc37) builtin-lock.c:2012:14: error: result of comparison of constant 18446744073709551615 with expression of type 'unsigned int' is always true [-Werror,-Wtautological-constant-out-of-range-compare] if (flags != -1UL) { ~~~~~ ^ ~~~~ builtin-lock.c:2021:14: error: result of comparison of constant 18446744073709551615 with expression of type 'unsigned int' is always true [-Werror,-Wtautological-constant-out-of-range-compare] if (flags != -1UL) { ~~~~~ ^ ~~~~ builtin-lock.c:2037:14: error: result of comparison of constant 18446744073709551615 with expression of type 'unsigned int' is always true [-Werror,-Wtautological-constant-out-of-range-compare] if (flags != -1UL) { ~~~~~ ^ ~~~~ 3 errors generated. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Blake Jones <blakejones@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221219201732.460111-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-12-20 04:17:28 +08:00
unsigned int *types;
perf lock contention: Add -L/--lock-filter option The -L/--lock-filter option is to filter only given locks. The locks can be specified by address or name (if exists). $ sudo ./perf lock record -a sleep 1 $ sudo ./perf lock con -l contended total wait max wait avg wait address symbol 57 1.11 ms 42.83 us 19.54 us ffff9f4140059000 15 280.88 us 23.51 us 18.73 us ffffffff9d007a40 jiffies_lock 1 20.49 us 20.49 us 20.49 us ffffffff9d0d50c0 rcu_state 1 9.02 us 9.02 us 9.02 us ffff9f41759e9ba0 $ sudo ./perf lock con -L jiffies_lock,rcu_state contended total wait max wait avg wait type caller 15 280.88 us 23.51 us 18.73 us spinlock tick_sched_do_timer+0x93 1 20.49 us 20.49 us 20.49 us spinlock __softirqentry_text_start+0xeb $ sudo ./perf lock con -L ffff9f4140059000 contended total wait max wait avg wait type caller 38 779.40 us 42.83 us 20.51 us spinlock worker_thread+0x50 11 216.30 us 39.87 us 19.66 us spinlock queue_work_on+0x39 8 118.13 us 20.51 us 14.77 us spinlock kthread+0xe5 Committer testing: # uname -a Linux quaco 6.0.12-200.fc36.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Dec 8 17:15:53 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux # perf lock record ^C[ perf record: Woken up 1 times to write data ] # perf lock con -L jiffies_lock,rcu_state contended total wait max wait avg wait type caller # perf lock con contended total wait max wait avg wait type caller 1 9.06 us 9.06 us 9.06 us spinlock call_timer_fn+0x24 # perf lock con -L call ignore unknown symbol: call contended total wait max wait avg wait type caller 1 9.06 us 9.06 us 9.06 us spinlock call_timer_fn+0x24 # Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Blake Jones <blakejones@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221219201732.460111-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-12-20 04:17:30 +08:00
unsigned long *addrs;
char **syms;
perf lock contention: Add -Y/--type-filter option The -Y/--type-filter option is to filter the result for specific lock types only. It can accept comma-separated values. Note that it would accept type names like one in the output. spinlock, mutex, rwsem:R and so on. For RW-variant lock types, it converts the name to the both variants. In other words, "rwsem" is same as "rwsem:R,rwsem:W". Also note that "mutex" has two different encoding - one for sleeping wait, another for optimistic spinning. Add "mutex-spin" entry for the lock_type_table so that we can add it for "mutex" under the table. $ sudo ./perf lock record -a -- ./perf bench sched messaging $ sudo ./perf lock con -E 5 -Y spinlock contended total wait max wait avg wait type caller 802 1.26 ms 11.73 us 1.58 us spinlock __wake_up_common_lock+0x62 13 787.16 us 105.44 us 60.55 us spinlock remove_wait_queue+0x14 12 612.96 us 78.70 us 51.08 us spinlock prepare_to_wait+0x27 114 340.68 us 12.61 us 2.99 us spinlock try_to_wake_up+0x1f5 83 226.38 us 9.15 us 2.73 us spinlock folio_lruvec_lock_irqsave+0x5e Committer notes: Make get_type_flag() return UINT_MAX for error instad of -1UL, as that function returns 'unsigned int' and we store the value on a 'unsigned int' 'flags' variable which makes clang unhappy: 35 98.23 fedora:37 : FAIL clang version 15.0.6 (Fedora 15.0.6-1.fc37) builtin-lock.c:2012:14: error: result of comparison of constant 18446744073709551615 with expression of type 'unsigned int' is always true [-Werror,-Wtautological-constant-out-of-range-compare] if (flags != -1UL) { ~~~~~ ^ ~~~~ builtin-lock.c:2021:14: error: result of comparison of constant 18446744073709551615 with expression of type 'unsigned int' is always true [-Werror,-Wtautological-constant-out-of-range-compare] if (flags != -1UL) { ~~~~~ ^ ~~~~ builtin-lock.c:2037:14: error: result of comparison of constant 18446744073709551615 with expression of type 'unsigned int' is always true [-Werror,-Wtautological-constant-out-of-range-compare] if (flags != -1UL) { ~~~~~ ^ ~~~~ 3 errors generated. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Blake Jones <blakejones@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221219201732.460111-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-12-20 04:17:28 +08:00
};
struct lock_stat {
struct hlist_node hash_entry;
struct rb_node rb; /* used for sorting */
u64 addr; /* address of lockdep_map, used as ID */
char *name; /* for strcpy(), we cannot use const */
perf lock contention: Show full callstack with -v option Currently it shows a caller function for each entry, but users need to see the full call stacks sometimes. Use -v/--verbose option to do that. # perf lock con -a -b -v sleep 3 Looking at the vmlinux_path (8 entries long) symsrc__init: cannot get elf header. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols contended total wait max wait avg wait type caller 1 10.74 us 10.74 us 10.74 us spinlock __bpf_trace_contention_begin+0xb 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffbb8b8e75 bpf_trace_run2+0x35 0xffffffffbb7eab9b __bpf_trace_contention_begin+0xb 0xffffffffbb7ebe75 queued_spin_lock_slowpath+0x1f5 0xffffffffbc1c26ff _raw_spin_lock+0x1f 0xffffffffbb841015 tick_do_update_jiffies64+0x25 0xffffffffbb8409ee tick_irq_enter+0x9e 1 7.70 us 7.70 us 7.70 us spinlock __bpf_trace_contention_begin+0xb 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffbb8b8e75 bpf_trace_run2+0x35 0xffffffffbb7eab9b __bpf_trace_contention_begin+0xb 0xffffffffbb7ebe75 queued_spin_lock_slowpath+0x1f5 0xffffffffbc1c26ff _raw_spin_lock+0x1f 0xffffffffbb7bc27e raw_spin_rq_lock_nested+0xe 0xffffffffbb7cef9c load_balance+0x66c Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <songliubraving@fb.com> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220912055314.744552-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-09-12 13:53:12 +08:00
u64 *callstack;
unsigned int nr_acquire;
unsigned int nr_acquired;
unsigned int nr_contended;
unsigned int nr_release;
union {
unsigned int nr_readlock;
unsigned int flags;
};
unsigned int nr_trylock;
/* these times are in nano sec. */
u64 avg_wait_time;
u64 wait_time_total;
u64 wait_time_min;
u64 wait_time_max;
int broken; /* flag of blacklist */
int combined;
};
/*
* States of lock_seq_stat
*
* UNINITIALIZED is required for detecting first event of acquire.
* As the nature of lock events, there is no guarantee
* that the first event for the locks are acquire,
* it can be acquired, contended or release.
*/
#define SEQ_STATE_UNINITIALIZED 0 /* initial state */
#define SEQ_STATE_RELEASED 1
#define SEQ_STATE_ACQUIRING 2
#define SEQ_STATE_ACQUIRED 3
#define SEQ_STATE_READ_ACQUIRED 4
#define SEQ_STATE_CONTENDED 5
/*
* MAX_LOCK_DEPTH
* Imported from include/linux/sched.h.
* Should this be synchronized?
*/
#define MAX_LOCK_DEPTH 48
struct lock_stat *lock_stat_find(u64 addr);
struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-03 10:13:24 +08:00
bool match_callstack_filter(struct machine *machine, u64 *callstack);
/*
* struct lock_seq_stat:
* Place to put on state of one lock sequence
* 1) acquire -> acquired -> release
* 2) acquire -> contended -> acquired -> release
* 3) acquire (with read or try) -> release
* 4) Are there other patterns?
*/
struct lock_seq_stat {
struct list_head list;
int state;
u64 prev_event_time;
u64 addr;
int read_count;
};
struct thread_stat {
struct rb_node rb;
u32 tid;
struct list_head seq_list;
};
/*
* CONTENTION_STACK_DEPTH
* Number of stack trace entries to find callers
*/
#define CONTENTION_STACK_DEPTH 8
/*
* CONTENTION_STACK_SKIP
* Number of stack trace entries to skip when finding callers.
* The first few entries belong to the locking implementation itself.
*/
#define CONTENTION_STACK_SKIP 4
/*
* flags for lock:contention_begin
* Imported from include/trace/events/lock.h.
*/
#define LCB_F_SPIN (1U << 0)
#define LCB_F_READ (1U << 1)
#define LCB_F_WRITE (1U << 2)
#define LCB_F_RT (1U << 3)
#define LCB_F_PERCPU (1U << 4)
#define LCB_F_MUTEX (1U << 5)
struct evlist;
struct machine;
struct target;
struct lock_contention_fails {
int task;
int stack;
int time;
};
struct lock_contention {
struct evlist *evlist;
struct target *target;
struct machine *machine;
struct hlist_head *result;
struct lock_filter *filters;
struct lock_contention_fails fails;
unsigned long map_nr_entries;
perf lock contention: Allow to change stack depth and skip It needs stack traces to find callers of locks. To minimize the performance overhead it only collects up to 8 entries for each stack trace. And it skips first 3 entries as they came from BPF, tracepoint and lock functions which are not interested for most users. But it turned out that those numbers are different in some configuration. Using fixed number can result in non meaningful caller names. Let's make them adjustable with --stack-depth and --skip-stack options. On my setup, the default output is like below: # /perf lock con -ab -F contended,wait_total sleep 3 contended total wait type caller 28 4.55 ms rwlock:W __bpf_trace_contention_begin+0xb 33 1.67 ms rwlock:W __bpf_trace_contention_begin+0xb 12 580.28 us spinlock __bpf_trace_contention_begin+0xb 60 240.54 us rwsem:R __bpf_trace_contention_begin+0xb 27 64.45 us spinlock __bpf_trace_contention_begin+0xb If I change the stack skip to 5, the result will be like: # perf lock con -ab -F contended,wait_total --stack-skip 5 sleep 3 contended total wait type caller 32 715.45 us spinlock folio_lruvec_lock_irqsave+0x61 26 550.22 us spinlock folio_lruvec_lock_irqsave+0x61 15 486.93 us rwsem:R mmap_read_lock+0x13 12 139.66 us rwsem:W vm_mmap_pgoff+0x93 1 7.04 us spinlock tick_do_update_jiffies64+0x25 Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <songliubraving@fb.com> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220912055314.744552-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-09-12 13:53:13 +08:00
int max_stack;
int stack_skip;
int aggr_mode;
perf lock contention: Add -o/--lock-owner option When there're many lock contentions in the system, people sometimes want to know who caused the contention, IOW who's the owner of the locks. The -o/--lock-owner option tries to follow the lock owners for the contended mutexes and rwsems from BPF, and then attributes the contention time to the owner instead of the waiter. It's a best effort approach to get the owner info at the time of the contention and doesn't guarantee to have the precise tracking of owners if it's changing over time. Currently it only handles mutex and rwsem that have owner field in their struct and it basically points to a task_struct that owns the lock at the moment. Technically its type is atomic_long_t and it comes with some LSB bits used for other meanings. So it needs to clear them when casting it to a pointer to task_struct. Also the atomic_long_t is a typedef of the atomic 32 or 64 bit types depending on arch which is a wrapper struct for the counter value. I'm not aware of proper ways to access those kernel atomic types from BPF so I just read the internal counter value directly. Please let me know if there's a better way. When -o/--lock-owner option is used, it goes to the task aggregation mode like -t/--threads option does. However it cannot get the owner for other lock types like spinlock and sometimes even for mutex. $ sudo ./perf lock con -abo -- ./perf bench sched pipe # Running 'sched/pipe' benchmark: # Executed 1000000 pipe operations between two processes Total time: 4.766 [sec] 4.766540 usecs/op 209795 ops/sec contended total wait max wait avg wait pid owner 403 565.32 us 26.81 us 1.40 us -1 Unknown 4 27.99 us 8.57 us 7.00 us 1583145 sched-pipe 1 8.25 us 8.25 us 8.25 us 1583144 sched-pipe 1 2.03 us 2.03 us 2.03 us 5068 chrome As you can see, the owner is unknown for the most cases. But if we filter only for the mutex locks, it'd more likely get the onwers. $ sudo ./perf lock con -abo -Y mutex -- ./perf bench sched pipe # Running 'sched/pipe' benchmark: # Executed 1000000 pipe operations between two processes Total time: 4.910 [sec] 4.910435 usecs/op 203647 ops/sec contended total wait max wait avg wait pid owner 2 15.50 us 8.29 us 7.75 us 1582852 sched-pipe 7 7.20 us 2.47 us 1.03 us -1 Unknown 1 6.74 us 6.74 us 6.74 us 1582851 sched-pipe Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: Waiman Long <longman@redhat.com> Cc: Will Deacon <will@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230207002403.63590-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-07 08:24:02 +08:00
int owner;
bool save_callstack;
};
#ifdef HAVE_BPF_SKEL
int lock_contention_prepare(struct lock_contention *con);
int lock_contention_start(void);
int lock_contention_stop(void);
int lock_contention_read(struct lock_contention *con);
int lock_contention_finish(void);
#else /* !HAVE_BPF_SKEL */
static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused)
{
return 0;
}
static inline int lock_contention_start(void) { return 0; }
static inline int lock_contention_stop(void) { return 0; }
static inline int lock_contention_finish(void) { return 0; }
static inline int lock_contention_read(struct lock_contention *con __maybe_unused)
{
return 0;
}
#endif /* HAVE_BPF_SKEL */
#endif /* PERF_LOCK_CONTENTION_H */