From 119a784c81270eb88e573174ed2209225d646656 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Jun 2022 11:06:23 -0700 Subject: [PATCH] perf/core: Add a new read format to get a number of lost samples Sometimes we want to know an accurate number of samples even if it's lost. Currenlty PERF_RECORD_LOST is generated for a ring-buffer which might be shared with other events. So it's hard to know per-event lost count. Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from userspace. Original-patch-by: Jiri Olsa Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220616180623.1358843-1-namhyung@kernel.org --- include/linux/perf_event.h | 2 ++ include/uapi/linux/perf_event.h | 5 ++++- kernel/events/core.c | 21 ++++++++++++++++++--- kernel/events/ring_buffer.c | 5 ++++- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index da759560eec5..ee8b9ecdc03b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -759,6 +759,8 @@ struct perf_event { struct pid_namespace *ns; u64 id; + atomic64_t lost_samples; + u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d37629dbad72..0474ee362151 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -301,6 +301,7 @@ enum { * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } && !PERF_FORMAT_GROUP * * { u64 nr; @@ -308,6 +309,7 @@ enum { * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } cntr[nr]; * } && PERF_FORMAT_GROUP * }; @@ -317,8 +319,9 @@ enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, PERF_FORMAT_GROUP = 1U << 3, + PERF_FORMAT_LOST = 1U << 4, - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 80782cddb1da..4d8c335a07db 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1819,6 +1819,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings) if (event->attr.read_format & PERF_FORMAT_ID) entry += sizeof(u64); + if (event->attr.read_format & PERF_FORMAT_LOST) + entry += sizeof(u64); + if (event->attr.read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); @@ -5260,11 +5263,15 @@ static int __perf_read_group_add(struct perf_event *leader, values[n++] += perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&leader->lost_samples); for_each_sibling_event(sub, leader) { values[n++] += perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&sub->lost_samples); } raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -5321,7 +5328,7 @@ static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; - u64 values[4]; + u64 values[5]; int n = 0; values[n++] = __perf_event_read_value(event, &enabled, &running); @@ -5331,6 +5338,8 @@ static int perf_read_one(struct perf_event *event, values[n++] = running; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&event->lost_samples); if (copy_to_user(buf, values, n * sizeof(u64))) return -EFAULT; @@ -6858,7 +6867,7 @@ static void perf_output_read_one(struct perf_output_handle *handle, u64 enabled, u64 running) { u64 read_format = event->attr.read_format; - u64 values[4]; + u64 values[5]; int n = 0; values[n++] = perf_event_count(event); @@ -6872,6 +6881,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, } if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&event->lost_samples); __output_copy(handle, values, n * sizeof(u64)); } @@ -6882,7 +6893,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; - u64 values[5]; + u64 values[6]; int n = 0; values[n++] = 1 + leader->nr_siblings; @@ -6900,6 +6911,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&leader->lost_samples); __output_copy(handle, values, n * sizeof(u64)); @@ -6913,6 +6926,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&sub->lost_samples); __output_copy(handle, values, n * sizeof(u64)); } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index fb35b926024c..726132039c38 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -172,8 +172,10 @@ __perf_output_begin(struct perf_output_handle *handle, goto out; if (unlikely(rb->paused)) { - if (rb->nr_pages) + if (rb->nr_pages) { local_inc(&rb->lost); + atomic64_inc(&event->lost_samples); + } goto out; } @@ -254,6 +256,7 @@ __perf_output_begin(struct perf_output_handle *handle, fail: local_inc(&rb->lost); + atomic64_inc(&event->lost_samples); perf_output_put_handle(handle); out: rcu_read_unlock();