From ea57c4f5203d82c7844c54cdef54e972cf4e9d1f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Sep 2009 18:15:54 +0200 Subject: [PATCH] perf tools: Implement counter output multiplexing Finish the -M/--multiplex option implementation: - separate it out from group_fd - correctly set it via the ioctl and dont mmap counters that are multiplexed - modify the perf record event loop to deal with buffer-less counters. - remove the -g option from perf sched record - account for unordered events in perf sched latency - (add -f to perf sched record to ease measurements) - skip idle threads (pid==0) in latency output The result is better latency output by 'perf sched latency': ----------------------------------------------------------------------------------- Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | ----------------------------------------------------------------------------------- ksoftirqd/8 | 0.071 ms | 2 | avg: 0.458 ms | max: 0.913 ms | at-spi-registry | 0.609 ms | 19 | avg: 0.013 ms | max: 0.023 ms | perf | 3.316 ms | 16 | avg: 0.013 ms | max: 0.054 ms | Xorg | 0.392 ms | 19 | avg: 0.011 ms | max: 0.018 ms | sleep | 0.537 ms | 2 | avg: 0.009 ms | max: 0.009 ms | ----------------------------------------------------------------------------------- TOTAL: | 4.925 ms | 58 | --------------------------------------------- Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 41 ++++++++++++++++++----------- tools/perf/builtin-sched.c | 25 +++++++++++++++--- tools/perf/util/trace-event-parse.c | 6 +++-- 3 files changed, 51 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 79f99dba5be0..5f3127e7a615 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -49,6 +49,7 @@ static int inherit_stat = 0; static int no_samples = 0; static int sample_address = 0; static int multiplex = 0; +static int multiplex_fd = -1; static long samples; static struct timeval last_read; @@ -471,24 +472,30 @@ try_again: */ if (group && group_fd == -1) group_fd = fd[nr_cpu][counter]; + if (multiplex && multiplex_fd == -1) + multiplex_fd = fd[nr_cpu][counter]; - event_array[nr_poll].fd = fd[nr_cpu][counter]; - event_array[nr_poll].events = POLLIN; - nr_poll++; + if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { + int ret; - mmap_array[nr_cpu][counter].counter = counter; - mmap_array[nr_cpu][counter].prev = 0; - mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; - mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); - if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { - error("failed to mmap with %d (%s)\n", errno, strerror(errno)); - exit(-1); + ret = ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, multiplex_fd); + assert(ret != -1); + } else { + event_array[nr_poll].fd = fd[nr_cpu][counter]; + event_array[nr_poll].events = POLLIN; + nr_poll++; + + mmap_array[nr_cpu][counter].counter = counter; + mmap_array[nr_cpu][counter].prev = 0; + mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; + mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, + PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); + if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { + error("failed to mmap with %d (%s)\n", errno, strerror(errno)); + exit(-1); + } } - if (multiplex && fd[nr_cpu][counter] != group_fd) - ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, group_fd); - ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE); } @@ -618,8 +625,10 @@ static int __cmd_record(int argc, const char **argv) int hits = samples; for (i = 0; i < nr_cpu; i++) { - for (counter = 0; counter < nr_counters; counter++) - mmap_read(&mmap_array[i][counter]); + for (counter = 0; counter < nr_counters; counter++) { + if (mmap_array[i][counter].base) + mmap_read(&mmap_array[i][counter]); + } } if (hits == samples) { diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 3e003237c42f..2ce87ef5a3e6 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -116,6 +116,8 @@ static u64 sum_fluct; static u64 run_avg; static unsigned long replay_repeat = 10; +static unsigned long nr_timestamps; +static unsigned long unordered_timestamps; #define TASK_STATE_TO_CHAR_STR "RSDTtZX" @@ -1109,8 +1111,11 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, if (atom->state != THREAD_SLEEPING) return; - if (atom->sched_out_time > timestamp) + nr_timestamps++; + if (atom->sched_out_time > timestamp) { + unordered_timestamps++; return; + } atom->state = THREAD_WAIT_CPU; atom->wake_up_time = timestamp; @@ -1130,6 +1135,11 @@ static void output_lat_thread(struct task_atoms *atom_list) if (!atom_list->nb_atoms) return; + /* + * Ignore idle threads: + */ + if (!atom_list->thread->pid) + return; all_runtime += atom_list->total_runtime; all_count += atom_list->nb_atoms; @@ -1301,8 +1311,16 @@ static void __cmd_lat(void) } printf("-----------------------------------------------------------------------------------\n"); - printf(" TOTAL: |%9.3f ms |%9Ld |\n", + printf(" TOTAL: |%9.3f ms |%9Ld |", (double)all_runtime/1e6, all_count); + + if (unordered_timestamps && nr_timestamps) { + printf(" INFO: %.2f%% unordered events.\n", + (double)unordered_timestamps/(double)nr_timestamps*100.0); + } else { + printf("\n"); + } + printf("---------------------------------------------\n"); } @@ -1667,12 +1685,13 @@ static const char *record_args[] = { "-a", "-R", "-M", - "-g", + "-f", "-c", "1", "-e", "sched:sched_switch:r", "-e", "sched:sched_stat_wait:r", "-e", "sched:sched_stat_sleep:r", "-e", "sched:sched_stat_iowait:r", + "-e", "sched:sched_stat_runtime:r", "-e", "sched:sched_process_exit:r", "-e", "sched:sched_process_fork:r", "-e", "sched:sched_wakeup:r", diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 64d6e302751a..f6a8437141c8 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2722,8 +2722,10 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, type = trace_parse_common_type(data); event = trace_find_event(type); - if (!event) - die("ug! no event found for type %d", type); + if (!event) { + printf("ug! no event found for type %d\n", type); + return; + } pid = parse_common_pid(data);