linux/tools/perf/util/stat.h
Jin Yao ee6a961432 perf stat: Turn off summary for interval mode by default
There's a risk that outputting interval mode summaries by default breaks
CSV consumers. It already broke pmu-tools/toplev.

So turn the summary off by default and add a new option, '--summary', to
enable it. The option is not tied to CSV mode; it also works with the
normal output.
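
For context: the new flag presumably lands in the 'summary' field of
'struct perf_stat_config' (declared below). A minimal, illustrative sketch
of the option wiring in builtin-stat.c, assuming the usual OPT_BOOLEAN
helper from tools/lib/subcmd/parse-options.h (this is not the actual patch
hunk):

  /* illustrative sketch, not the actual hunk */
  OPT_BOOLEAN(0, "summary", &stat_config.summary,
              "print summary for interval mode"),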

Before:

  root@kbl-ppc:~# perf stat -I1000 --interval-count 2
  #           time             counts unit events
       1.000265904           8,005.73 msec cpu-clock                 #    8.006 CPUs utilized
       1.000265904                601      context-switches          #    0.075 K/sec
       1.000265904                 10      cpu-migrations            #    0.001 K/sec
       1.000265904                  0      page-faults               #    0.000 K/sec
       1.000265904         66,746,521      cycles                    #    0.008 GHz
       1.000265904         71,874,398      instructions              #    1.08  insn per cycle
       1.000265904         13,356,781      branches                  #    1.668 M/sec
       1.000265904            298,756      branch-misses             #    2.24% of all branches
       2.001857667           8,012.52 msec cpu-clock                 #    8.013 CPUs utilized
       2.001857667                164      context-switches          #    0.020 K/sec
       2.001857667                 10      cpu-migrations            #    0.001 K/sec
       2.001857667                  2      page-faults               #    0.000 K/sec
       2.001857667          5,822,188      cycles                    #    0.001 GHz
       2.001857667          2,186,170      instructions              #    0.38  insn per cycle
       2.001857667            442,378      branches                  #    0.055 M/sec
       2.001857667             44,750      branch-misses             #   10.12% of all branches

   Performance counter stats for 'system wide':

           16,018.25 msec cpu-clock                 #    7.993 CPUs utilized
                 765      context-switches          #    0.048 K/sec
                  20      cpu-migrations            #    0.001 K/sec
                   2      page-faults               #    0.000 K/sec
          72,568,709      cycles                    #    0.005 GHz
          74,060,568      instructions              #    1.02  insn per cycle
          13,799,159      branches                  #    0.861 M/sec
             343,506      branch-misses             #    2.49% of all branches

         2.004118489 seconds time elapsed

After:

  root@kbl-ppc:~# perf stat -I1000 --interval-count 2
  #           time             counts unit events
       1.001336393           8,013.28 msec cpu-clock                 #    8.013 CPUs utilized
       1.001336393                 82      context-switches          #    0.010 K/sec
       1.001336393                  8      cpu-migrations            #    0.001 K/sec
       1.001336393                  0      page-faults               #    0.000 K/sec
       1.001336393          4,199,121      cycles                    #    0.001 GHz
       1.001336393          1,373,991      instructions              #    0.33  insn per cycle
       1.001336393            270,681      branches                  #    0.034 M/sec
       1.001336393             31,659      branch-misses             #   11.70% of all branches
       2.003905006           8,020.52 msec cpu-clock                 #    8.021 CPUs utilized
       2.003905006                184      context-switches          #    0.023 K/sec
       2.003905006                  8      cpu-migrations            #    0.001 K/sec
       2.003905006                  2      page-faults               #    0.000 K/sec
       2.003905006          5,446,190      cycles                    #    0.001 GHz
       2.003905006          2,312,547      instructions              #    0.42  insn per cycle
       2.003905006            451,691      branches                  #    0.056 M/sec
       2.003905006             37,925      branch-misses             #    8.40% of all branches

  root@kbl-ppc:~# perf stat -I1000 --interval-count 2 --summary
  #           time             counts unit events
       1.001313128           8,013.20 msec cpu-clock                 #    8.013 CPUs utilized
       1.001313128                 83      context-switches          #    0.010 K/sec
       1.001313128                  8      cpu-migrations            #    0.001 K/sec
       1.001313128                  0      page-faults               #    0.000 K/sec
       1.001313128          4,470,950      cycles                    #    0.001 GHz
       1.001313128          1,440,045      instructions              #    0.32  insn per cycle
       1.001313128            283,222      branches                  #    0.035 M/sec
       1.001313128             33,576      branch-misses             #   11.86% of all branches
       2.003857385           8,020.34 msec cpu-clock                 #    8.020 CPUs utilized
       2.003857385                154      context-switches          #    0.019 K/sec
       2.003857385                  8      cpu-migrations            #    0.001 K/sec
       2.003857385                  2      page-faults               #    0.000 K/sec
       2.003857385          4,515,676      cycles                    #    0.001 GHz
       2.003857385          2,180,449      instructions              #    0.48  insn per cycle
       2.003857385            435,254      branches                  #    0.054 M/sec
       2.003857385             31,179      branch-misses             #    7.16% of all branches

   Performance counter stats for 'system wide':

           16,033.53 msec cpu-clock                 #    7.992 CPUs utilized
                 237      context-switches          #    0.015 K/sec
                  16      cpu-migrations            #    0.001 K/sec
                   2      page-faults               #    0.000 K/sec
           8,986,626      cycles                    #    0.001 GHz
           3,620,494      instructions              #    0.40  insn per cycle
             718,476      branches                  #    0.045 M/sec
              64,755      branch-misses             #    9.01% of all branches

         2.006124542 seconds time elapsed

Fixes: c7e5b328a8 ("perf stat: Report summary for interval mode")
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20200903010113.32232-1-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-09-03 15:48:41 -03:00

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PERF_STATS_H
#define __PERF_STATS_H

#include <linux/types.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/resource.h>

#include "rblist.h"

struct perf_cpu_map;
struct perf_stat_config;
struct timespec;

/* Running stats: sample count, mean and M2 (Welford), plus observed min/max. */
struct stats {
	double n, mean, M2;
	u64 max, min;
};

enum perf_stat_evsel_id {
	PERF_STAT_EVSEL_ID__NONE = 0,
	PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
	PERF_STAT_EVSEL_ID__TRANSACTION_START,
	PERF_STAT_EVSEL_ID__ELISION_START,
	PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
	PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
	PERF_STAT_EVSEL_ID__SMI_NUM,
	PERF_STAT_EVSEL_ID__APERF,
	PERF_STAT_EVSEL_ID__MAX,
};

struct perf_stat_evsel {
	struct stats res_stats[3];
	enum perf_stat_evsel_id id;
	u64 *group_data;
};

enum aggr_mode {
	AGGR_NONE,
	AGGR_GLOBAL,
	AGGR_SOCKET,
	AGGR_DIE,
	AGGR_CORE,
	AGGR_THREAD,
	AGGR_UNSET,
	AGGR_NODE,
};

enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,
};

#define NUM_CTX CTX_BIT_MAX
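
/*
 * Illustrative sketch, not part of this header: the shadow-stat code keeps
 * NUM_CTX-sized arrays and indexes them with a context value built from the
 * event's exclusion attributes, roughly along these lines (the evsel layout
 * is assumed from the rest of tools/perf):
 *
 *	int ctx = 0;
 *
 *	if (evsel->core.attr.exclude_kernel)
 *		ctx |= CTX_BIT_KERNEL;
 *	if (evsel->core.attr.exclude_user)
 *		ctx |= CTX_BIT_USER;
 *	if (evsel->core.attr.exclude_hv)
 *		ctx |= CTX_BIT_HV;
 */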

enum stat_type {
	STAT_NONE = 0,
	STAT_NSECS,
	STAT_CYCLES,
	STAT_STALLED_CYCLES_FRONT,
	STAT_STALLED_CYCLES_BACK,
	STAT_BRANCHES,
	STAT_CACHEREFS,
	STAT_L1_DCACHE,
	STAT_L1_ICACHE,
	STAT_LL_CACHE,
	STAT_ITLB_CACHE,
	STAT_DTLB_CACHE,
	STAT_CYCLES_IN_TX,
	STAT_TRANSACTION,
	STAT_ELISION,
	STAT_TOPDOWN_TOTAL_SLOTS,
	STAT_TOPDOWN_SLOTS_ISSUED,
	STAT_TOPDOWN_SLOTS_RETIRED,
	STAT_TOPDOWN_FETCH_BUBBLES,
	STAT_TOPDOWN_RECOVERY_BUBBLES,
	STAT_SMI_NUM,
	STAT_APERF,
	STAT_MAX
};

struct runtime_stat {
	struct rblist value_list;
};

typedef int (*aggr_get_id_t)(struct perf_stat_config *config,
			     struct perf_cpu_map *m, int cpu);

struct perf_stat_config {
	enum aggr_mode aggr_mode;
	bool scale;
	bool no_inherit;
	bool identifier;
	bool csv_output;
	bool interval_clear;
	bool metric_only;
	bool null_run;
	bool ru_display;
	bool big_num;
	bool no_merge;
	bool walltime_run_table;
	bool all_kernel;
	bool all_user;
	bool percore_show_thread;
	bool summary;
	bool metric_no_group;
	bool metric_no_merge;
	bool stop_read_counter;
	FILE *output;
	unsigned int interval;
	unsigned int timeout;
	int initial_delay;
	unsigned int unit_width;
	unsigned int metric_only_len;
	int times;
	int run_count;
	int print_free_counters_hint;
	int print_mixed_hw_group_error;
	struct runtime_stat *stats;
	int stats_num;
	const char *csv_sep;
	struct stats *walltime_nsecs_stats;
	struct rusage ru_data;
	struct perf_cpu_map *aggr_map;
	aggr_get_id_t aggr_get_id;
	struct perf_cpu_map *cpus_aggr_map;
	u64 *walltime_run;
	struct rblist metric_events;
	int ctl_fd;
	int ctl_fd_ack;
};
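
/*
 * Illustrative only (the field values here are assumptions, not copied from
 * perf): builtin-stat.c keeps a file-scope instance of this struct and fills
 * it with designated initializers, e.g.:
 *
 *	static struct perf_stat_config stat_config = {
 *		.aggr_mode	= AGGR_GLOBAL,
 *		.scale		= true,
 *		.unit_width	= 4,
 *		.run_count	= 1,
 *		.big_num	= true,
 *		.ctl_fd		= -1,
 *		.ctl_fd_ack	= -1,
 *	};
 */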

void perf_stat__set_big_num(int set);

void update_stats(struct stats *stats, u64 val);
double avg_stats(struct stats *stats);
double stddev_stats(struct stats *stats);
double rel_stddev_stats(double stddev, double avg);

static inline void init_stats(struct stats *stats)
{
	stats->n    = 0.0;
	stats->mean = 0.0;
	stats->M2   = 0.0;
	stats->min  = (u64) -1;
	stats->max  = 0;
}
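
/*
 * Usage sketch (illustrative, not part of this header): the helpers above
 * maintain a running mean/variance, so a typical caller does:
 *
 *	struct stats st;
 *
 *	init_stats(&st);
 *	update_stats(&st, 100);
 *	update_stats(&st, 200);
 *	update_stats(&st, 300);
 *	printf("%f +- %f\n", avg_stats(&st), stddev_stats(&st));
 */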

struct evsel;
struct evlist;

struct perf_aggr_thread_value {
	struct evsel *counter;
	int id;
	double uval;
	u64 val;
	u64 run;
	u64 ena;
};

bool __perf_evsel_stat__is(struct evsel *evsel,
			   enum perf_stat_evsel_id id);

#define perf_stat_evsel__is(evsel, id) \
	__perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
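
/*
 * Example: perf_stat_evsel__is(evsel, CYCLES_IN_TX) expands to
 * __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__CYCLES_IN_TX).
 */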

extern struct runtime_stat rt_stat;
extern struct stats walltime_nsecs_stats;

typedef void (*print_metric_t)(struct perf_stat_config *config,
			       void *ctx, const char *color, const char *unit,
			       const char *fmt, double val);
typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx);

void runtime_stat__init(struct runtime_stat *st);
void runtime_stat__exit(struct runtime_stat *st);
void perf_stat__init_shadow_stats(void);
void perf_stat__reset_shadow_stats(void);
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st);

struct perf_stat_output_ctx {
	void *ctx;
	print_metric_t print_metric;
	new_line_t new_line;
	bool force_header;
};

void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st);
void perf_stat__collect_metric_expr(struct evlist *);

int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
void perf_evlist__free_stats(struct evlist *evlist);
void perf_evlist__reset_stats(struct evlist *evlist);
void perf_evlist__reset_prev_raw_counts(struct evlist *evlist);
void perf_evlist__copy_prev_raw_counts(struct evlist *evlist);
void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist);

int perf_stat_process_counter(struct perf_stat_config *config,
			      struct evsel *counter);
struct perf_tool;
union perf_event;
struct perf_session;
struct target;

int perf_event__process_stat_event(struct perf_session *session,
				   union perf_event *event);

size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);

int create_perf_stat_counter(struct evsel *evsel,
			     struct perf_stat_config *config,
			     struct target *target,
			     int cpu);
void
perf_evlist__print_counters(struct evlist *evlist,
			    struct perf_stat_config *config,
			    struct target *_target,
			    struct timespec *ts,
			    int argc, const char **argv);

struct metric_expr;
double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);

#endif