mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-15 06:55:13 +08:00
perf sort: Fix the 'weight' sort key behavior
Currently, the 'weight' field in the perf sample has latency information for some instructions like in memory accesses. And perf tool has 'weight' and 'local_weight' sort keys to display the info. But it's somewhat confusing what it shows exactly. In my understanding, 'local_weight' shows a weight in a single sample, and (global) 'weight' shows a sum of the weights in the hist_entry. For example: $ perf mem record -t load dd if=/dev/zero of=/dev/null bs=4k count=1M $ perf report --stdio -n -s +local_weight ... # # Overhead Samples Command Shared Object Symbol Local Weight # ........ ....... ....... ................ ......................... ............ # 21.23% 313 dd [kernel.vmlinux] [k] lockref_get_not_zero 32 12.43% 183 dd [kernel.vmlinux] [k] lockref_get_not_zero 35 11.97% 159 dd [kernel.vmlinux] [k] lockref_get_not_zero 36 10.40% 141 dd [kernel.vmlinux] [k] lockref_put_return 32 7.63% 113 dd [kernel.vmlinux] [k] lockref_get_not_zero 33 6.37% 92 dd [kernel.vmlinux] [k] lockref_get_not_zero 34 6.15% 90 dd [kernel.vmlinux] [k] lockref_put_return 33 ... So let's look at the 'lockref_get_not_zero' symbols. The top entry shows that 313 samples were captured with 'local_weight' 32, so the total weight should be 313 x 32 = 10016. But it's not the case: $ perf report --stdio -n -s +local_weight,weight -S lockref_get_not_zero ... # # Overhead Samples Command Shared Object Local Weight Weight # ........ ....... ....... ................ ............ ...... # 1.36% 4 dd [kernel.vmlinux] 36 144 0.47% 4 dd [kernel.vmlinux] 37 148 0.42% 4 dd [kernel.vmlinux] 32 128 0.40% 4 dd [kernel.vmlinux] 34 136 0.35% 4 dd [kernel.vmlinux] 36 144 0.34% 4 dd [kernel.vmlinux] 35 140 0.30% 4 dd [kernel.vmlinux] 36 144 0.30% 4 dd [kernel.vmlinux] 34 136 0.30% 4 dd [kernel.vmlinux] 32 128 0.30% 4 dd [kernel.vmlinux] 32 128 ... With the 'weight' sort key, it's divided to 4 samples even with the same info ('comm', 'dso', 'sym' and 'local_weight'). I don't think this is what we want. I found this because of the way it aggregates the 'weight' value. Since it's not a period, we should not add them in the he->stat. Otherwise, two 32 'weight' entries will create a 64 'weight' entry. After that, new 32 'weight' samples don't have a matching entry so it'd create a new entry and make it a 64 'weight' entry again and again. Later, they will be merged into 128 'weight' entries during the hists__collapse_resort() with 4 samples, multiple times like above. Let's keep the weight and display it differently. For 'local_weight', it can show the weight as is, and for (global) 'weight' it can display the number multiplied by the number of samples. With this change, I can see the expected numbers. $ perf report --stdio -n -s +local_weight,weight -S lockref_get_not_zero ... # # Overhead Samples Command Shared Object Local Weight Weight # ........ ....... ....... ................ ............ ..... # 21.23% 313 dd [kernel.vmlinux] 32 10016 12.43% 183 dd [kernel.vmlinux] 35 6405 11.97% 159 dd [kernel.vmlinux] 36 5724 7.63% 113 dd [kernel.vmlinux] 33 3729 6.37% 92 dd [kernel.vmlinux] 34 3128 4.17% 59 dd [kernel.vmlinux] 37 2183 0.08% 1 dd [kernel.vmlinux] 269 269 0.08% 1 dd [kernel.vmlinux] 38 38 Reviewed-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: https://lore.kernel.org/r/20211105225617.151364-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
70f9c9b2df
commit
784e8adda4
@ -290,11 +290,9 @@ static long hist_time(unsigned long htime)
|
||||
}
|
||||
|
||||
static void he_stat__add_period(struct he_stat *he_stat, u64 period,
|
||||
u64 weight, u64 ins_lat, u64 p_stage_cyc)
|
||||
u64 ins_lat, u64 p_stage_cyc)
|
||||
{
|
||||
|
||||
he_stat->period += period;
|
||||
he_stat->weight += weight;
|
||||
he_stat->nr_events += 1;
|
||||
he_stat->ins_lat += ins_lat;
|
||||
he_stat->p_stage_cyc += p_stage_cyc;
|
||||
@ -308,9 +306,8 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
|
||||
dest->period_guest_sys += src->period_guest_sys;
|
||||
dest->period_guest_us += src->period_guest_us;
|
||||
dest->nr_events += src->nr_events;
|
||||
dest->weight += src->weight;
|
||||
dest->ins_lat += src->ins_lat;
|
||||
dest->p_stage_cyc += src->p_stage_cyc;
|
||||
dest->p_stage_cyc += src->p_stage_cyc;
|
||||
}
|
||||
|
||||
static void he_stat__decay(struct he_stat *he_stat)
|
||||
@ -598,7 +595,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
|
||||
struct hist_entry *he;
|
||||
int64_t cmp;
|
||||
u64 period = entry->stat.period;
|
||||
u64 weight = entry->stat.weight;
|
||||
u64 ins_lat = entry->stat.ins_lat;
|
||||
u64 p_stage_cyc = entry->stat.p_stage_cyc;
|
||||
bool leftmost = true;
|
||||
@ -619,11 +615,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
|
||||
|
||||
if (!cmp) {
|
||||
if (sample_self) {
|
||||
he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc);
|
||||
he_stat__add_period(&he->stat, period, ins_lat, p_stage_cyc);
|
||||
hist_entry__add_callchain_period(he, period);
|
||||
}
|
||||
if (symbol_conf.cumulate_callchain)
|
||||
he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc);
|
||||
he_stat__add_period(he->stat_acc, period, ins_lat, p_stage_cyc);
|
||||
|
||||
/*
|
||||
* This mem info was allocated from sample__resolve_mem
|
||||
@ -733,7 +729,6 @@ __hists__add_entry(struct hists *hists,
|
||||
.stat = {
|
||||
.nr_events = 1,
|
||||
.period = sample->period,
|
||||
.weight = sample->weight,
|
||||
.ins_lat = sample->ins_lat,
|
||||
.p_stage_cyc = sample->p_stage_cyc,
|
||||
},
|
||||
@ -748,6 +743,7 @@ __hists__add_entry(struct hists *hists,
|
||||
.raw_size = sample->raw_size,
|
||||
.ops = ops,
|
||||
.time = hist_time(sample->time),
|
||||
.weight = sample->weight,
|
||||
}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
|
||||
|
||||
if (!hists->has_callchains && he && he->callchain_size != 0)
|
||||
|
@ -1325,45 +1325,35 @@ struct sort_entry sort_mispredict = {
|
||||
.se_width_idx = HISTC_MISPREDICT,
|
||||
};
|
||||
|
||||
static u64 he_weight(struct hist_entry *he)
|
||||
{
|
||||
return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
|
||||
}
|
||||
|
||||
static int64_t
|
||||
sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
sort__weight_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
return he_weight(left) - he_weight(right);
|
||||
return left->weight - right->weight;
|
||||
}
|
||||
|
||||
static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
|
||||
size_t size, unsigned int width)
|
||||
{
|
||||
return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he));
|
||||
return repsep_snprintf(bf, size, "%-*llu", width, he->weight);
|
||||
}
|
||||
|
||||
struct sort_entry sort_local_weight = {
|
||||
.se_header = "Local Weight",
|
||||
.se_cmp = sort__local_weight_cmp,
|
||||
.se_cmp = sort__weight_cmp,
|
||||
.se_snprintf = hist_entry__local_weight_snprintf,
|
||||
.se_width_idx = HISTC_LOCAL_WEIGHT,
|
||||
};
|
||||
|
||||
static int64_t
|
||||
sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
|
||||
{
|
||||
return left->stat.weight - right->stat.weight;
|
||||
}
|
||||
|
||||
static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
|
||||
size_t size, unsigned int width)
|
||||
{
|
||||
return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight);
|
||||
return repsep_snprintf(bf, size, "%-*llu", width,
|
||||
he->weight * he->stat.nr_events);
|
||||
}
|
||||
|
||||
struct sort_entry sort_global_weight = {
|
||||
.se_header = "Weight",
|
||||
.se_cmp = sort__global_weight_cmp,
|
||||
.se_cmp = sort__weight_cmp,
|
||||
.se_snprintf = hist_entry__global_weight_snprintf,
|
||||
.se_width_idx = HISTC_GLOBAL_WEIGHT,
|
||||
};
|
||||
|
@ -49,7 +49,6 @@ struct he_stat {
|
||||
u64 period_us;
|
||||
u64 period_guest_sys;
|
||||
u64 period_guest_us;
|
||||
u64 weight;
|
||||
u64 ins_lat;
|
||||
u64 p_stage_cyc;
|
||||
u32 nr_events;
|
||||
@ -109,6 +108,7 @@ struct hist_entry {
|
||||
s32 socket;
|
||||
s32 cpu;
|
||||
u64 code_page_size;
|
||||
u64 weight;
|
||||
u8 cpumode;
|
||||
u8 depth;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user