mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-06 02:24:14 +08:00
perf/x86/intel/lbr: Optimize context switches for the LBR call stack
Context switches with perf LBR call stack context are fairly expensive because they do a lot of MSR writes. Currently we unconditionally do the expensive operation when LBR call stack is enabled. This is not necessary in some common cases, e.g., task -> other kernel thread -> same task: the LBR registers are not changed, hence they don't need to be rewritten/restored. Introduce per-CPU variables to track the last LBR call stack context. If the same context is scheduled in, the rewrite/restore is not required, with the following two exceptions: - The LBR registers may be modified by a normal LBR event, i.e., adding a new LBR event or scheduling an existing LBR event. In both cases, the LBR registers are reset first. The last LBR call stack information is cleared in intel_pmu_lbr_reset(), so restoring the LBR registers is required. - The LBR registers are initialized to zero in C6. If the LBR register that TOS points to is cleared, C6 must have been entered while the task was swapped out, so restoring the LBR registers is required as well. These exceptions are not common. Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Stephane Eranian <eranian@google.com> Cc: Vince Weaver <vincent.weaver@maine.edu> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: acme@kernel.org Cc: eranian@google.com Link: https://lore.kernel.org/lkml/1528213126-4312-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
0592e57b24
commit
8b077e4a69
@ -216,6 +216,8 @@ static void intel_pmu_lbr_reset_64(void)
|
||||
|
||||
void intel_pmu_lbr_reset(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
@ -223,6 +225,9 @@ void intel_pmu_lbr_reset(void)
|
||||
intel_pmu_lbr_reset_32();
|
||||
else
|
||||
intel_pmu_lbr_reset_64();
|
||||
|
||||
cpuc->last_task_ctx = NULL;
|
||||
cpuc->last_log_id = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -334,6 +339,7 @@ static inline u64 rdlbr_to(unsigned int idx)
|
||||
|
||||
static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
int i;
|
||||
unsigned lbr_idx, mask;
|
||||
u64 tos;
|
||||
@ -344,8 +350,20 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
|
||||
return;
|
||||
}
|
||||
|
||||
mask = x86_pmu.lbr_nr - 1;
|
||||
tos = task_ctx->tos;
|
||||
/*
|
||||
* Does not restore the LBR registers, if
|
||||
* - No one else touched them, and
|
||||
* - Did not enter C6
|
||||
*/
|
||||
if ((task_ctx == cpuc->last_task_ctx) &&
|
||||
(task_ctx->log_id == cpuc->last_log_id) &&
|
||||
rdlbr_from(tos)) {
|
||||
task_ctx->lbr_stack_state = LBR_NONE;
|
||||
return;
|
||||
}
|
||||
|
||||
mask = x86_pmu.lbr_nr - 1;
|
||||
for (i = 0; i < task_ctx->valid_lbrs; i++) {
|
||||
lbr_idx = (tos - i) & mask;
|
||||
wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
|
||||
@ -369,6 +387,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
|
||||
|
||||
static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
unsigned lbr_idx, mask;
|
||||
u64 tos, from;
|
||||
int i;
|
||||
@ -393,6 +412,9 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
|
||||
task_ctx->valid_lbrs = i;
|
||||
task_ctx->tos = tos;
|
||||
task_ctx->lbr_stack_state = LBR_VALID;
|
||||
|
||||
cpuc->last_task_ctx = task_ctx;
|
||||
cpuc->last_log_id = ++task_ctx->log_id;
|
||||
}
|
||||
|
||||
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
|
||||
|
@ -163,6 +163,7 @@ struct intel_excl_cntrs {
|
||||
unsigned core_id; /* per-core: core id */
|
||||
};
|
||||
|
||||
struct x86_perf_task_context;
|
||||
#define MAX_LBR_ENTRIES 32
|
||||
|
||||
enum {
|
||||
@ -214,6 +215,8 @@ struct cpu_hw_events {
|
||||
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
|
||||
struct er_account *lbr_sel;
|
||||
u64 br_sel;
|
||||
struct x86_perf_task_context *last_task_ctx;
|
||||
int last_log_id;
|
||||
|
||||
/*
|
||||
* Intel host/guest exclude bits
|
||||
@ -651,6 +654,7 @@ struct x86_perf_task_context {
|
||||
int valid_lbrs;
|
||||
int lbr_callstack_users;
|
||||
int lbr_stack_state;
|
||||
int log_id;
|
||||
};
|
||||
|
||||
#define x86_add_quirk(func_) \
|
||||
|
Loading…
Reference in New Issue
Block a user