mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-30 07:34:12 +08:00
perf: Cure task_oncpu_function_call() races
Oleg reported that on architectures with __ARCH_WANT_INTERRUPTS_ON_CTXSW the IPI from task_oncpu_function_call() can land before perf_event_task_sched_in() and cause interesting situations for eg. perf_install_in_context(). This patch reworks the task_oncpu_function_call() interface to give a more usable primitive as well as rework all its users to hopefully be more obvious as well as remove the races. While looking at the code I also found a number of races against perf_event_task_sched_out() which can flip contexts between tasks so plug those too. Reported-and-reviewed-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
b84defe603
commit
fe4b04fa31
@ -2578,13 +2578,6 @@ static inline void inc_syscw(struct task_struct *tsk)
|
|||||||
#define TASK_SIZE_OF(tsk) TASK_SIZE
|
#define TASK_SIZE_OF(tsk) TASK_SIZE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* Call the function if the target task is executing on a CPU right now:
|
|
||||||
*/
|
|
||||||
extern void task_oncpu_function_call(struct task_struct *p,
|
|
||||||
void (*func) (void *info), void *info);
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_MM_OWNER
|
#ifdef CONFIG_MM_OWNER
|
||||||
extern void mm_update_next_owner(struct mm_struct *mm);
|
extern void mm_update_next_owner(struct mm_struct *mm);
|
||||||
extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
|
extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
|
||||||
|
@ -38,6 +38,79 @@
|
|||||||
|
|
||||||
#include <asm/irq_regs.h>
|
#include <asm/irq_regs.h>
|
||||||
|
|
||||||
|
struct remote_function_call {
|
||||||
|
struct task_struct *p;
|
||||||
|
int (*func)(void *info);
|
||||||
|
void *info;
|
||||||
|
int ret;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void remote_function(void *data)
|
||||||
|
{
|
||||||
|
struct remote_function_call *tfc = data;
|
||||||
|
struct task_struct *p = tfc->p;
|
||||||
|
|
||||||
|
if (p) {
|
||||||
|
tfc->ret = -EAGAIN;
|
||||||
|
if (task_cpu(p) != smp_processor_id() || !task_curr(p))
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
tfc->ret = tfc->func(tfc->info);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* task_function_call - call a function on the cpu on which a task runs
|
||||||
|
* @p: the task to evaluate
|
||||||
|
* @func: the function to be called
|
||||||
|
* @info: the function call argument
|
||||||
|
*
|
||||||
|
* Calls the function @func when the task is currently running. This might
|
||||||
|
* be on the current CPU, which just calls the function directly
|
||||||
|
*
|
||||||
|
* returns: @func return value, or
|
||||||
|
* -ESRCH - when the process isn't running
|
||||||
|
* -EAGAIN - when the process moved away
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
|
||||||
|
{
|
||||||
|
struct remote_function_call data = {
|
||||||
|
.p = p,
|
||||||
|
.func = func,
|
||||||
|
.info = info,
|
||||||
|
.ret = -ESRCH, /* No such (running) process */
|
||||||
|
};
|
||||||
|
|
||||||
|
if (task_curr(p))
|
||||||
|
smp_call_function_single(task_cpu(p), remote_function, &data, 1);
|
||||||
|
|
||||||
|
return data.ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* cpu_function_call - call a function on the cpu
|
||||||
|
* @func: the function to be called
|
||||||
|
* @info: the function call argument
|
||||||
|
*
|
||||||
|
* Calls the function @func on the remote cpu.
|
||||||
|
*
|
||||||
|
* returns: @func return value or -ENXIO when the cpu is offline
|
||||||
|
*/
|
||||||
|
static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
|
||||||
|
{
|
||||||
|
struct remote_function_call data = {
|
||||||
|
.p = NULL,
|
||||||
|
.func = func,
|
||||||
|
.info = info,
|
||||||
|
.ret = -ENXIO, /* No such CPU */
|
||||||
|
};
|
||||||
|
|
||||||
|
smp_call_function_single(cpu, remote_function, &data, 1);
|
||||||
|
|
||||||
|
return data.ret;
|
||||||
|
}
|
||||||
|
|
||||||
enum event_type_t {
|
enum event_type_t {
|
||||||
EVENT_FLEXIBLE = 0x1,
|
EVENT_FLEXIBLE = 0x1,
|
||||||
EVENT_PINNED = 0x2,
|
EVENT_PINNED = 0x2,
|
||||||
@ -254,7 +327,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
|
|||||||
raw_spin_lock_irqsave(&ctx->lock, flags);
|
raw_spin_lock_irqsave(&ctx->lock, flags);
|
||||||
--ctx->pin_count;
|
--ctx->pin_count;
|
||||||
raw_spin_unlock_irqrestore(&ctx->lock, flags);
|
raw_spin_unlock_irqrestore(&ctx->lock, flags);
|
||||||
put_ctx(ctx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -618,35 +690,24 @@ __get_cpu_context(struct perf_event_context *ctx)
|
|||||||
* We disable the event on the hardware level first. After that we
|
* We disable the event on the hardware level first. After that we
|
||||||
* remove it from the context list.
|
* remove it from the context list.
|
||||||
*/
|
*/
|
||||||
static void __perf_event_remove_from_context(void *info)
|
static int __perf_remove_from_context(void *info)
|
||||||
{
|
{
|
||||||
struct perf_event *event = info;
|
struct perf_event *event = info;
|
||||||
struct perf_event_context *ctx = event->ctx;
|
struct perf_event_context *ctx = event->ctx;
|
||||||
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
||||||
|
|
||||||
/*
|
|
||||||
* If this is a task context, we need to check whether it is
|
|
||||||
* the current task context of this cpu. If not it has been
|
|
||||||
* scheduled out before the smp call arrived.
|
|
||||||
*/
|
|
||||||
if (ctx->task && cpuctx->task_ctx != ctx)
|
|
||||||
return;
|
|
||||||
|
|
||||||
raw_spin_lock(&ctx->lock);
|
raw_spin_lock(&ctx->lock);
|
||||||
|
|
||||||
event_sched_out(event, cpuctx, ctx);
|
event_sched_out(event, cpuctx, ctx);
|
||||||
|
|
||||||
list_del_event(event, ctx);
|
list_del_event(event, ctx);
|
||||||
|
|
||||||
raw_spin_unlock(&ctx->lock);
|
raw_spin_unlock(&ctx->lock);
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove the event from a task's (or a CPU's) list of events.
|
* Remove the event from a task's (or a CPU's) list of events.
|
||||||
*
|
*
|
||||||
* Must be called with ctx->mutex held.
|
|
||||||
*
|
|
||||||
* CPU events are removed with a smp call. For task events we only
|
* CPU events are removed with a smp call. For task events we only
|
||||||
* call when the task is on a CPU.
|
* call when the task is on a CPU.
|
||||||
*
|
*
|
||||||
@ -657,49 +718,48 @@ static void __perf_event_remove_from_context(void *info)
|
|||||||
* When called from perf_event_exit_task, it's OK because the
|
* When called from perf_event_exit_task, it's OK because the
|
||||||
* context has been detached from its task.
|
* context has been detached from its task.
|
||||||
*/
|
*/
|
||||||
static void perf_event_remove_from_context(struct perf_event *event)
|
static void perf_remove_from_context(struct perf_event *event)
|
||||||
{
|
{
|
||||||
struct perf_event_context *ctx = event->ctx;
|
struct perf_event_context *ctx = event->ctx;
|
||||||
struct task_struct *task = ctx->task;
|
struct task_struct *task = ctx->task;
|
||||||
|
|
||||||
|
lockdep_assert_held(&ctx->mutex);
|
||||||
|
|
||||||
if (!task) {
|
if (!task) {
|
||||||
/*
|
/*
|
||||||
* Per cpu events are removed via an smp call and
|
* Per cpu events are removed via an smp call and
|
||||||
* the removal is always successful.
|
* the removal is always successful.
|
||||||
*/
|
*/
|
||||||
smp_call_function_single(event->cpu,
|
cpu_function_call(event->cpu, __perf_remove_from_context, event);
|
||||||
__perf_event_remove_from_context,
|
|
||||||
event, 1);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
task_oncpu_function_call(task, __perf_event_remove_from_context,
|
if (!task_function_call(task, __perf_remove_from_context, event))
|
||||||
event);
|
return;
|
||||||
|
|
||||||
raw_spin_lock_irq(&ctx->lock);
|
raw_spin_lock_irq(&ctx->lock);
|
||||||
/*
|
/*
|
||||||
* If the context is active we need to retry the smp call.
|
* If we failed to find a running task, but find the context active now
|
||||||
|
* that we've acquired the ctx->lock, retry.
|
||||||
*/
|
*/
|
||||||
if (ctx->nr_active && !list_empty(&event->group_entry)) {
|
if (ctx->is_active) {
|
||||||
raw_spin_unlock_irq(&ctx->lock);
|
raw_spin_unlock_irq(&ctx->lock);
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The lock prevents that this context is scheduled in so we
|
* Since the task isn't running, its safe to remove the event, us
|
||||||
* can remove the event safely, if the call above did not
|
* holding the ctx->lock ensures the task won't get scheduled in.
|
||||||
* succeed.
|
|
||||||
*/
|
*/
|
||||||
if (!list_empty(&event->group_entry))
|
list_del_event(event, ctx);
|
||||||
list_del_event(event, ctx);
|
|
||||||
raw_spin_unlock_irq(&ctx->lock);
|
raw_spin_unlock_irq(&ctx->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Cross CPU call to disable a performance event
|
* Cross CPU call to disable a performance event
|
||||||
*/
|
*/
|
||||||
static void __perf_event_disable(void *info)
|
static int __perf_event_disable(void *info)
|
||||||
{
|
{
|
||||||
struct perf_event *event = info;
|
struct perf_event *event = info;
|
||||||
struct perf_event_context *ctx = event->ctx;
|
struct perf_event_context *ctx = event->ctx;
|
||||||
@ -708,9 +768,12 @@ static void __perf_event_disable(void *info)
|
|||||||
/*
|
/*
|
||||||
* If this is a per-task event, need to check whether this
|
* If this is a per-task event, need to check whether this
|
||||||
* event's task is the current task on this cpu.
|
* event's task is the current task on this cpu.
|
||||||
|
*
|
||||||
|
* Can trigger due to concurrent perf_event_context_sched_out()
|
||||||
|
* flipping contexts around.
|
||||||
*/
|
*/
|
||||||
if (ctx->task && cpuctx->task_ctx != ctx)
|
if (ctx->task && cpuctx->task_ctx != ctx)
|
||||||
return;
|
return -EINVAL;
|
||||||
|
|
||||||
raw_spin_lock(&ctx->lock);
|
raw_spin_lock(&ctx->lock);
|
||||||
|
|
||||||
@ -729,6 +792,8 @@ static void __perf_event_disable(void *info)
|
|||||||
}
|
}
|
||||||
|
|
||||||
raw_spin_unlock(&ctx->lock);
|
raw_spin_unlock(&ctx->lock);
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -753,13 +818,13 @@ void perf_event_disable(struct perf_event *event)
|
|||||||
/*
|
/*
|
||||||
* Disable the event on the cpu that it's on
|
* Disable the event on the cpu that it's on
|
||||||
*/
|
*/
|
||||||
smp_call_function_single(event->cpu, __perf_event_disable,
|
cpu_function_call(event->cpu, __perf_event_disable, event);
|
||||||
event, 1);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
task_oncpu_function_call(task, __perf_event_disable, event);
|
if (!task_function_call(task, __perf_event_disable, event))
|
||||||
|
return;
|
||||||
|
|
||||||
raw_spin_lock_irq(&ctx->lock);
|
raw_spin_lock_irq(&ctx->lock);
|
||||||
/*
|
/*
|
||||||
@ -767,6 +832,11 @@ retry:
|
|||||||
*/
|
*/
|
||||||
if (event->state == PERF_EVENT_STATE_ACTIVE) {
|
if (event->state == PERF_EVENT_STATE_ACTIVE) {
|
||||||
raw_spin_unlock_irq(&ctx->lock);
|
raw_spin_unlock_irq(&ctx->lock);
|
||||||
|
/*
|
||||||
|
* Reload the task pointer, it might have been changed by
|
||||||
|
* a concurrent perf_event_context_sched_out().
|
||||||
|
*/
|
||||||
|
task = ctx->task;
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -778,7 +848,6 @@ retry:
|
|||||||
update_group_times(event);
|
update_group_times(event);
|
||||||
event->state = PERF_EVENT_STATE_OFF;
|
event->state = PERF_EVENT_STATE_OFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
raw_spin_unlock_irq(&ctx->lock);
|
raw_spin_unlock_irq(&ctx->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -928,12 +997,14 @@ static void add_event_to_ctx(struct perf_event *event,
|
|||||||
event->tstamp_stopped = tstamp;
|
event->tstamp_stopped = tstamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void perf_event_context_sched_in(struct perf_event_context *ctx);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Cross CPU call to install and enable a performance event
|
* Cross CPU call to install and enable a performance event
|
||||||
*
|
*
|
||||||
* Must be called with ctx->mutex held
|
* Must be called with ctx->mutex held
|
||||||
*/
|
*/
|
||||||
static void __perf_install_in_context(void *info)
|
static int __perf_install_in_context(void *info)
|
||||||
{
|
{
|
||||||
struct perf_event *event = info;
|
struct perf_event *event = info;
|
||||||
struct perf_event_context *ctx = event->ctx;
|
struct perf_event_context *ctx = event->ctx;
|
||||||
@ -942,17 +1013,12 @@ static void __perf_install_in_context(void *info)
|
|||||||
int err;
|
int err;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If this is a task context, we need to check whether it is
|
* In case we're installing a new context to an already running task,
|
||||||
* the current task context of this cpu. If not it has been
|
* could also happen before perf_event_task_sched_in() on architectures
|
||||||
* scheduled out before the smp call arrived.
|
* which do context switches with IRQs enabled.
|
||||||
* Or possibly this is the right context but it isn't
|
|
||||||
* on this cpu because it had no events.
|
|
||||||
*/
|
*/
|
||||||
if (ctx->task && cpuctx->task_ctx != ctx) {
|
if (ctx->task && !cpuctx->task_ctx)
|
||||||
if (cpuctx->task_ctx || ctx->task != current)
|
perf_event_context_sched_in(ctx);
|
||||||
return;
|
|
||||||
cpuctx->task_ctx = ctx;
|
|
||||||
}
|
|
||||||
|
|
||||||
raw_spin_lock(&ctx->lock);
|
raw_spin_lock(&ctx->lock);
|
||||||
ctx->is_active = 1;
|
ctx->is_active = 1;
|
||||||
@ -997,6 +1063,8 @@ static void __perf_install_in_context(void *info)
|
|||||||
|
|
||||||
unlock:
|
unlock:
|
||||||
raw_spin_unlock(&ctx->lock);
|
raw_spin_unlock(&ctx->lock);
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1008,8 +1076,6 @@ unlock:
|
|||||||
* If the event is attached to a task which is on a CPU we use a smp
|
* If the event is attached to a task which is on a CPU we use a smp
|
||||||
* call to enable it in the task context. The task might have been
|
* call to enable it in the task context. The task might have been
|
||||||
* scheduled away, but we check this in the smp call again.
|
* scheduled away, but we check this in the smp call again.
|
||||||
*
|
|
||||||
* Must be called with ctx->mutex held.
|
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
perf_install_in_context(struct perf_event_context *ctx,
|
perf_install_in_context(struct perf_event_context *ctx,
|
||||||
@ -1018,6 +1084,8 @@ perf_install_in_context(struct perf_event_context *ctx,
|
|||||||
{
|
{
|
||||||
struct task_struct *task = ctx->task;
|
struct task_struct *task = ctx->task;
|
||||||
|
|
||||||
|
lockdep_assert_held(&ctx->mutex);
|
||||||
|
|
||||||
event->ctx = ctx;
|
event->ctx = ctx;
|
||||||
|
|
||||||
if (!task) {
|
if (!task) {
|
||||||
@ -1025,31 +1093,29 @@ perf_install_in_context(struct perf_event_context *ctx,
|
|||||||
* Per cpu events are installed via an smp call and
|
* Per cpu events are installed via an smp call and
|
||||||
* the install is always successful.
|
* the install is always successful.
|
||||||
*/
|
*/
|
||||||
smp_call_function_single(cpu, __perf_install_in_context,
|
cpu_function_call(cpu, __perf_install_in_context, event);
|
||||||
event, 1);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
task_oncpu_function_call(task, __perf_install_in_context,
|
if (!task_function_call(task, __perf_install_in_context, event))
|
||||||
event);
|
return;
|
||||||
|
|
||||||
raw_spin_lock_irq(&ctx->lock);
|
raw_spin_lock_irq(&ctx->lock);
|
||||||
/*
|
/*
|
||||||
* we need to retry the smp call.
|
* If we failed to find a running task, but find the context active now
|
||||||
|
* that we've acquired the ctx->lock, retry.
|
||||||
*/
|
*/
|
||||||
if (ctx->is_active && list_empty(&event->group_entry)) {
|
if (ctx->is_active) {
|
||||||
raw_spin_unlock_irq(&ctx->lock);
|
raw_spin_unlock_irq(&ctx->lock);
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The lock prevents that this context is scheduled in so we
|
* Since the task isn't running, its safe to add the event, us holding
|
||||||
* can add the event safely, if it the call above did not
|
* the ctx->lock ensures the task won't get scheduled in.
|
||||||
* succeed.
|
|
||||||
*/
|
*/
|
||||||
if (list_empty(&event->group_entry))
|
add_event_to_ctx(event, ctx);
|
||||||
add_event_to_ctx(event, ctx);
|
|
||||||
raw_spin_unlock_irq(&ctx->lock);
|
raw_spin_unlock_irq(&ctx->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1078,7 +1144,7 @@ static void __perf_event_mark_enabled(struct perf_event *event,
|
|||||||
/*
|
/*
|
||||||
* Cross CPU call to enable a performance event
|
* Cross CPU call to enable a performance event
|
||||||
*/
|
*/
|
||||||
static void __perf_event_enable(void *info)
|
static int __perf_event_enable(void *info)
|
||||||
{
|
{
|
||||||
struct perf_event *event = info;
|
struct perf_event *event = info;
|
||||||
struct perf_event_context *ctx = event->ctx;
|
struct perf_event_context *ctx = event->ctx;
|
||||||
@ -1086,18 +1152,10 @@ static void __perf_event_enable(void *info)
|
|||||||
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
/*
|
if (WARN_ON_ONCE(!ctx->is_active))
|
||||||
* If this is a per-task event, need to check whether this
|
return -EINVAL;
|
||||||
* event's task is the current task on this cpu.
|
|
||||||
*/
|
|
||||||
if (ctx->task && cpuctx->task_ctx != ctx) {
|
|
||||||
if (cpuctx->task_ctx || ctx->task != current)
|
|
||||||
return;
|
|
||||||
cpuctx->task_ctx = ctx;
|
|
||||||
}
|
|
||||||
|
|
||||||
raw_spin_lock(&ctx->lock);
|
raw_spin_lock(&ctx->lock);
|
||||||
ctx->is_active = 1;
|
|
||||||
update_context_time(ctx);
|
update_context_time(ctx);
|
||||||
|
|
||||||
if (event->state >= PERF_EVENT_STATE_INACTIVE)
|
if (event->state >= PERF_EVENT_STATE_INACTIVE)
|
||||||
@ -1138,6 +1196,8 @@ static void __perf_event_enable(void *info)
|
|||||||
|
|
||||||
unlock:
|
unlock:
|
||||||
raw_spin_unlock(&ctx->lock);
|
raw_spin_unlock(&ctx->lock);
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1158,8 +1218,7 @@ void perf_event_enable(struct perf_event *event)
|
|||||||
/*
|
/*
|
||||||
* Enable the event on the cpu that it's on
|
* Enable the event on the cpu that it's on
|
||||||
*/
|
*/
|
||||||
smp_call_function_single(event->cpu, __perf_event_enable,
|
cpu_function_call(event->cpu, __perf_event_enable, event);
|
||||||
event, 1);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1178,8 +1237,15 @@ void perf_event_enable(struct perf_event *event)
|
|||||||
event->state = PERF_EVENT_STATE_OFF;
|
event->state = PERF_EVENT_STATE_OFF;
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
|
if (!ctx->is_active) {
|
||||||
|
__perf_event_mark_enabled(event, ctx);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
raw_spin_unlock_irq(&ctx->lock);
|
raw_spin_unlock_irq(&ctx->lock);
|
||||||
task_oncpu_function_call(task, __perf_event_enable, event);
|
|
||||||
|
if (!task_function_call(task, __perf_event_enable, event))
|
||||||
|
return;
|
||||||
|
|
||||||
raw_spin_lock_irq(&ctx->lock);
|
raw_spin_lock_irq(&ctx->lock);
|
||||||
|
|
||||||
@ -1187,15 +1253,14 @@ retry:
|
|||||||
* If the context is active and the event is still off,
|
* If the context is active and the event is still off,
|
||||||
* we need to retry the cross-call.
|
* we need to retry the cross-call.
|
||||||
*/
|
*/
|
||||||
if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF)
|
if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
|
||||||
|
/*
|
||||||
|
* task could have been flipped by a concurrent
|
||||||
|
* perf_event_context_sched_out()
|
||||||
|
*/
|
||||||
|
task = ctx->task;
|
||||||
goto retry;
|
goto retry;
|
||||||
|
}
|
||||||
/*
|
|
||||||
* Since we have the lock this context can't be scheduled
|
|
||||||
* in, so we can change the state safely.
|
|
||||||
*/
|
|
||||||
if (event->state == PERF_EVENT_STATE_OFF)
|
|
||||||
__perf_event_mark_enabled(event, ctx);
|
|
||||||
|
|
||||||
out:
|
out:
|
||||||
raw_spin_unlock_irq(&ctx->lock);
|
raw_spin_unlock_irq(&ctx->lock);
|
||||||
@ -1339,8 +1404,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void perf_event_context_sched_out(struct task_struct *task, int ctxn,
|
static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
|
||||||
struct task_struct *next)
|
struct task_struct *next)
|
||||||
{
|
{
|
||||||
struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
|
struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
|
||||||
struct perf_event_context *next_ctx;
|
struct perf_event_context *next_ctx;
|
||||||
@ -1533,7 +1598,7 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
|
|||||||
{
|
{
|
||||||
struct perf_cpu_context *cpuctx;
|
struct perf_cpu_context *cpuctx;
|
||||||
|
|
||||||
cpuctx = __get_cpu_context(ctx);
|
cpuctx = __get_cpu_context(ctx);
|
||||||
if (cpuctx->task_ctx == ctx)
|
if (cpuctx->task_ctx == ctx)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -1541,7 +1606,7 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
|
|||||||
cpuctx->task_ctx = ctx;
|
cpuctx->task_ctx = ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
void perf_event_context_sched_in(struct perf_event_context *ctx)
|
static void perf_event_context_sched_in(struct perf_event_context *ctx)
|
||||||
{
|
{
|
||||||
struct perf_cpu_context *cpuctx;
|
struct perf_cpu_context *cpuctx;
|
||||||
|
|
||||||
@ -1627,7 +1692,7 @@ static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
|
|||||||
* Reduce accuracy by one bit such that @a and @b converge
|
* Reduce accuracy by one bit such that @a and @b converge
|
||||||
* to a similar magnitude.
|
* to a similar magnitude.
|
||||||
*/
|
*/
|
||||||
#define REDUCE_FLS(a, b) \
|
#define REDUCE_FLS(a, b) \
|
||||||
do { \
|
do { \
|
||||||
if (a##_fls > b##_fls) { \
|
if (a##_fls > b##_fls) { \
|
||||||
a >>= 1; \
|
a >>= 1; \
|
||||||
@ -2213,6 +2278,9 @@ errout:
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns a matching context with refcount and pincount.
|
||||||
|
*/
|
||||||
static struct perf_event_context *
|
static struct perf_event_context *
|
||||||
find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
|
find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
|
||||||
{
|
{
|
||||||
@ -2237,6 +2305,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
|
|||||||
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
|
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
|
||||||
ctx = &cpuctx->ctx;
|
ctx = &cpuctx->ctx;
|
||||||
get_ctx(ctx);
|
get_ctx(ctx);
|
||||||
|
++ctx->pin_count;
|
||||||
|
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
@ -2250,6 +2319,7 @@ retry:
|
|||||||
ctx = perf_lock_task_context(task, ctxn, &flags);
|
ctx = perf_lock_task_context(task, ctxn, &flags);
|
||||||
if (ctx) {
|
if (ctx) {
|
||||||
unclone_ctx(ctx);
|
unclone_ctx(ctx);
|
||||||
|
++ctx->pin_count;
|
||||||
raw_spin_unlock_irqrestore(&ctx->lock, flags);
|
raw_spin_unlock_irqrestore(&ctx->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2271,8 +2341,10 @@ retry:
|
|||||||
err = -ESRCH;
|
err = -ESRCH;
|
||||||
else if (task->perf_event_ctxp[ctxn])
|
else if (task->perf_event_ctxp[ctxn])
|
||||||
err = -EAGAIN;
|
err = -EAGAIN;
|
||||||
else
|
else {
|
||||||
|
++ctx->pin_count;
|
||||||
rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
|
rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
|
||||||
|
}
|
||||||
mutex_unlock(&task->perf_event_mutex);
|
mutex_unlock(&task->perf_event_mutex);
|
||||||
|
|
||||||
if (unlikely(err)) {
|
if (unlikely(err)) {
|
||||||
@ -5950,10 +6022,10 @@ SYSCALL_DEFINE5(perf_event_open,
|
|||||||
struct perf_event_context *gctx = group_leader->ctx;
|
struct perf_event_context *gctx = group_leader->ctx;
|
||||||
|
|
||||||
mutex_lock(&gctx->mutex);
|
mutex_lock(&gctx->mutex);
|
||||||
perf_event_remove_from_context(group_leader);
|
perf_remove_from_context(group_leader);
|
||||||
list_for_each_entry(sibling, &group_leader->sibling_list,
|
list_for_each_entry(sibling, &group_leader->sibling_list,
|
||||||
group_entry) {
|
group_entry) {
|
||||||
perf_event_remove_from_context(sibling);
|
perf_remove_from_context(sibling);
|
||||||
put_ctx(gctx);
|
put_ctx(gctx);
|
||||||
}
|
}
|
||||||
mutex_unlock(&gctx->mutex);
|
mutex_unlock(&gctx->mutex);
|
||||||
@ -5976,6 +6048,7 @@ SYSCALL_DEFINE5(perf_event_open,
|
|||||||
|
|
||||||
perf_install_in_context(ctx, event, cpu);
|
perf_install_in_context(ctx, event, cpu);
|
||||||
++ctx->generation;
|
++ctx->generation;
|
||||||
|
perf_unpin_context(ctx);
|
||||||
mutex_unlock(&ctx->mutex);
|
mutex_unlock(&ctx->mutex);
|
||||||
|
|
||||||
event->owner = current;
|
event->owner = current;
|
||||||
@ -6001,6 +6074,7 @@ SYSCALL_DEFINE5(perf_event_open,
|
|||||||
return event_fd;
|
return event_fd;
|
||||||
|
|
||||||
err_context:
|
err_context:
|
||||||
|
perf_unpin_context(ctx);
|
||||||
put_ctx(ctx);
|
put_ctx(ctx);
|
||||||
err_alloc:
|
err_alloc:
|
||||||
free_event(event);
|
free_event(event);
|
||||||
@ -6051,6 +6125,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
|
|||||||
mutex_lock(&ctx->mutex);
|
mutex_lock(&ctx->mutex);
|
||||||
perf_install_in_context(ctx, event, cpu);
|
perf_install_in_context(ctx, event, cpu);
|
||||||
++ctx->generation;
|
++ctx->generation;
|
||||||
|
perf_unpin_context(ctx);
|
||||||
mutex_unlock(&ctx->mutex);
|
mutex_unlock(&ctx->mutex);
|
||||||
|
|
||||||
return event;
|
return event;
|
||||||
@ -6104,7 +6179,7 @@ __perf_event_exit_task(struct perf_event *child_event,
|
|||||||
{
|
{
|
||||||
struct perf_event *parent_event;
|
struct perf_event *parent_event;
|
||||||
|
|
||||||
perf_event_remove_from_context(child_event);
|
perf_remove_from_context(child_event);
|
||||||
|
|
||||||
parent_event = child_event->parent;
|
parent_event = child_event->parent;
|
||||||
/*
|
/*
|
||||||
@ -6411,7 +6486,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
child_ctx = child->perf_event_ctxp[ctxn];
|
child_ctx = child->perf_event_ctxp[ctxn];
|
||||||
if (!child_ctx) {
|
if (!child_ctx) {
|
||||||
/*
|
/*
|
||||||
* This is executed from the parent task context, so
|
* This is executed from the parent task context, so
|
||||||
@ -6526,6 +6601,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
|
|||||||
mutex_unlock(&parent_ctx->mutex);
|
mutex_unlock(&parent_ctx->mutex);
|
||||||
|
|
||||||
perf_unpin_context(parent_ctx);
|
perf_unpin_context(parent_ctx);
|
||||||
|
put_ctx(parent_ctx);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -6595,9 +6671,9 @@ static void __perf_event_exit_context(void *__info)
|
|||||||
perf_pmu_rotate_stop(ctx->pmu);
|
perf_pmu_rotate_stop(ctx->pmu);
|
||||||
|
|
||||||
list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
|
list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
|
||||||
__perf_event_remove_from_context(event);
|
__perf_remove_from_context(event);
|
||||||
list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
|
list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
|
||||||
__perf_event_remove_from_context(event);
|
__perf_remove_from_context(event);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void perf_event_exit_cpu_context(int cpu)
|
static void perf_event_exit_cpu_context(int cpu)
|
||||||
|
@ -2265,27 +2265,6 @@ void kick_process(struct task_struct *p)
|
|||||||
EXPORT_SYMBOL_GPL(kick_process);
|
EXPORT_SYMBOL_GPL(kick_process);
|
||||||
#endif /* CONFIG_SMP */
|
#endif /* CONFIG_SMP */
|
||||||
|
|
||||||
/**
|
|
||||||
* task_oncpu_function_call - call a function on the cpu on which a task runs
|
|
||||||
* @p: the task to evaluate
|
|
||||||
* @func: the function to be called
|
|
||||||
* @info: the function call argument
|
|
||||||
*
|
|
||||||
* Calls the function @func when the task is currently running. This might
|
|
||||||
* be on the current CPU, which just calls the function directly
|
|
||||||
*/
|
|
||||||
void task_oncpu_function_call(struct task_struct *p,
|
|
||||||
void (*func) (void *info), void *info)
|
|
||||||
{
|
|
||||||
int cpu;
|
|
||||||
|
|
||||||
preempt_disable();
|
|
||||||
cpu = task_cpu(p);
|
|
||||||
if (task_curr(p))
|
|
||||||
smp_call_function_single(cpu, func, info, 1);
|
|
||||||
preempt_enable();
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
/*
|
/*
|
||||||
* ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
|
* ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
|
||||||
@ -2776,9 +2755,12 @@ static inline void
|
|||||||
prepare_task_switch(struct rq *rq, struct task_struct *prev,
|
prepare_task_switch(struct rq *rq, struct task_struct *prev,
|
||||||
struct task_struct *next)
|
struct task_struct *next)
|
||||||
{
|
{
|
||||||
|
sched_info_switch(prev, next);
|
||||||
|
perf_event_task_sched_out(prev, next);
|
||||||
fire_sched_out_preempt_notifiers(prev, next);
|
fire_sched_out_preempt_notifiers(prev, next);
|
||||||
prepare_lock_switch(rq, next);
|
prepare_lock_switch(rq, next);
|
||||||
prepare_arch_switch(next);
|
prepare_arch_switch(next);
|
||||||
|
trace_sched_switch(prev, next);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -2911,7 +2893,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
|
|||||||
struct mm_struct *mm, *oldmm;
|
struct mm_struct *mm, *oldmm;
|
||||||
|
|
||||||
prepare_task_switch(rq, prev, next);
|
prepare_task_switch(rq, prev, next);
|
||||||
trace_sched_switch(prev, next);
|
|
||||||
mm = next->mm;
|
mm = next->mm;
|
||||||
oldmm = prev->active_mm;
|
oldmm = prev->active_mm;
|
||||||
/*
|
/*
|
||||||
@ -3989,9 +3971,6 @@ need_resched_nonpreemptible:
|
|||||||
rq->skip_clock_update = 0;
|
rq->skip_clock_update = 0;
|
||||||
|
|
||||||
if (likely(prev != next)) {
|
if (likely(prev != next)) {
|
||||||
sched_info_switch(prev, next);
|
|
||||||
perf_event_task_sched_out(prev, next);
|
|
||||||
|
|
||||||
rq->nr_switches++;
|
rq->nr_switches++;
|
||||||
rq->curr = next;
|
rq->curr = next;
|
||||||
++*switch_count;
|
++*switch_count;
|
||||||
|
Loading…
Reference in New Issue
Block a user