sched: Make select_fallback_rq() cpuset friendly

Introduce cpuset_cpus_allowed_fallback() helper to fix the cpuset problems
with select_fallback_rq(). It can be called from any context and can't use
any cpuset locks including task_lock(). It is called when the task doesn't
have online cpus in ->cpus_allowed but ttwu/etc must be able to find a
suitable cpu.

I am not proud of this patch. Everything which needs such a fat comment
can't be good even if correct. But I'd prefer to not change the locking
rules in the code I hardly understand, and in any case I believe this
simple change makes the code much more correct compared to deadlocks we
currently have.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100315091027.GA9155@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Oleg Nesterov 2010-03-15 10:10:27 +01:00 committed by Ingo Molnar
parent 6a1bdc1b57
commit 9084bb8246
3 changed files with 50 additions and 3 deletions

View File

@ -21,6 +21,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
extern int cpuset_init(void); extern int cpuset_init(void);
extern void cpuset_init_smp(void); extern void cpuset_init_smp(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed) #define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void); void cpuset_init_current_mems_allowed(void);
@ -101,6 +102,12 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
cpumask_copy(mask, cpu_possible_mask); cpumask_copy(mask, cpu_possible_mask);
} }
/*
 * Fallback used when @p has no usable cpu in ->cpus_allowed: widen the
 * task's affinity to every possible cpu and hand back some active cpu.
 * This is the same pair of operations select_fallback_rq() used to
 * perform inline.
 */
static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
{
	int fallback_cpu;

	/* Drop any affinity restriction first, then pick a target. */
	cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
	fallback_cpu = cpumask_any(cpu_active_mask);

	return fallback_cpu;
}
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
{ {
return node_possible_map; return node_possible_map;

View File

@ -2188,6 +2188,48 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
mutex_unlock(&callback_mutex); mutex_unlock(&callback_mutex);
} }
/*
 * cpuset_cpus_allowed_fallback - pick a fallback cpu for @tsk.
 *
 * Called when the task doesn't have online cpus in ->cpus_allowed but
 * the caller (ttwu/etc via select_fallback_rq()) must still find a
 * suitable cpu. It can be called from any context, so it must not take
 * any cpuset locks, including task_lock(); the task's cpuset is only
 * looked at under rcu_read_lock().
 *
 * Side effect: overwrites tsk->cpus_allowed, first with the cpuset's
 * mask and, if that has no active cpu, with cpu_possible_mask.
 *
 * Returns a cpu taken from cpu_active_mask.
 */
int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
const struct cpuset *cs;
int cpu;
/* Lockless snapshot of the cpuset's mask; see the race notes below. */
rcu_read_lock();
cs = task_cs(tsk);
if (cs)
cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
rcu_read_unlock();
/*
 * We own tsk->cpus_allowed, nobody can change it under us.
 *
 * But we used cs && cs->cpus_allowed locklessly and thus can
 * race with cgroup_attach_task() or update_cpumask() and get
 * the wrong tsk->cpus_allowed. However, both cases imply the
 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
 * which takes task_rq_lock().
 *
 * If we are called after it dropped the lock we must see all
 * changes in task_cs()->cpus_allowed. Otherwise we can temporarily
 * set any mask even if it is not right from task_cs() pov,
 * the pending set_cpus_allowed_ptr() will fix things.
 */
cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
/* No active cpu in the (possibly stale) cpuset mask: give up on it. */
if (cpu >= nr_cpu_ids) {
/*
 * Either tsk->cpus_allowed is wrong (see above) or it
 * is actually empty. The latter case is only possible
 * if we are racing with remove_tasks_in_empty_cpuset().
 * Like above we can temporarily set any mask and rely on
 * set_cpus_allowed_ptr() as synchronization point.
 */
cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
cpu = cpumask_any(cpu_active_mask);
}
return cpu;
}
void cpuset_init_current_mems_allowed(void) void cpuset_init_current_mems_allowed(void)
{ {
nodes_setall(current->mems_allowed); nodes_setall(current->mems_allowed);

View File

@ -2300,9 +2300,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
/* No more Mr. Nice Guy. */ /* No more Mr. Nice Guy. */
if (unlikely(dest_cpu >= nr_cpu_ids)) { if (unlikely(dest_cpu >= nr_cpu_ids)) {
cpumask_copy(&p->cpus_allowed, cpu_possible_mask); dest_cpu = cpuset_cpus_allowed_fallback(p);
dest_cpu = cpumask_any(cpu_active_mask);
/* /*
* Don't tell them about moving exiting tasks or * Don't tell them about moving exiting tasks or
* kernel threads (both mm NULL), since they never * kernel threads (both mm NULL), since they never