mirror of
https://github.com/edk2-porting/linux-next.git
synced 2025-01-07 13:13:57 +08:00
sched: adjust when cpu_active and cpuset configurations are updated during cpu on/offlining
Currently, when a cpu goes down, cpu_active is cleared before CPU_DOWN_PREPARE starts and cpuset configuration is updated from a default priority cpu notifier. When a cpu is coming up, it's set before CPU_ONLINE but cpuset configuration again is updated from the same cpu notifier. For cpu notifiers, this presents an inconsistent state. Threads which a CPU_DOWN_PREPARE notifier expects to be bound to the CPU can be migrated to other cpus because the cpu is no longer active. Fix it by updating cpu_active in the highest priority cpu notifier and cpuset configuration in the second highest when a cpu is coming up. The down path is updated similarly. This guarantees that all other cpu notifiers see consistent cpu_active and cpuset configuration. The cpuset_track_online_cpus() notifier is converted to cpuset_update_active_cpus(), which just updates the configuration and is now called from cpuset_cpu_[in]active() notifiers registered from sched_init_smp(). If cpuset is disabled, cpuset_update_active_cpus() degenerates into partition_sched_domains(), making a separate notifier for !CONFIG_CPUSETS unnecessary. This problem is triggered by cmwq. During CPU_DOWN_PREPARE, a hotplug callback creates a kthread and kthread_bind()s it to the target cpu, and the thread is expected to run on that cpu. * Ingo's test discovered __cpuinit/exit markups were incorrect. Fixed. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Ingo Molnar <mingo@elte.hu> Cc: Paul Menage <menage@google.com>
This commit is contained in:
parent
50a323b730
commit
3a101d0548
@ -52,6 +52,22 @@ struct notifier_block;
|
|||||||
* CPU notifier priorities.
|
* CPU notifier priorities.
|
||||||
*/
|
*/
|
||||||
enum {
|
enum {
|
||||||
|
/*
|
||||||
|
* SCHED_ACTIVE marks a cpu which is coming up active during
|
||||||
|
* CPU_ONLINE and CPU_DOWN_FAILED and must be the first
|
||||||
|
* notifier. CPUSET_ACTIVE adjusts cpuset according to
|
||||||
|
* cpu_active mask right after SCHED_ACTIVE. During
|
||||||
|
* CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
|
||||||
|
* ordered in the similar way.
|
||||||
|
*
|
||||||
|
* This ordering guarantees consistent cpu_active mask and
|
||||||
|
* migration behavior to all cpu notifiers.
|
||||||
|
*/
|
||||||
|
CPU_PRI_SCHED_ACTIVE = INT_MAX,
|
||||||
|
CPU_PRI_CPUSET_ACTIVE = INT_MAX - 1,
|
||||||
|
CPU_PRI_SCHED_INACTIVE = INT_MIN + 1,
|
||||||
|
CPU_PRI_CPUSET_INACTIVE = INT_MIN,
|
||||||
|
|
||||||
/* migration should happen before other stuff but after perf */
|
/* migration should happen before other stuff but after perf */
|
||||||
CPU_PRI_PERF = 20,
|
CPU_PRI_PERF = 20,
|
||||||
CPU_PRI_MIGRATION = 10,
|
CPU_PRI_MIGRATION = 10,
|
||||||
|
@ -20,6 +20,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
|
|||||||
|
|
||||||
extern int cpuset_init(void);
|
extern int cpuset_init(void);
|
||||||
extern void cpuset_init_smp(void);
|
extern void cpuset_init_smp(void);
|
||||||
|
extern void cpuset_update_active_cpus(void);
|
||||||
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
|
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
|
||||||
extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
|
extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
|
||||||
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
|
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
|
||||||
@ -132,6 +133,11 @@ static inline void set_mems_allowed(nodemask_t nodemask)
|
|||||||
static inline int cpuset_init(void) { return 0; }
|
static inline int cpuset_init(void) { return 0; }
|
||||||
static inline void cpuset_init_smp(void) {}
|
static inline void cpuset_init_smp(void) {}
|
||||||
|
|
||||||
|
static inline void cpuset_update_active_cpus(void)
|
||||||
|
{
|
||||||
|
partition_sched_domains(1, NULL, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void cpuset_cpus_allowed(struct task_struct *p,
|
static inline void cpuset_cpus_allowed(struct task_struct *p,
|
||||||
struct cpumask *mask)
|
struct cpumask *mask)
|
||||||
{
|
{
|
||||||
|
@ -235,11 +235,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
cpu_hotplug_begin();
|
cpu_hotplug_begin();
|
||||||
set_cpu_active(cpu, false);
|
|
||||||
err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
|
err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
|
||||||
if (err) {
|
if (err) {
|
||||||
set_cpu_active(cpu, true);
|
|
||||||
|
|
||||||
nr_calls--;
|
nr_calls--;
|
||||||
__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
|
__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
|
||||||
printk("%s: attempt to take down CPU %u failed\n",
|
printk("%s: attempt to take down CPU %u failed\n",
|
||||||
@ -249,7 +246,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
|||||||
|
|
||||||
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
|
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
|
||||||
if (err) {
|
if (err) {
|
||||||
set_cpu_active(cpu, true);
|
|
||||||
/* CPU didn't die: tell everyone. Can't complain. */
|
/* CPU didn't die: tell everyone. Can't complain. */
|
||||||
cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
|
cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
|
||||||
|
|
||||||
@ -321,8 +317,6 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
|
|||||||
goto out_notify;
|
goto out_notify;
|
||||||
BUG_ON(!cpu_online(cpu));
|
BUG_ON(!cpu_online(cpu));
|
||||||
|
|
||||||
set_cpu_active(cpu, true);
|
|
||||||
|
|
||||||
/* Now call notifier in preparation. */
|
/* Now call notifier in preparation. */
|
||||||
cpu_notify(CPU_ONLINE | mod, hcpu);
|
cpu_notify(CPU_ONLINE | mod, hcpu);
|
||||||
|
|
||||||
|
@ -2113,31 +2113,17 @@ static void scan_for_empty_cpusets(struct cpuset *root)
|
|||||||
* but making no active use of cpusets.
|
* but making no active use of cpusets.
|
||||||
*
|
*
|
||||||
* This routine ensures that top_cpuset.cpus_allowed tracks
|
* This routine ensures that top_cpuset.cpus_allowed tracks
|
||||||
* cpu_online_map on each CPU hotplug (cpuhp) event.
|
* cpu_active_mask on each CPU hotplug (cpuhp) event.
|
||||||
*
|
*
|
||||||
* Called within get_online_cpus(). Needs to call cgroup_lock()
|
* Called within get_online_cpus(). Needs to call cgroup_lock()
|
||||||
* before calling generate_sched_domains().
|
* before calling generate_sched_domains().
|
||||||
*/
|
*/
|
||||||
static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
|
void __cpuexit cpuset_update_active_cpus(void)
|
||||||
unsigned long phase, void *unused_cpu)
|
|
||||||
{
|
{
|
||||||
struct sched_domain_attr *attr;
|
struct sched_domain_attr *attr;
|
||||||
cpumask_var_t *doms;
|
cpumask_var_t *doms;
|
||||||
int ndoms;
|
int ndoms;
|
||||||
|
|
||||||
switch (phase) {
|
|
||||||
case CPU_ONLINE:
|
|
||||||
case CPU_ONLINE_FROZEN:
|
|
||||||
case CPU_DOWN_PREPARE:
|
|
||||||
case CPU_DOWN_PREPARE_FROZEN:
|
|
||||||
case CPU_DOWN_FAILED:
|
|
||||||
case CPU_DOWN_FAILED_FROZEN:
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
return NOTIFY_DONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
cgroup_lock();
|
cgroup_lock();
|
||||||
mutex_lock(&callback_mutex);
|
mutex_lock(&callback_mutex);
|
||||||
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
|
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
|
||||||
@ -2148,8 +2134,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
|
|||||||
|
|
||||||
/* Have scheduler rebuild the domains */
|
/* Have scheduler rebuild the domains */
|
||||||
partition_sched_domains(ndoms, doms, attr);
|
partition_sched_domains(ndoms, doms, attr);
|
||||||
|
|
||||||
return NOTIFY_OK;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||||
@ -2203,7 +2187,6 @@ void __init cpuset_init_smp(void)
|
|||||||
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
|
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
|
||||||
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
|
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
|
||||||
|
|
||||||
hotcpu_notifier(cpuset_track_online_cpus, 0);
|
|
||||||
hotplug_memory_notifier(cpuset_track_online_nodes, 10);
|
hotplug_memory_notifier(cpuset_track_online_nodes, 10);
|
||||||
|
|
||||||
cpuset_wq = create_singlethread_workqueue("cpuset");
|
cpuset_wq = create_singlethread_workqueue("cpuset");
|
||||||
|
@ -5804,17 +5804,46 @@ static struct notifier_block __cpuinitdata migration_notifier = {
|
|||||||
.priority = CPU_PRI_MIGRATION,
|
.priority = CPU_PRI_MIGRATION,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
|
||||||
|
unsigned long action, void *hcpu)
|
||||||
|
{
|
||||||
|
switch (action & ~CPU_TASKS_FROZEN) {
|
||||||
|
case CPU_ONLINE:
|
||||||
|
case CPU_DOWN_FAILED:
|
||||||
|
set_cpu_active((long)hcpu, true);
|
||||||
|
return NOTIFY_OK;
|
||||||
|
default:
|
||||||
|
return NOTIFY_DONE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
|
||||||
|
unsigned long action, void *hcpu)
|
||||||
|
{
|
||||||
|
switch (action & ~CPU_TASKS_FROZEN) {
|
||||||
|
case CPU_DOWN_PREPARE:
|
||||||
|
set_cpu_active((long)hcpu, false);
|
||||||
|
return NOTIFY_OK;
|
||||||
|
default:
|
||||||
|
return NOTIFY_DONE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int __init migration_init(void)
|
static int __init migration_init(void)
|
||||||
{
|
{
|
||||||
void *cpu = (void *)(long)smp_processor_id();
|
void *cpu = (void *)(long)smp_processor_id();
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
/* Start one for the boot CPU: */
|
/* Initialize migration for the boot CPU */
|
||||||
err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
|
err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
|
||||||
BUG_ON(err == NOTIFY_BAD);
|
BUG_ON(err == NOTIFY_BAD);
|
||||||
migration_call(&migration_notifier, CPU_ONLINE, cpu);
|
migration_call(&migration_notifier, CPU_ONLINE, cpu);
|
||||||
register_cpu_notifier(&migration_notifier);
|
register_cpu_notifier(&migration_notifier);
|
||||||
|
|
||||||
|
/* Register cpu active notifiers */
|
||||||
|
cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
|
||||||
|
cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
early_initcall(migration_init);
|
early_initcall(migration_init);
|
||||||
@ -7273,29 +7302,35 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
|
|||||||
}
|
}
|
||||||
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
|
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
|
||||||
|
|
||||||
#ifndef CONFIG_CPUSETS
|
|
||||||
/*
|
/*
|
||||||
* Add online and remove offline CPUs from the scheduler domains.
|
* Update cpusets according to cpu_active mask. If cpusets are
|
||||||
* When cpusets are enabled they take over this function.
|
* disabled, cpuset_update_active_cpus() becomes a simple wrapper
|
||||||
|
* around partition_sched_domains().
|
||||||
*/
|
*/
|
||||||
static int update_sched_domains(struct notifier_block *nfb,
|
static int __cpuexit cpuset_cpu_active(struct notifier_block *nfb,
|
||||||
unsigned long action, void *hcpu)
|
unsigned long action, void *hcpu)
|
||||||
{
|
{
|
||||||
switch (action) {
|
switch (action & ~CPU_TASKS_FROZEN) {
|
||||||
case CPU_ONLINE:
|
case CPU_ONLINE:
|
||||||
case CPU_ONLINE_FROZEN:
|
|
||||||
case CPU_DOWN_PREPARE:
|
|
||||||
case CPU_DOWN_PREPARE_FROZEN:
|
|
||||||
case CPU_DOWN_FAILED:
|
case CPU_DOWN_FAILED:
|
||||||
case CPU_DOWN_FAILED_FROZEN:
|
cpuset_update_active_cpus();
|
||||||
partition_sched_domains(1, NULL, NULL);
|
return NOTIFY_OK;
|
||||||
|
default:
|
||||||
|
return NOTIFY_DONE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __cpuexit cpuset_cpu_inactive(struct notifier_block *nfb,
|
||||||
|
unsigned long action, void *hcpu)
|
||||||
|
{
|
||||||
|
switch (action & ~CPU_TASKS_FROZEN) {
|
||||||
|
case CPU_DOWN_PREPARE:
|
||||||
|
cpuset_update_active_cpus();
|
||||||
return NOTIFY_OK;
|
return NOTIFY_OK;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return NOTIFY_DONE;
|
return NOTIFY_DONE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
static int update_runtime(struct notifier_block *nfb,
|
static int update_runtime(struct notifier_block *nfb,
|
||||||
unsigned long action, void *hcpu)
|
unsigned long action, void *hcpu)
|
||||||
@ -7341,10 +7376,8 @@ void __init sched_init_smp(void)
|
|||||||
mutex_unlock(&sched_domains_mutex);
|
mutex_unlock(&sched_domains_mutex);
|
||||||
put_online_cpus();
|
put_online_cpus();
|
||||||
|
|
||||||
#ifndef CONFIG_CPUSETS
|
hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
|
||||||
/* XXX: Theoretical race here - CPU may be hotplugged now */
|
hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
|
||||||
hotcpu_notifier(update_sched_domains, 0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* RT runtime code needs to handle some hotplug events */
|
/* RT runtime code needs to handle some hotplug events */
|
||||||
hotcpu_notifier(update_runtime, 0);
|
hotcpu_notifier(update_runtime, 0);
|
||||||
|
Loading…
Reference in New Issue
Block a user