mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-30 07:34:12 +08:00
Revert "sched: Improve scalability via 'CPU buddies', which withstand random perturbations"
This reverts commit 970e178985
. Nikolay Ulyanitsky reported that the 3.6-rc5 kernel has a 15-20% performance drop on PostgreSQL 9.2 on his machine (running "pgbench"). Borislav Petkov was able to reproduce this, and bisected it to this commit 970e178985
("sched: Improve scalability via 'CPU buddies' ...") apparently because the new single-idle-buddy model simply doesn't find idle CPU's to reschedule on aggressively enough. Mike Galbraith suspects that it is likely due to the user-mode spinlocks in PostgreSQL not reacting well to preemption, but we don't really know the details - I'll just revert the commit for now. There are hopefully other approaches to improve scheduler scalability without it causing these kinds of downsides. Reported-by: Nikolay Ulyanitsky <lystor@gmail.com> Bisected-by: Borislav Petkov <bp@alien8.de> Acked-by: Mike Galbraith <efault@gmx.de> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@kernel.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
3f0c3c8fe3
commit
37407ea7f9
@ -954,7 +954,6 @@ struct sched_domain {
|
|||||||
unsigned int smt_gain;
|
unsigned int smt_gain;
|
||||||
int flags; /* See SD_* */
|
int flags; /* See SD_* */
|
||||||
int level;
|
int level;
|
||||||
int idle_buddy; /* cpu assigned to select_idle_sibling() */
|
|
||||||
|
|
||||||
/* Runtime fields. */
|
/* Runtime fields. */
|
||||||
unsigned long last_balance; /* init to jiffies. units in jiffies */
|
unsigned long last_balance; /* init to jiffies. units in jiffies */
|
||||||
|
@ -6014,11 +6014,6 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
|
|||||||
* SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
|
* SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
|
||||||
* allows us to avoid some pointer chasing select_idle_sibling().
|
* allows us to avoid some pointer chasing select_idle_sibling().
|
||||||
*
|
*
|
||||||
* Iterate domains and sched_groups downward, assigning CPUs to be
|
|
||||||
* select_idle_sibling() hw buddy. Cross-wiring hw makes bouncing
|
|
||||||
* due to random perturbation self canceling, ie sw buddies pull
|
|
||||||
* their counterpart to their CPU's hw counterpart.
|
|
||||||
*
|
|
||||||
* Also keep a unique ID per domain (we use the first cpu number in
|
* Also keep a unique ID per domain (we use the first cpu number in
|
||||||
* the cpumask of the domain), this allows us to quickly tell if
|
* the cpumask of the domain), this allows us to quickly tell if
|
||||||
* two cpus are in the same cache domain, see cpus_share_cache().
|
* two cpus are in the same cache domain, see cpus_share_cache().
|
||||||
@ -6032,40 +6027,8 @@ static void update_top_cache_domain(int cpu)
|
|||||||
int id = cpu;
|
int id = cpu;
|
||||||
|
|
||||||
sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
|
sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
|
||||||
if (sd) {
|
if (sd)
|
||||||
struct sched_domain *tmp = sd;
|
|
||||||
struct sched_group *sg, *prev;
|
|
||||||
bool right;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Traverse to first CPU in group, and count hops
|
|
||||||
* to cpu from there, switching direction on each
|
|
||||||
* hop, never ever pointing the last CPU rightward.
|
|
||||||
*/
|
|
||||||
do {
|
|
||||||
id = cpumask_first(sched_domain_span(tmp));
|
|
||||||
prev = sg = tmp->groups;
|
|
||||||
right = 1;
|
|
||||||
|
|
||||||
while (cpumask_first(sched_group_cpus(sg)) != id)
|
|
||||||
sg = sg->next;
|
|
||||||
|
|
||||||
while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) {
|
|
||||||
prev = sg;
|
|
||||||
sg = sg->next;
|
|
||||||
right = !right;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* A CPU went down, never point back to domain start. */
|
|
||||||
if (right && cpumask_first(sched_group_cpus(sg->next)) == id)
|
|
||||||
right = false;
|
|
||||||
|
|
||||||
sg = right ? sg->next : prev;
|
|
||||||
tmp->idle_buddy = cpumask_first(sched_group_cpus(sg));
|
|
||||||
} while ((tmp = tmp->child));
|
|
||||||
|
|
||||||
id = cpumask_first(sched_domain_span(sd));
|
id = cpumask_first(sched_domain_span(sd));
|
||||||
}
|
|
||||||
|
|
||||||
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
|
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
|
||||||
per_cpu(sd_llc_id, cpu) = id;
|
per_cpu(sd_llc_id, cpu) = id;
|
||||||
|
@ -2637,6 +2637,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
|
|||||||
int cpu = smp_processor_id();
|
int cpu = smp_processor_id();
|
||||||
int prev_cpu = task_cpu(p);
|
int prev_cpu = task_cpu(p);
|
||||||
struct sched_domain *sd;
|
struct sched_domain *sd;
|
||||||
|
struct sched_group *sg;
|
||||||
|
int i;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the task is going to be woken-up on this cpu and if it is
|
* If the task is going to be woken-up on this cpu and if it is
|
||||||
@ -2653,17 +2655,29 @@ static int select_idle_sibling(struct task_struct *p, int target)
|
|||||||
return prev_cpu;
|
return prev_cpu;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Otherwise, check assigned siblings to find an elegible idle cpu.
|
* Otherwise, iterate the domains and find an elegible idle cpu.
|
||||||
*/
|
*/
|
||||||
sd = rcu_dereference(per_cpu(sd_llc, target));
|
sd = rcu_dereference(per_cpu(sd_llc, target));
|
||||||
|
|
||||||
for_each_lower_domain(sd) {
|
for_each_lower_domain(sd) {
|
||||||
if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p)))
|
sg = sd->groups;
|
||||||
continue;
|
do {
|
||||||
if (idle_cpu(sd->idle_buddy))
|
if (!cpumask_intersects(sched_group_cpus(sg),
|
||||||
return sd->idle_buddy;
|
tsk_cpus_allowed(p)))
|
||||||
}
|
goto next;
|
||||||
|
|
||||||
|
for_each_cpu(i, sched_group_cpus(sg)) {
|
||||||
|
if (!idle_cpu(i))
|
||||||
|
goto next;
|
||||||
|
}
|
||||||
|
|
||||||
|
target = cpumask_first_and(sched_group_cpus(sg),
|
||||||
|
tsk_cpus_allowed(p));
|
||||||
|
goto done;
|
||||||
|
next:
|
||||||
|
sg = sg->next;
|
||||||
|
} while (sg != sd->groups);
|
||||||
|
}
|
||||||
|
done:
|
||||||
return target;
|
return target;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user