mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-23 20:53:53 +08:00
sched/fair: Make sched-idle CPU selection consistent throughout
There are instances where we keep searching for an idle CPU despite already having a sched-idle CPU (in find_idlest_group_cpu(), select_idle_smt() and select_idle_cpu()), and then there are places where we don't necessarily do that and return a sched-idle CPU as soon as we find one (in select_idle_sibling()). This looks a bit inconsistent and it may be worth having the same policy everywhere. On the other hand, choosing a sched-idle CPU over an idle one should be beneficial from both a performance and a power point of view, as we don't need to bring the CPU out of a deep idle state, which wastes quite a lot of time and energy and delays the scheduling of the newly woken-up task. This patch tries to simplify the code around sched-idle CPU selection and make it consistent throughout. Testing was done with the help of rt-app on a HiKey board (ARM64 octa-core, 2 clusters, CPUs 0-3 and 4-7). The cpufreq governor was set to performance to avoid any side effects from CPU frequency changes. The following tests were performed: Test 1: 1-cfs-task: A single SCHED_NORMAL task is pinned to CPU5 which runs for 2333 us out of 7777 us (so gives time for the cluster to go into a deep idle state). Test 2: 1-cfs-1-idle-task: A single SCHED_NORMAL task is pinned on CPU5 and a single SCHED_IDLE task is pinned on CPU6 (to make sure cluster 1 doesn't go into a deep idle state). Test 3: 1-cfs-8-idle-task: A single SCHED_NORMAL task is pinned on CPU5 and eight SCHED_IDLE tasks are created which run forever (not pinned anywhere, so they run on all CPUs). Checked with kernelshark that as soon as the NORMAL task sleeps, a SCHED_IDLE task starts running on CPU5. And here are the results on mean latency (in us), using the "st" tool. 
$ st 1-cfs-task/rt-app-cfs_thread-0.log
N       min     max     sum     mean    stddev
642     90      592     197180  307.134 109.906

$ st 1-cfs-1-idle-task/rt-app-cfs_thread-0.log
N       min     max     sum     mean    stddev
642     67      311     113850  177.336 41.4251

$ st 1-cfs-8-idle-task/rt-app-cfs_thread-0.log
N       min     max     sum     mean    stddev
643     29      173     41364   64.3297 13.2344

The mean latency when we need to:
- wake up from deep idle state is 307 us.
- wake up from shallow idle state is 177 us.
- preempt a SCHED_IDLE task is 64 us.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/b90cbcce608cef4e02a7bbfe178335f76d201bab.1573728344.git.viresh.kumar@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
53a23364b6
commit
17346452b2
@ -5588,7 +5588,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
|
||||
unsigned int min_exit_latency = UINT_MAX;
|
||||
u64 latest_idle_timestamp = 0;
|
||||
int least_loaded_cpu = this_cpu;
|
||||
int shallowest_idle_cpu = -1, si_cpu = -1;
|
||||
int shallowest_idle_cpu = -1;
|
||||
int i;
|
||||
|
||||
/* Check if we have any choice: */
|
||||
@ -5597,6 +5597,9 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
|
||||
|
||||
/* Traverse only the allowed CPUs */
|
||||
for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
|
||||
if (sched_idle_cpu(i))
|
||||
return i;
|
||||
|
||||
if (available_idle_cpu(i)) {
|
||||
struct rq *rq = cpu_rq(i);
|
||||
struct cpuidle_state *idle = idle_get_state(rq);
|
||||
@ -5619,12 +5622,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
|
||||
latest_idle_timestamp = rq->idle_stamp;
|
||||
shallowest_idle_cpu = i;
|
||||
}
|
||||
} else if (shallowest_idle_cpu == -1 && si_cpu == -1) {
|
||||
if (sched_idle_cpu(i)) {
|
||||
si_cpu = i;
|
||||
continue;
|
||||
}
|
||||
|
||||
} else if (shallowest_idle_cpu == -1) {
|
||||
load = cpu_load(cpu_rq(i));
|
||||
if (load < min_load) {
|
||||
min_load = load;
|
||||
@ -5633,11 +5631,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
|
||||
}
|
||||
}
|
||||
|
||||
if (shallowest_idle_cpu != -1)
|
||||
return shallowest_idle_cpu;
|
||||
if (si_cpu != -1)
|
||||
return si_cpu;
|
||||
return least_loaded_cpu;
|
||||
return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
|
||||
}
|
||||
|
||||
static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
|
||||
@ -5790,7 +5784,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
|
||||
*/
|
||||
static int select_idle_smt(struct task_struct *p, int target)
|
||||
{
|
||||
int cpu, si_cpu = -1;
|
||||
int cpu;
|
||||
|
||||
if (!static_branch_likely(&sched_smt_present))
|
||||
return -1;
|
||||
@ -5798,13 +5792,11 @@ static int select_idle_smt(struct task_struct *p, int target)
|
||||
for_each_cpu(cpu, cpu_smt_mask(target)) {
|
||||
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
continue;
|
||||
if (available_idle_cpu(cpu))
|
||||
if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
|
||||
return cpu;
|
||||
if (si_cpu == -1 && sched_idle_cpu(cpu))
|
||||
si_cpu = cpu;
|
||||
}
|
||||
|
||||
return si_cpu;
|
||||
return -1;
|
||||
}
|
||||
|
||||
#else /* CONFIG_SCHED_SMT */
|
||||
@ -5834,7 +5826,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
|
||||
u64 time, cost;
|
||||
s64 delta;
|
||||
int this = smp_processor_id();
|
||||
int cpu, nr = INT_MAX, si_cpu = -1;
|
||||
int cpu, nr = INT_MAX;
|
||||
|
||||
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
|
||||
if (!this_sd)
|
||||
@ -5864,11 +5856,9 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
|
||||
|
||||
for_each_cpu_wrap(cpu, cpus, target) {
|
||||
if (!--nr)
|
||||
return si_cpu;
|
||||
if (available_idle_cpu(cpu))
|
||||
return -1;
|
||||
if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
|
||||
break;
|
||||
if (si_cpu == -1 && sched_idle_cpu(cpu))
|
||||
si_cpu = cpu;
|
||||
}
|
||||
|
||||
time = cpu_clock(this) - time;
|
||||
|
Loading…
Reference in New Issue
Block a user