Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:

 - massive CPU hotplug rework (Thomas Gleixner)
 - improve migration fairness (Peter Zijlstra)
 - CPU load calculation updates/cleanups (Yuyang Du)
 - cpufreq updates (Steve Muckle)
 - nohz optimizations (Frederic Weisbecker)
 - switch_mm() micro-optimization on x86 (Andy Lutomirski)
 - ... lots of other enhancements, fixes and cleanups.

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (66 commits)
  ARM: Hide finish_arch_post_lock_switch() from modules
  sched/core: Provide a tsk_nr_cpus_allowed() helper
  sched/core: Use tsk_cpus_allowed() instead of accessing ->cpus_allowed
  sched/loadavg: Fix loadavg artifacts on fully idle and on fully loaded systems
  sched/fair: Correct unit of load_above_capacity
  sched/fair: Clean up scale confusion
  sched/nohz: Fix affine unpinned timers mess
  sched/fair: Fix fairness issue on migration
  sched/core: Kill sched_class::task_waking to clean up the migration logic
  sched/fair: Prepare to fix fairness problems on migration
  sched/fair: Move record_wakee()
  sched/core: Fix comment typo in wake_q_add()
  sched/core: Remove unused variable
  sched: Make hrtick_notifier an explicit call
  sched/fair: Make ilb_notifier an explicit call
  sched/hotplug: Make activate() the last hotplug step
  sched/hotplug: Move migration CPU_DYING to sched_cpu_dying()
  sched/migration: Move CPU_ONLINE into scheduler state
  sched/migration: Move calc_load_migrate() into CPU_DYING
  sched/migration: Move prepare transition to SCHED_STARTING state
  ...
commit 825a3b2605
@@ -1562,12 +1562,12 @@ Doing the same with chrt -r 5 and function-trace set.
   <idle>-0 3dN.1 12us : menu_hrtimer_cancel <-tick_nohz_idle_exit
   <idle>-0 3dN.1 12us : ktime_get <-tick_nohz_idle_exit
   <idle>-0 3dN.1 12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit
-  <idle>-0 3dN.1 13us : update_cpu_load_nohz <-tick_nohz_idle_exit
-  <idle>-0 3dN.1 13us : _raw_spin_lock <-update_cpu_load_nohz
+  <idle>-0 3dN.1 13us : cpu_load_update_nohz <-tick_nohz_idle_exit
+  <idle>-0 3dN.1 13us : _raw_spin_lock <-cpu_load_update_nohz
   <idle>-0 3dN.1 13us : add_preempt_count <-_raw_spin_lock
-  <idle>-0 3dN.2 13us : __update_cpu_load <-update_cpu_load_nohz
-  <idle>-0 3dN.2 14us : sched_avg_update <-__update_cpu_load
-  <idle>-0 3dN.2 14us : _raw_spin_unlock <-update_cpu_load_nohz
+  <idle>-0 3dN.2 13us : __cpu_load_update <-cpu_load_update_nohz
+  <idle>-0 3dN.2 14us : sched_avg_update <-__cpu_load_update
+  <idle>-0 3dN.2 14us : _raw_spin_unlock <-cpu_load_update_nohz
   <idle>-0 3dN.2 14us : sub_preempt_count <-_raw_spin_unlock
   <idle>-0 3dN.1 15us : calc_load_exit_idle <-tick_nohz_idle_exit
   <idle>-0 3dN.1 15us : touch_softlockup_watchdog <-tick_nohz_idle_exit
@@ -15,6 +15,7 @@
 
 #include <linux/compiler.h>
 #include <linux/sched.h>
+#include <linux/preempt.h>
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
 #include <asm/proc-fns.h>
@@ -66,6 +67,7 @@ static inline void check_and_switch_context(struct mm_struct *mm,
 		cpu_switch_mm(mm->pgd, mm);
 }
 
+#ifndef MODULE
 #define finish_arch_post_lock_switch \
 	finish_arch_post_lock_switch
 static inline void finish_arch_post_lock_switch(void)
@@ -87,6 +89,7 @@ static inline void finish_arch_post_lock_switch(void)
 		preempt_enable_no_resched();
 	}
 }
+#endif /* !MODULE */
 
 #endif	/* CONFIG_MMU */
 
@@ -565,7 +565,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 		smp_ops->give_timebase();
 
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 
 	return 0;
@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	pcpu_attach_task(pcpu, tidle);
 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 	return 0;
 }
@@ -2183,7 +2183,7 @@ void arch_perf_update_userpage(struct perf_event *event,
	 * cap_user_time_zero doesn't make sense when we're using a different
	 * time base for the records.
	 */
-	if (event->clock == &local_clock) {
+	if (!event->attr.use_clockid) {
		userpg->cap_user_time_zero = 1;
		userpg->time_zero = data->cyc2ns_offset;
	}
@@ -115,103 +115,12 @@ static inline void destroy_context(struct mm_struct *mm)
 	destroy_context_ldt(mm);
 }
 
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-			     struct task_struct *tsk)
-{
-	unsigned cpu = smp_processor_id();
-
-	if (likely(prev != next)) {
-#ifdef CONFIG_SMP
-		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		this_cpu_write(cpu_tlbstate.active_mm, next);
-#endif
-		cpumask_set_cpu(cpu, mm_cpumask(next));
-
-		/*
-		 * Re-load page tables.
-		 *
-		 * This logic has an ordering constraint:
-		 *
-		 *  CPU 0: Write to a PTE for 'next'
-		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
-		 *  CPU 1: set bit 1 in next's mm_cpumask
-		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
-		 *
-		 * We need to prevent an outcome in which CPU 1 observes
-		 * the new PTE value and CPU 0 observes bit 1 clear in
-		 * mm_cpumask.  (If that occurs, then the IPI will never
-		 * be sent, and CPU 0's TLB will contain a stale entry.)
-		 *
-		 * The bad outcome can occur if either CPU's load is
-		 * reordered before that CPU's store, so both CPUs must
-		 * execute full barriers to prevent this from happening.
-		 *
-		 * Thus, switch_mm needs a full barrier between the
-		 * store to mm_cpumask and any operation that could load
-		 * from next->pgd.  TLB fills are special and can happen
-		 * due to instruction fetches or for no reason at all,
-		 * and neither LOCK nor MFENCE orders them.
-		 * Fortunately, load_cr3() is serializing and gives the
-		 * ordering guarantee we need.
-		 *
-		 */
-		load_cr3(next->pgd);
-
-		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-
-		/* Stop flush ipis for the previous mm */
-		cpumask_clear_cpu(cpu, mm_cpumask(prev));
-
-		/* Load per-mm CR4 state */
-		load_mm_cr4(next);
-
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-		/*
-		 * Load the LDT, if the LDT is different.
-		 *
-		 * It's possible that prev->context.ldt doesn't match
-		 * the LDT register.  This can happen if leave_mm(prev)
-		 * was called and then modify_ldt changed
-		 * prev->context.ldt but suppressed an IPI to this CPU.
-		 * In this case, prev->context.ldt != NULL, because we
-		 * never set context.ldt to NULL while the mm still
-		 * exists.  That means that next->context.ldt !=
-		 * prev->context.ldt, because mms never share an LDT.
-		 */
-		if (unlikely(prev->context.ldt != next->context.ldt))
-			load_mm_ldt(next);
-#endif
-	}
-#ifdef CONFIG_SMP
-	  else {
-		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
-
-		if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
-			/*
-			 * On established mms, the mm_cpumask is only changed
-			 * from irq context, from ptep_clear_flush() while in
-			 * lazy tlb mode, and here. Irqs are blocked during
-			 * schedule, protecting us from simultaneous changes.
-			 */
-			cpumask_set_cpu(cpu, mm_cpumask(next));
-
-			/*
-			 * We were in lazy tlb mode and leave_mm disabled
-			 * tlb flush IPI delivery. We must reload CR3
-			 * to make sure to use no freed page tables.
-			 *
-			 * As above, load_cr3() is serializing and orders TLB
-			 * fills with respect to the mm_cpumask write.
-			 */
-			load_cr3(next->pgd);
-			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-			load_mm_cr4(next);
-			load_mm_ldt(next);
-		}
-	}
-#endif
-}
+extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+		      struct task_struct *tsk);
+
+extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+			       struct task_struct *tsk);
+#define switch_mm_irqs_off switch_mm_irqs_off
 
 #define activate_mm(prev, next)		\
 do {					\
@@ -2,7 +2,7 @@
 KCOV_INSTRUMENT_tlb.o	:= n
 
 obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	    pat.o pgtable.o physaddr.o gup.o setup_nx.o
+	    pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
@@ -12,7 +12,6 @@ CFLAGS_setup_nx.o := $(nostackp)
 CFLAGS_fault.o := -I$(src)/../include/asm/trace
 
 obj-$(CONFIG_X86_PAT)		+= pat_rbtree.o
-obj-$(CONFIG_SMP)		+= tlb.o
 
 obj-$(CONFIG_X86_32)		+= pgtable_32.o iomap_32.o
 
@@ -28,6 +28,8 @@
  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
+#ifdef CONFIG_SMP
+
 struct flush_tlb_info {
 	struct mm_struct *flush_mm;
 	unsigned long flush_start;
@@ -57,6 +59,118 @@ void leave_mm(int cpu)
 }
 EXPORT_SYMBOL_GPL(leave_mm);
 
+#endif /* CONFIG_SMP */
+
+void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+	       struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	switch_mm_irqs_off(prev, next, tsk);
+	local_irq_restore(flags);
+}
+
+void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+			struct task_struct *tsk)
+{
+	unsigned cpu = smp_processor_id();
+
+	if (likely(prev != next)) {
+#ifdef CONFIG_SMP
+		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		this_cpu_write(cpu_tlbstate.active_mm, next);
+#endif
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+
+		/*
+		 * Re-load page tables.
+		 *
+		 * This logic has an ordering constraint:
+		 *
+		 *  CPU 0: Write to a PTE for 'next'
+		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
+		 *  CPU 1: set bit 1 in next's mm_cpumask
+		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
+		 *
+		 * We need to prevent an outcome in which CPU 1 observes
+		 * the new PTE value and CPU 0 observes bit 1 clear in
+		 * mm_cpumask.  (If that occurs, then the IPI will never
+		 * be sent, and CPU 0's TLB will contain a stale entry.)
+		 *
+		 * The bad outcome can occur if either CPU's load is
+		 * reordered before that CPU's store, so both CPUs must
+		 * execute full barriers to prevent this from happening.
+		 *
+		 * Thus, switch_mm needs a full barrier between the
+		 * store to mm_cpumask and any operation that could load
+		 * from next->pgd.  TLB fills are special and can happen
+		 * due to instruction fetches or for no reason at all,
+		 * and neither LOCK nor MFENCE orders them.
+		 * Fortunately, load_cr3() is serializing and gives the
+		 * ordering guarantee we need.
+		 *
+		 */
+		load_cr3(next->pgd);
+
+		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+
+		/* Stop flush ipis for the previous mm */
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
+
+		/* Load per-mm CR4 state */
+		load_mm_cr4(next);
+
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+		/*
+		 * Load the LDT, if the LDT is different.
+		 *
+		 * It's possible that prev->context.ldt doesn't match
+		 * the LDT register.  This can happen if leave_mm(prev)
+		 * was called and then modify_ldt changed
+		 * prev->context.ldt but suppressed an IPI to this CPU.
+		 * In this case, prev->context.ldt != NULL, because we
+		 * never set context.ldt to NULL while the mm still
+		 * exists.  That means that next->context.ldt !=
+		 * prev->context.ldt, because mms never share an LDT.
+		 */
+		if (unlikely(prev->context.ldt != next->context.ldt))
+			load_mm_ldt(next);
+#endif
+	}
+#ifdef CONFIG_SMP
+	  else {
+		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+
+		if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+			/*
+			 * On established mms, the mm_cpumask is only changed
+			 * from irq context, from ptep_clear_flush() while in
+			 * lazy tlb mode, and here. Irqs are blocked during
+			 * schedule, protecting us from simultaneous changes.
+			 */
+			cpumask_set_cpu(cpu, mm_cpumask(next));
+
+			/*
+			 * We were in lazy tlb mode and leave_mm disabled
+			 * tlb flush IPI delivery. We must reload CR3
+			 * to make sure to use no freed page tables.
+			 *
+			 * As above, load_cr3() is serializing and orders TLB
+			 * fills with respect to the mm_cpumask write.
+			 */
+			load_cr3(next->pgd);
+			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+			load_mm_cr4(next);
+			load_mm_ldt(next);
+		}
+	}
+#endif
+}
+
+#ifdef CONFIG_SMP
+
 /*
  * The flush IPI assumes that a thread switch happens in this order:
  * [cpu0: the cpu that switches]
@@ -353,3 +467,5 @@ static int __init create_tlb_single_page_flush_ceiling(void)
 	return 0;
 }
 late_initcall(create_tlb_single_page_flush_ceiling);
+
+#endif /* CONFIG_SMP */
@@ -59,25 +59,7 @@ struct notifier_block;
  * CPU notifier priorities.
  */
 enum {
-	/*
-	 * SCHED_ACTIVE marks a cpu which is coming up active during
-	 * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
-	 * notifier.  CPUSET_ACTIVE adjusts cpuset according to
-	 * cpu_active mask right after SCHED_ACTIVE.  During
-	 * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
-	 * ordered in the similar way.
-	 *
-	 * This ordering guarantees consistent cpu_active mask and
-	 * migration behavior to all cpu notifiers.
-	 */
-	CPU_PRI_SCHED_ACTIVE	= INT_MAX,
-	CPU_PRI_CPUSET_ACTIVE	= INT_MAX - 1,
-	CPU_PRI_SCHED_INACTIVE	= INT_MIN + 1,
-	CPU_PRI_CPUSET_INACTIVE	= INT_MIN,
-
-	/* migration should happen before other stuff but after perf */
 	CPU_PRI_PERF		= 20,
-	CPU_PRI_MIGRATION	= 10,
 
 	/* bring up workqueues before normal notifiers and down after */
 	CPU_PRI_WORKQUEUE_UP	= 5,
@@ -8,6 +8,7 @@ enum cpuhp_state {
 	CPUHP_BRINGUP_CPU,
 	CPUHP_AP_IDLE_DEAD,
 	CPUHP_AP_OFFLINE,
+	CPUHP_AP_SCHED_STARTING,
 	CPUHP_AP_NOTIFY_STARTING,
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
@@ -16,6 +17,7 @@ enum cpuhp_state {
 	CPUHP_AP_NOTIFY_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
 	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
+	CPUHP_AP_ACTIVE,
 	CPUHP_ONLINE,
 };
 
@@ -743,12 +743,10 @@ set_cpu_present(unsigned int cpu, bool present)
 static inline void
 set_cpu_online(unsigned int cpu, bool online)
 {
-	if (online) {
+	if (online)
 		cpumask_set_cpu(cpu, &__cpu_online_mask);
-		cpumask_set_cpu(cpu, &__cpu_active_mask);
-	} else {
+	else
 		cpumask_clear_cpu(cpu, &__cpu_online_mask);
-	}
 }
 
 static inline void
@@ -356,8 +356,13 @@ extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
 extern void lockdep_clear_current_reclaim_state(void);
 extern void lockdep_trace_alloc(gfp_t mask);
 
-extern void lock_pin_lock(struct lockdep_map *lock);
-extern void lock_unpin_lock(struct lockdep_map *lock);
+struct pin_cookie { unsigned int val; };
+
+#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
+
+extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
+extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie);
+extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
 
 # define INIT_LOCKDEP	.lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
 
@@ -373,8 +378,9 @@ extern void lock_unpin_lock(struct lockdep_map *lock);
 
 #define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)
 
 #define lockdep_pin_lock(l)	lock_pin_lock(&(l)->dep_map)
-#define lockdep_unpin_lock(l)	lock_unpin_lock(&(l)->dep_map)
+#define lockdep_repin_lock(l,c)	lock_repin_lock(&(l)->dep_map, (c))
+#define lockdep_unpin_lock(l,c)	lock_unpin_lock(&(l)->dep_map, (c))
 
 #else /* !CONFIG_LOCKDEP */
 
@@ -427,8 +433,13 @@ struct lock_class_key { };
 
 #define lockdep_recursing(tsk)			(0)
 
-#define lockdep_pin_lock(l)			do { (void)(l); } while (0)
-#define lockdep_unpin_lock(l)			do { (void)(l); } while (0)
+struct pin_cookie { };
+
+#define NIL_COOKIE (struct pin_cookie){ }
+
+#define lockdep_pin_lock(l)			({ struct pin_cookie cookie; cookie; })
+#define lockdep_repin_lock(l, c)		do { (void)(l); (void)(c); } while (0)
+#define lockdep_unpin_lock(l, c)		do { (void)(l); (void)(c); } while (0)
 
 #endif /* !LOCKDEP */
 
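Editorial note: the lockdep hunks above replace the old pin/unpin pair with a cookie-based API. The sketch below is illustrative only and is not part of this merge; the lock name and wrapper function are made up, and only the lockdep_pin_lock()/lockdep_unpin_lock() calls come from the header changes shown above. It assumes a lockdep-annotated raw spinlock, so a ->dep_map member exists when lockdep is enabled.

	#include <linux/spinlock.h>
	#include <linux/lockdep.h>

	static DEFINE_RAW_SPINLOCK(example_lock);	/* hypothetical lock, not a kernel symbol */

	static void example_pinned_section(void)
	{
		struct pin_cookie cookie;

		raw_spin_lock(&example_lock);
		/* Pin the held lock; lockdep hands back an unguessable cookie. */
		cookie = lockdep_pin_lock(&example_lock);

		/* ... code that relies on example_lock staying held ... */

		/* Unpinning must pass the same cookie back. */
		lockdep_unpin_lock(&example_lock, cookie);
		raw_spin_unlock(&example_lock);
	}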
@@ -1,9 +1,16 @@
 #ifndef _LINUX_MMU_CONTEXT_H
 #define _LINUX_MMU_CONTEXT_H
 
+#include <asm/mmu_context.h>
+
 struct mm_struct;
 
 void use_mm(struct mm_struct *mm);
 void unuse_mm(struct mm_struct *mm);
 
+/* Architectures that care about IRQ state in switch_mm can override this. */
+#ifndef switch_mm_irqs_off
+# define switch_mm_irqs_off switch_mm
+#endif
+
 #endif
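Editorial note: a minimal illustrative sketch, not taken from this merge (the real caller lives in the suppressed kernel/sched diff). It only shows why the fallback above is sufficient: a caller that already runs with interrupts disabled can use switch_mm_irqs_off() unconditionally, and an architecture that does not override it simply gets plain switch_mm().

	#include <linux/mmu_context.h>
	#include <linux/irqflags.h>
	#include <linux/bug.h>

	/* Hypothetical helper; the "example_" name is not a kernel symbol. */
	static void example_switch_with_irqs_off(struct mm_struct *prev,
						 struct mm_struct *next,
						 struct task_struct *tsk)
	{
		WARN_ON_ONCE(!irqs_disabled());		/* caller's precondition */
		switch_mm_irqs_off(prev, next, tsk);	/* falls back to switch_mm() */
	}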
@@ -177,9 +177,11 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 extern void calc_global_load(unsigned long ticks);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void update_cpu_load_nohz(int active);
+extern void cpu_load_update_nohz_start(void);
+extern void cpu_load_update_nohz_stop(void);
 #else
-static inline void update_cpu_load_nohz(int active) { }
+static inline void cpu_load_update_nohz_start(void) { }
+static inline void cpu_load_update_nohz_stop(void) { }
 #endif
 
 extern void dump_cpu_task(int cpu);
@@ -371,6 +373,15 @@ extern void cpu_init (void);
 extern void trap_init(void);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
+extern int sched_cpu_starting(unsigned int cpu);
+extern int sched_cpu_activate(unsigned int cpu);
+extern int sched_cpu_deactivate(unsigned int cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_dying(unsigned int cpu);
+#else
+# define sched_cpu_dying	NULL
+#endif
 
 extern void sched_show_task(struct task_struct *p);
 
@@ -933,10 +944,20 @@ enum cpu_idle_type {
 	CPU_MAX_IDLE_TYPES
 };
 
+/*
+ * Integer metrics need fixed point arithmetic, e.g., sched/fair
+ * has a few: load, load_avg, util_avg, freq, and capacity.
+ *
+ * We define a basic fixed point arithmetic range, and then formalize
+ * all these metrics based on that basic range.
+ */
+# define SCHED_FIXEDPOINT_SHIFT	10
+# define SCHED_FIXEDPOINT_SCALE	(1L << SCHED_FIXEDPOINT_SHIFT)
+
 /*
  * Increase resolution of cpu_capacity calculations
  */
-#define SCHED_CAPACITY_SHIFT	10
+#define SCHED_CAPACITY_SHIFT	SCHED_FIXEDPOINT_SHIFT
 #define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
 
 /*
@@ -1198,18 +1219,56 @@ struct load_weight {
 };
 
 /*
- * The load_avg/util_avg accumulates an infinite geometric series.
- * 1) load_avg factors frequency scaling into the amount of time that a
- * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the
- * aggregated such weights of all runnable and blocked sched_entities.
- * 2) util_avg factors frequency and cpu scaling into the amount of time
- * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE].
- * For cfs_rq, it is the aggregated such times of all runnable and
+ * The load_avg/util_avg accumulates an infinite geometric series
+ * (see __update_load_avg() in kernel/sched/fair.c).
+ *
+ * [load_avg definition]
+ *
+ *   load_avg = runnable% * scale_load_down(load)
+ *
+ * where runnable% is the time ratio that a sched_entity is runnable.
+ * For cfs_rq, it is the aggregated load_avg of all runnable and
 * blocked sched_entities.
- * The 64 bit load_sum can:
- * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with
- * the highest weight (=88761) always runnable, we should not overflow
- * 2) for entity, support any load.weight always runnable
+ *
+ * load_avg may also take frequency scaling into account:
+ *
+ *   load_avg = runnable% * scale_load_down(load) * freq%
+ *
+ * where freq% is the CPU frequency normalized to the highest frequency.
+ *
+ * [util_avg definition]
+ *
+ *   util_avg = running% * SCHED_CAPACITY_SCALE
+ *
+ * where running% is the time ratio that a sched_entity is running on
+ * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
+ * and blocked sched_entities.
+ *
+ * util_avg may also factor frequency scaling and CPU capacity scaling:
+ *
+ *   util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
+ *
+ * where freq% is the same as above, and capacity% is the CPU capacity
+ * normalized to the greatest capacity (due to uarch differences, etc).
+ *
+ * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
+ * themselves are in the range of [0, 1]. To do fixed point arithmetics,
+ * we therefore scale them to as large a range as necessary. This is for
+ * example reflected by util_avg's SCHED_CAPACITY_SCALE.
+ *
+ * [Overflow issue]
+ *
+ * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
+ * with the highest load (=88761), always runnable on a single cfs_rq,
+ * and should not overflow as the number already hits PID_MAX_LIMIT.
+ *
+ * For all other cases (including 32-bit kernels), struct load_weight's
+ * weight will overflow first before we do, because:
+ *
+ *    Max(load_avg) <= Max(load.weight)
+ *
+ * Then it is the load_weight's responsibility to consider overflow
+ * issues.
 */
 struct sched_avg {
 	u64 last_update_time, load_sum;
@@ -1871,6 +1930,11 @@ extern int arch_task_struct_size __read_mostly;
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+static inline int tsk_nr_cpus_allowed(struct task_struct *p)
+{
+	return p->nr_cpus_allowed;
+}
+
 #define TNF_MIGRATED	0x01
 #define TNF_NO_GROUP	0x02
 #define TNF_SHARED	0x04
@@ -2303,8 +2367,6 @@ extern unsigned long long notrace sched_clock(void);
 /*
  * See the comment in kernel/sched/clock.c
 */
-extern u64 cpu_clock(int cpu);
-extern u64 local_clock(void);
 extern u64 running_clock(void);
 extern u64 sched_clock_cpu(int cpu);
 
@@ -2323,6 +2385,16 @@ static inline void sched_clock_idle_sleep_event(void)
 static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
 {
 }
+
+static inline u64 cpu_clock(int cpu)
+{
+	return sched_clock();
+}
+
+static inline u64 local_clock(void)
+{
+	return sched_clock();
+}
 #else
 /*
  * Architectures can set this to 1 if they have specified
@@ -2337,6 +2409,26 @@ extern void clear_sched_clock_stable(void);
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+/*
+ * As outlined in clock.c, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ */
+static inline u64 cpu_clock(int cpu)
+{
+	return sched_clock_cpu(cpu);
+}
+
+static inline u64 local_clock(void)
+{
+	return sched_clock_cpu(raw_smp_processor_id());
+}
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
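Editorial note: a worked example of the fixed-point ranges documented in the hunk above (the numbers are illustrative, using only the formulas quoted there). With SCHED_FIXEDPOINT_SCALE = SCHED_CAPACITY_SCALE = 1 << 10 = 1024, a nice-0 task (load 1024) that is runnable 25% of the time has load_avg ~= 0.25 * 1024 = 256, and a task running 50% of the time on a CPU clocked at 80% of its maximum frequency has util_avg ~= 0.5 * 0.8 * 1024 = 410. The ratios themselves stay within [0, 1]; only the stored values are scaled onto the 1024 fixed-point range.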
kernel/cpu.c
@@ -703,21 +703,6 @@ static int takedown_cpu(unsigned int cpu)
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	int err;
 
-	/*
-	 * By now we've cleared cpu_active_mask, wait for all preempt-disabled
-	 * and RCU users of this state to go away such that all new such users
-	 * will observe it.
-	 *
-	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
-	 * not imply sync_sched(), so wait for both.
-	 *
-	 * Do sync before park smpboot threads to take care the rcu boost case.
-	 */
-	if (IS_ENABLED(CONFIG_PREEMPT))
-		synchronize_rcu_mult(call_rcu, call_rcu_sched);
-	else
-		synchronize_rcu();
-
 	/* Park the smpboot threads */
 	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
 	smpboot_park_threads(cpu);
@@ -923,8 +908,6 @@ void cpuhp_online_idle(enum cpuhp_state state)
 
 	st->state = CPUHP_AP_ONLINE_IDLE;
 
-	/* The cpu is marked online, set it active now */
-	set_cpu_active(cpu, true);
 	/* Unpark the stopper thread and the hotplug thread of this cpu */
 	stop_machine_unpark(cpu);
 	kthread_unpark(st->thread);
@@ -1236,6 +1219,12 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 		.name			= "ap:offline",
 		.cant_stop		= true,
 	},
+	/* First state is scheduler control. Interrupts are disabled */
+	[CPUHP_AP_SCHED_STARTING] = {
+		.name			= "sched:starting",
+		.startup		= sched_cpu_starting,
+		.teardown		= sched_cpu_dying,
+	},
 	/*
 	 * Low level startup/teardown notifiers. Run with interrupts
 	 * disabled. Will be removed once the notifiers are converted to
@@ -1274,6 +1263,15 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 	 * The dynamically registered state space is here
 	 */
 
+#ifdef CONFIG_SMP
+	/* Last state is scheduler control setting the cpu active */
+	[CPUHP_AP_ACTIVE] = {
+		.name			= "sched:active",
+		.startup		= sched_cpu_activate,
+		.teardown		= sched_cpu_deactivate,
+	},
+#endif
+
 	/* CPU is fully up and running. */
 	[CPUHP_ONLINE] = {
 		.name			= "online",
@@ -45,6 +45,7 @@
 #include <linux/bitops.h>
 #include <linux/gfp.h>
 #include <linux/kmemcheck.h>
+#include <linux/random.h>
 
 #include <asm/sections.h>
 
@@ -3585,7 +3586,35 @@ static int __lock_is_held(struct lockdep_map *lock)
 	return 0;
 }
 
-static void __lock_pin_lock(struct lockdep_map *lock)
+static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock)
+{
+	struct pin_cookie cookie = NIL_COOKIE;
+	struct task_struct *curr = current;
+	int i;
+
+	if (unlikely(!debug_locks))
+		return cookie;
+
+	for (i = 0; i < curr->lockdep_depth; i++) {
+		struct held_lock *hlock = curr->held_locks + i;
+
+		if (match_held_lock(hlock, lock)) {
+			/*
+			 * Grab 16bits of randomness; this is sufficient to not
+			 * be guessable and still allows some pin nesting in
+			 * our u32 pin_count.
+			 */
+			cookie.val = 1 + (prandom_u32() >> 16);
+			hlock->pin_count += cookie.val;
+			return cookie;
+		}
+	}
+
+	WARN(1, "pinning an unheld lock\n");
+	return cookie;
+}
+
+static void __lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
 {
 	struct task_struct *curr = current;
 	int i;
@@ -3597,7 +3626,7 @@ static void __lock_pin_lock(struct lockdep_map *lock)
 		struct held_lock *hlock = curr->held_locks + i;
 
 		if (match_held_lock(hlock, lock)) {
-			hlock->pin_count++;
+			hlock->pin_count += cookie.val;
 			return;
 		}
 	}
@@ -3605,7 +3634,7 @@ static void __lock_pin_lock(struct lockdep_map *lock)
 	WARN(1, "pinning an unheld lock\n");
 }
 
-static void __lock_unpin_lock(struct lockdep_map *lock)
+static void __lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
 {
 	struct task_struct *curr = current;
 	int i;
@@ -3620,7 +3649,11 @@ static void __lock_unpin_lock(struct lockdep_map *lock)
 			if (WARN(!hlock->pin_count, "unpinning an unpinned lock\n"))
 				return;
 
-			hlock->pin_count--;
+			hlock->pin_count -= cookie.val;
+
+			if (WARN((int)hlock->pin_count < 0, "pin count corrupted\n"))
+				hlock->pin_count = 0;
+
 			return;
 		}
 	}
@@ -3751,24 +3784,27 @@ int lock_is_held(struct lockdep_map *lock)
 }
 EXPORT_SYMBOL_GPL(lock_is_held);
 
-void lock_pin_lock(struct lockdep_map *lock)
+struct pin_cookie lock_pin_lock(struct lockdep_map *lock)
 {
+	struct pin_cookie cookie = NIL_COOKIE;
 	unsigned long flags;
 
 	if (unlikely(current->lockdep_recursion))
-		return;
+		return cookie;
 
 	raw_local_irq_save(flags);
 	check_flags(flags);
 
 	current->lockdep_recursion = 1;
-	__lock_pin_lock(lock);
+	cookie = __lock_pin_lock(lock);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
+
+	return cookie;
 }
 EXPORT_SYMBOL_GPL(lock_pin_lock);
 
-void lock_unpin_lock(struct lockdep_map *lock)
+void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
 {
 	unsigned long flags;
 
@@ -3779,7 +3815,24 @@ void lock_unpin_lock(struct lockdep_map *lock)
 	check_flags(flags);
 
 	current->lockdep_recursion = 1;
-	__lock_unpin_lock(lock);
+	__lock_repin_lock(lock, cookie);
+	current->lockdep_recursion = 0;
+	raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lock_repin_lock);
+
+void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
+{
+	unsigned long flags;
+
+	if (unlikely(current->lockdep_recursion))
+		return;
+
+	raw_local_irq_save(flags);
+	check_flags(flags);
+
+	current->lockdep_recursion = 1;
+	__lock_unpin_lock(lock, cookie);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
@@ -318,6 +318,7 @@ u64 sched_clock_cpu(int cpu)
 
 	return clock;
 }
+EXPORT_SYMBOL_GPL(sched_clock_cpu);
 
 void sched_clock_tick(void)
 {
@@ -363,39 +364,6 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
-/*
- * As outlined at the top, provides a fast, high resolution, nanosecond
- * time source that is monotonic per cpu argument and has bounded drift
- * between cpus.
- *
- * ######################### BIG FAT WARNING ##########################
- * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
- * # go backwards !!                                                  #
- * ####################################################################
- */
-u64 cpu_clock(int cpu)
-{
-	if (!sched_clock_stable())
-		return sched_clock_cpu(cpu);
-
-	return sched_clock();
-}
-
-/*
- * Similar to cpu_clock() for the current cpu. Time will only be observed
- * to be monotonic if care is taken to only compare timestampt taken on the
- * same CPU.
- *
- * See cpu_clock().
- */
-u64 local_clock(void)
-{
-	if (!sched_clock_stable())
-		return sched_clock_cpu(raw_smp_processor_id());
-
-	return sched_clock();
-}
-
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 void sched_clock_init(void)
@@ -410,22 +378,8 @@ u64 sched_clock_cpu(int cpu)
 
 	return sched_clock();
 }
-
-u64 cpu_clock(int cpu)
-{
-	return sched_clock();
-}
-
-u64 local_clock(void)
-{
-	return sched_clock();
-}
 
 #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
-EXPORT_SYMBOL_GPL(cpu_clock);
-EXPORT_SYMBOL_GPL(local_clock);
-
 /*
  * Running clock - returns the time that has elapsed while a guest has been
  * running.
File diff suppressed because it is too large
@@ -25,11 +25,22 @@ enum cpuacct_stat_index {
 	CPUACCT_STAT_NSTATS,
 };
 
+enum cpuacct_usage_index {
+	CPUACCT_USAGE_USER,	/* ... user mode */
+	CPUACCT_USAGE_SYSTEM,	/* ... kernel mode */
+
+	CPUACCT_USAGE_NRUSAGE,
+};
+
+struct cpuacct_usage {
+	u64	usages[CPUACCT_USAGE_NRUSAGE];
+};
+
 /* track cpu usage of a group of tasks and its child groups */
 struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
-	u64 __percpu *cpuusage;
+	struct cpuacct_usage __percpu *cpuusage;
 	struct kernel_cpustat __percpu *cpustat;
 };
 
@@ -49,7 +60,7 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca)
 	return css_ca(ca->css.parent);
 }
 
-static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
+static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
 static struct cpuacct root_cpuacct = {
 	.cpustat	= &kernel_cpustat,
 	.cpuusage	= &root_cpuacct_cpuusage,
@@ -68,7 +79,7 @@ cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (!ca)
 		goto out;
 
-	ca->cpuusage = alloc_percpu(u64);
+	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
 	if (!ca->cpuusage)
 		goto out_free_ca;
 
@@ -96,20 +107,37 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css)
 	kfree(ca);
 }
 
-static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
+				 enum cpuacct_usage_index index)
 {
-	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
 	u64 data;
 
+	/*
+	 * We allow index == CPUACCT_USAGE_NRUSAGE here to read
+	 * the sum of suages.
+	 */
+	BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
+
 #ifndef CONFIG_64BIT
 	/*
 	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
 	 */
 	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
-	data = *cpuusage;
+#endif
+
+	if (index == CPUACCT_USAGE_NRUSAGE) {
+		int i = 0;
+
+		data = 0;
+		for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
+			data += cpuusage->usages[i];
+	} else {
+		data = cpuusage->usages[index];
+	}
+
+#ifndef CONFIG_64BIT
 	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
-	data = *cpuusage;
 #endif
 
 	return data;
@@ -117,69 +145,103 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
 
 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
 {
-	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+	int i;
 
 #ifndef CONFIG_64BIT
 	/*
 	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
 	 */
 	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
-	*cpuusage = val;
+#endif
+
+	for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
+		cpuusage->usages[i] = val;
+
+#ifndef CONFIG_64BIT
 	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
-	*cpuusage = val;
 #endif
 }
 
 /* return total cpu usage (in nanoseconds) of a group */
-static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
+static u64 __cpuusage_read(struct cgroup_subsys_state *css,
+			   enum cpuacct_usage_index index)
 {
 	struct cpuacct *ca = css_ca(css);
 	u64 totalcpuusage = 0;
 	int i;
 
-	for_each_present_cpu(i)
-		totalcpuusage += cpuacct_cpuusage_read(ca, i);
+	for_each_possible_cpu(i)
+		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
 
 	return totalcpuusage;
 }
 
+static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
+			      struct cftype *cft)
+{
+	return __cpuusage_read(css, CPUACCT_USAGE_USER);
+}
+
+static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
+			     struct cftype *cft)
+{
+	return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM);
+}
+
+static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE);
+}
+
 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
 			  u64 val)
 {
 	struct cpuacct *ca = css_ca(css);
-	int err = 0;
-	int i;
+	int cpu;
 
 	/*
 	 * Only allow '0' here to do a reset.
 	 */
-	if (val) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (val)
+		return -EINVAL;
 
-	for_each_present_cpu(i)
-		cpuacct_cpuusage_write(ca, i, 0);
+	for_each_possible_cpu(cpu)
+		cpuacct_cpuusage_write(ca, cpu, 0);
 
-out:
-	return err;
+	return 0;
 }
 
-static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
+static int __cpuacct_percpu_seq_show(struct seq_file *m,
+				     enum cpuacct_usage_index index)
 {
 	struct cpuacct *ca = css_ca(seq_css(m));
 	u64 percpu;
 	int i;
 
-	for_each_present_cpu(i) {
-		percpu = cpuacct_cpuusage_read(ca, i);
+	for_each_possible_cpu(i) {
+		percpu = cpuacct_cpuusage_read(ca, i, index);
 		seq_printf(m, "%llu ", (unsigned long long) percpu);
 	}
 	seq_printf(m, "\n");
 	return 0;
 }
 
+static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
+{
+	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER);
+}
+
+static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
+{
+	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM);
+}
+
+static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
+{
+	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
+}
+
 static const char * const cpuacct_stat_desc[] = {
 	[CPUACCT_STAT_USER] = "user",
 	[CPUACCT_STAT_SYSTEM] = "system",
@@ -191,7 +253,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v)
 	int cpu;
 	s64 val = 0;
 
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
 		val += kcpustat->cpustat[CPUTIME_USER];
 		val += kcpustat->cpustat[CPUTIME_NICE];
@@ -200,7 +262,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v)
 	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
 
 	val = 0;
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
 		val += kcpustat->cpustat[CPUTIME_SYSTEM];
 		val += kcpustat->cpustat[CPUTIME_IRQ];
@@ -219,10 +281,26 @@ static struct cftype files[] = {
 		.read_u64 = cpuusage_read,
 		.write_u64 = cpuusage_write,
 	},
+	{
+		.name = "usage_user",
+		.read_u64 = cpuusage_user_read,
+	},
+	{
+		.name = "usage_sys",
+		.read_u64 = cpuusage_sys_read,
+	},
 	{
 		.name = "usage_percpu",
 		.seq_show = cpuacct_percpu_seq_show,
 	},
+	{
+		.name = "usage_percpu_user",
+		.seq_show = cpuacct_percpu_user_seq_show,
+	},
+	{
+		.name = "usage_percpu_sys",
+		.seq_show = cpuacct_percpu_sys_seq_show,
+	},
 	{
 		.name = "stat",
 		.seq_show = cpuacct_stats_show,
@@ -238,10 +316,17 @@ static struct cftype files[] = {
 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int index = CPUACCT_USAGE_SYSTEM;
+	struct pt_regs *regs = task_pt_regs(tsk);
+
+	if (regs && user_mode(regs))
+		index = CPUACCT_USAGE_USER;
 
 	rcu_read_lock();
 
 	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
-		*this_cpu_ptr(ca->cpuusage) += cputime;
+		this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;
 
 	rcu_read_unlock();
 }
 
@@ -103,10 +103,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	const struct sched_dl_entity *dl_se = &p->dl;
 
 	if (later_mask &&
-	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+	    cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
-	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
+	} else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) &&
 			dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
 		best_cpu = cpudl_maximum(cp);
 		if (later_mask)
@@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 		if (skip)
 			continue;
 
-		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+		if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids)
 			continue;
 
 		if (lowest_mask) {
-			cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+			cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask);
 
 			/*
 			 * We have to ensure that we have at least one bit
@@ -134,7 +134,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	struct task_struct *p = dl_task_of(dl_se);
 
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		dl_rq->dl_nr_migratory++;
 
 	update_dl_migration(dl_rq);
@@ -144,7 +144,7 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	struct task_struct *p = dl_task_of(dl_se);
 
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		dl_rq->dl_nr_migratory--;
 
 	update_dl_migration(dl_rq);
@@ -591,10 +591,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 						     struct sched_dl_entity,
 						     dl_timer);
 	struct task_struct *p = dl_task_of(dl_se);
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 
 	/*
 	 * The task might have changed its scheduling policy to something
@@ -670,14 +670,14 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 		 * Nothing relies on rq->lock after this, so its safe to drop
 		 * rq->lock.
 		 */
-		lockdep_unpin_lock(&rq->lock);
+		lockdep_unpin_lock(&rq->lock, rf.cookie);
 		push_dl_task(rq);
-		lockdep_pin_lock(&rq->lock);
+		lockdep_repin_lock(&rq->lock, rf.cookie);
 	}
 #endif
 
 unlock:
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	/*
 	 * This can free the task_struct, including this hrtimer, do not touch
@@ -717,10 +717,6 @@ static void update_curr_dl(struct rq *rq)
 	if (!dl_task(curr) || !on_dl_rq(dl_se))
 		return;
 
-	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
-	if (cpu_of(rq) == smp_processor_id())
-		cpufreq_trigger_update(rq_clock(rq));
-
 	/*
 	 * Consumed budget is computed considering the time as
 	 * observed by schedulable tasks (excluding time spent
@@ -736,6 +732,10 @@ static void update_curr_dl(struct rq *rq)
 		return;
 	}
 
+	/* kick cpufreq (see the comment in linux/cpufreq.h). */
+	if (cpu_of(rq) == smp_processor_id())
+		cpufreq_trigger_update(rq_clock(rq));
+
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
 
@@ -966,7 +966,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 
 	enqueue_dl_entity(&p->dl, pi_se, flags);
 
-	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+	if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_dl_task(rq, p);
 }
 
@@ -1040,9 +1040,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 	 * try to make it stay here, it might be important.
 	 */
if (unlikely(dl_task(curr)) &&
|
if (unlikely(dl_task(curr)) &&
|
||||||
(curr->nr_cpus_allowed < 2 ||
|
(tsk_nr_cpus_allowed(curr) < 2 ||
|
||||||
!dl_entity_preempt(&p->dl, &curr->dl)) &&
|
!dl_entity_preempt(&p->dl, &curr->dl)) &&
|
||||||
(p->nr_cpus_allowed > 1)) {
|
(tsk_nr_cpus_allowed(p) > 1)) {
|
||||||
int target = find_later_rq(p);
|
int target = find_later_rq(p);
|
||||||
|
|
||||||
if (target != -1 &&
|
if (target != -1 &&
|
||||||
@ -1063,7 +1063,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
|
|||||||
* Current can't be migrated, useless to reschedule,
|
* Current can't be migrated, useless to reschedule,
|
||||||
* let's hope p can move out.
|
* let's hope p can move out.
|
||||||
*/
|
*/
|
||||||
if (rq->curr->nr_cpus_allowed == 1 ||
|
if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
|
||||||
cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
|
cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -1071,7 +1071,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
|
|||||||
* p is migratable, so let's not schedule it and
|
* p is migratable, so let's not schedule it and
|
||||||
* see if it is pushed or pulled somewhere else.
|
* see if it is pushed or pulled somewhere else.
|
||||||
*/
|
*/
|
||||||
if (p->nr_cpus_allowed != 1 &&
|
if (tsk_nr_cpus_allowed(p) != 1 &&
|
||||||
cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
|
cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -1125,7 +1125,8 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
|
|||||||
return rb_entry(left, struct sched_dl_entity, rb_node);
|
return rb_entry(left, struct sched_dl_entity, rb_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
|
struct task_struct *
|
||||||
|
pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
|
||||||
{
|
{
|
||||||
struct sched_dl_entity *dl_se;
|
struct sched_dl_entity *dl_se;
|
||||||
struct task_struct *p;
|
struct task_struct *p;
|
||||||
@ -1140,9 +1141,9 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
|
|||||||
* disabled avoiding further scheduler activity on it and we're
|
* disabled avoiding further scheduler activity on it and we're
|
||||||
* being very careful to re-start the picking loop.
|
* being very careful to re-start the picking loop.
|
||||||
*/
|
*/
|
||||||
lockdep_unpin_lock(&rq->lock);
|
lockdep_unpin_lock(&rq->lock, cookie);
|
||||||
pull_dl_task(rq);
|
pull_dl_task(rq);
|
||||||
lockdep_pin_lock(&rq->lock);
|
lockdep_repin_lock(&rq->lock, cookie);
|
||||||
/*
|
/*
|
||||||
* pull_rt_task() can drop (and re-acquire) rq->lock; this
|
* pull_rt_task() can drop (and re-acquire) rq->lock; this
|
||||||
* means a stop task can slip in, in which case we need to
|
* means a stop task can slip in, in which case we need to
|
||||||
@ -1185,7 +1186,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
|
|||||||
{
|
{
|
||||||
update_curr_dl(rq);
|
update_curr_dl(rq);
|
||||||
|
|
||||||
if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
|
if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1)
|
||||||
enqueue_pushable_dl_task(rq, p);
|
enqueue_pushable_dl_task(rq, p);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1286,7 +1287,7 @@ static int find_later_rq(struct task_struct *task)
|
|||||||
if (unlikely(!later_mask))
|
if (unlikely(!later_mask))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
if (task->nr_cpus_allowed == 1)
|
if (tsk_nr_cpus_allowed(task) == 1)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1392,7 +1393,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
|
|||||||
if (double_lock_balance(rq, later_rq)) {
|
if (double_lock_balance(rq, later_rq)) {
|
||||||
if (unlikely(task_rq(task) != rq ||
|
if (unlikely(task_rq(task) != rq ||
|
||||||
!cpumask_test_cpu(later_rq->cpu,
|
!cpumask_test_cpu(later_rq->cpu,
|
||||||
&task->cpus_allowed) ||
|
tsk_cpus_allowed(task)) ||
|
||||||
task_running(rq, task) ||
|
task_running(rq, task) ||
|
||||||
!dl_task(task) ||
|
!dl_task(task) ||
|
||||||
!task_on_rq_queued(task))) {
|
!task_on_rq_queued(task))) {
|
||||||
@ -1432,7 +1433,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
|
|||||||
|
|
||||||
BUG_ON(rq->cpu != task_cpu(p));
|
BUG_ON(rq->cpu != task_cpu(p));
|
||||||
BUG_ON(task_current(rq, p));
|
BUG_ON(task_current(rq, p));
|
||||||
BUG_ON(p->nr_cpus_allowed <= 1);
|
BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
|
||||||
|
|
||||||
BUG_ON(!task_on_rq_queued(p));
|
BUG_ON(!task_on_rq_queued(p));
|
||||||
BUG_ON(!dl_task(p));
|
BUG_ON(!dl_task(p));
|
||||||
@ -1471,7 +1472,7 @@ retry:
|
|||||||
*/
|
*/
|
||||||
if (dl_task(rq->curr) &&
|
if (dl_task(rq->curr) &&
|
||||||
dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
|
dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
|
||||||
rq->curr->nr_cpus_allowed > 1) {
|
tsk_nr_cpus_allowed(rq->curr) > 1) {
|
||||||
resched_curr(rq);
|
resched_curr(rq);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -1618,9 +1619,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
|
|||||||
{
|
{
|
||||||
if (!task_running(rq, p) &&
|
if (!task_running(rq, p) &&
|
||||||
!test_tsk_need_resched(rq->curr) &&
|
!test_tsk_need_resched(rq->curr) &&
|
||||||
p->nr_cpus_allowed > 1 &&
|
tsk_nr_cpus_allowed(p) > 1 &&
|
||||||
dl_task(rq->curr) &&
|
dl_task(rq->curr) &&
|
||||||
(rq->curr->nr_cpus_allowed < 2 ||
|
(tsk_nr_cpus_allowed(rq->curr) < 2 ||
|
||||||
!dl_entity_preempt(&p->dl, &rq->curr->dl))) {
|
!dl_entity_preempt(&p->dl, &rq->curr->dl))) {
|
||||||
push_dl_tasks(rq);
|
push_dl_tasks(rq);
|
||||||
}
|
}
|
||||||
@ -1724,7 +1725,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
|
|||||||
|
|
||||||
if (task_on_rq_queued(p) && rq->curr != p) {
|
if (task_on_rq_queued(p) && rq->curr != p) {
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
|
if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
|
||||||
queue_push_tasks(rq);
|
queue_push_tasks(rq);
|
||||||
#else
|
#else
|
||||||
if (dl_task(rq->curr))
|
if (dl_task(rq->curr))
|
||||||
|
@@ -626,15 +626,16 @@ do { \
 #undef P
 #undef PN
 
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
-#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
-
 #ifdef CONFIG_SMP
+#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 	P64(avg_idle);
 	P64(max_idle_balance_cost);
+#undef P64
 #endif
 
+#ifdef CONFIG_SCHEDSTATS
+#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
+
 	if (schedstat_enabled()) {
 		P(yld_count);
 		P(sched_count);
@@ -644,7 +645,6 @@ do { \
 	}
 
 #undef P
-#undef P64
 #endif
 	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
@@ -204,7 +204,7 @@ static void __update_inv_weight(struct load_weight *lw)
  *   OR
  *     (delta_exec * (weight * lw->inv_weight)) >> WMULT_SHIFT
  *
- * Either weight := NICE_0_LOAD and lw \e prio_to_wmult[], in which case
+ * Either weight := NICE_0_LOAD and lw \e sched_prio_to_wmult[], in which case
  * we're guaranteed shift stays positive because inv_weight is guaranteed to
  * fit 32 bits, and NICE_0_LOAD gives another 10 bits; therefore shift >= 22.
  *
@@ -682,17 +682,68 @@ void init_entity_runnable_average(struct sched_entity *se)
 	sa->period_contrib = 1023;
 	sa->load_avg = scale_load_down(se->load.weight);
 	sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
-	sa->util_avg = scale_load_down(SCHED_LOAD_SCALE);
-	sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
+	/*
+	 * At this point, util_avg won't be used in select_task_rq_fair anyway
+	 */
+	sa->util_avg = 0;
+	sa->util_sum = 0;
 	/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
 }
 
+/*
+ * With new tasks being created, their initial util_avgs are extrapolated
+ * based on the cfs_rq's current util_avg:
+ *
+ *   util_avg = cfs_rq->util_avg / (cfs_rq->load_avg + 1) * se.load.weight
+ *
+ * However, in many cases, the above util_avg does not give a desired
+ * value. Moreover, the sum of the util_avgs may be divergent, such
+ * as when the series is a harmonic series.
+ *
+ * To solve this problem, we also cap the util_avg of successive tasks to
+ * only 1/2 of the left utilization budget:
+ *
+ *   util_avg_cap = (1024 - cfs_rq->avg.util_avg) / 2^n
+ *
+ * where n denotes the nth task.
+ *
+ * For example, a simplest series from the beginning would be like:
+ *
+ *  task  util_avg: 512, 256, 128,  64,  32,   16,    8, ...
+ * cfs_rq util_avg: 512, 768, 896, 960, 992, 1008, 1016, ...
+ *
+ * Finally, that extrapolated util_avg is clamped to the cap (util_avg_cap)
+ * if util_avg > util_avg_cap.
+ */
+void post_init_entity_util_avg(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	struct sched_avg *sa = &se->avg;
+	long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
+
+	if (cap > 0) {
+		if (cfs_rq->avg.util_avg != 0) {
+			sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
+			sa->util_avg /= (cfs_rq->avg.load_avg + 1);
+
+			if (sa->util_avg > cap)
+				sa->util_avg = cap;
+		} else {
+			sa->util_avg = cap;
+		}
+		sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
+	}
+}
+
 static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
 static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq);
 #else
 void init_entity_runnable_average(struct sched_entity *se)
 {
 }
+void post_init_entity_util_avg(struct sched_entity *se)
+{
+}
 #endif
 
 /*
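The capping rule documented in the hunk above is easy to check outside the kernel. The sketch below is illustrative user-space C only, not part of the patch: the 1024 constant stands in for SCHED_CAPACITY_SCALE, the weight is assumed to be that of a nice-0 task, and the loop reproduces the 512, 256, 128, ... series quoted in the comment.

#include <stdio.h>

#define CAPACITY_SCALE 1024	/* stands in for SCHED_CAPACITY_SCALE */

int main(void)
{
	long cfs_util = 0;	/* models cfs_rq->avg.util_avg */
	long cfs_load = 0;	/* models cfs_rq->avg.load_avg */
	long weight = 1024;	/* assumed nice-0 task weight */

	for (int n = 1; n <= 8; n++) {
		long cap = (CAPACITY_SCALE - cfs_util) / 2;
		long util = cfs_util * weight / (cfs_load + 1);

		if (cfs_util == 0 || util > cap)
			util = cap;	/* the post_init cap */

		cfs_util += util;
		cfs_load += weight;
		printf("task %d: util_avg=%4ld, cfs_rq util_avg=%4ld\n",
		       n, util, cfs_util);
	}
	return 0;
}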
@ -2437,10 +2488,12 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|||||||
update_load_sub(&cfs_rq->load, se->load.weight);
|
update_load_sub(&cfs_rq->load, se->load.weight);
|
||||||
if (!parent_entity(se))
|
if (!parent_entity(se))
|
||||||
update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
|
update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
if (entity_is_task(se)) {
|
if (entity_is_task(se)) {
|
||||||
account_numa_dequeue(rq_of(cfs_rq), task_of(se));
|
account_numa_dequeue(rq_of(cfs_rq), task_of(se));
|
||||||
list_del_init(&se->group_node);
|
list_del_init(&se->group_node);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
cfs_rq->nr_running--;
|
cfs_rq->nr_running--;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2549,6 +2602,16 @@ static const u32 runnable_avg_yN_sum[] = {
|
|||||||
17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371,
|
17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Precomputed \Sum y^k { 1<=k<=n, where n%32=0). Values are rolled down to
|
||||||
|
* lower integers. See Documentation/scheduler/sched-avg.txt how these
|
||||||
|
* were generated:
|
||||||
|
*/
|
||||||
|
static const u32 __accumulated_sum_N32[] = {
|
||||||
|
0, 23371, 35056, 40899, 43820, 45281,
|
||||||
|
46011, 46376, 46559, 46650, 46696, 46719,
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Approximate:
|
* Approximate:
|
||||||
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
|
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
|
||||||
@ -2597,22 +2660,13 @@ static u32 __compute_runnable_contrib(u64 n)
|
|||||||
else if (unlikely(n >= LOAD_AVG_MAX_N))
|
else if (unlikely(n >= LOAD_AVG_MAX_N))
|
||||||
return LOAD_AVG_MAX;
|
return LOAD_AVG_MAX;
|
||||||
|
|
||||||
/* Compute \Sum k^n combining precomputed values for k^i, \Sum k^j */
|
/* Since n < LOAD_AVG_MAX_N, n/LOAD_AVG_PERIOD < 11 */
|
||||||
do {
|
contrib = __accumulated_sum_N32[n/LOAD_AVG_PERIOD];
|
||||||
contrib /= 2; /* y^LOAD_AVG_PERIOD = 1/2 */
|
n %= LOAD_AVG_PERIOD;
|
||||||
contrib += runnable_avg_yN_sum[LOAD_AVG_PERIOD];
|
|
||||||
|
|
||||||
n -= LOAD_AVG_PERIOD;
|
|
||||||
} while (n > LOAD_AVG_PERIOD);
|
|
||||||
|
|
||||||
contrib = decay_load(contrib, n);
|
contrib = decay_load(contrib, n);
|
||||||
return contrib + runnable_avg_yN_sum[n];
|
return contrib + runnable_avg_yN_sum[n];
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (SCHED_LOAD_SHIFT - SCHED_LOAD_RESOLUTION) != 10 || SCHED_CAPACITY_SHIFT != 10
|
|
||||||
#error "load tracking assumes 2^10 as unit"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
|
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2821,55 +2875,11 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
|
|||||||
|
|
||||||
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
|
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
|
||||||
|
|
||||||
/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
|
static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
|
||||||
static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
|
|
||||||
{
|
{
|
||||||
struct sched_avg *sa = &cfs_rq->avg;
|
|
||||||
int decayed, removed = 0;
|
|
||||||
|
|
||||||
if (atomic_long_read(&cfs_rq->removed_load_avg)) {
|
|
||||||
s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
|
|
||||||
sa->load_avg = max_t(long, sa->load_avg - r, 0);
|
|
||||||
sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
|
|
||||||
removed = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (atomic_long_read(&cfs_rq->removed_util_avg)) {
|
|
||||||
long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
|
|
||||||
sa->util_avg = max_t(long, sa->util_avg - r, 0);
|
|
||||||
sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
|
|
||||||
scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
|
|
||||||
|
|
||||||
#ifndef CONFIG_64BIT
|
|
||||||
smp_wmb();
|
|
||||||
cfs_rq->load_last_update_time_copy = sa->last_update_time;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return decayed || removed;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Update task and its cfs_rq load average */
|
|
||||||
static inline void update_load_avg(struct sched_entity *se, int update_tg)
|
|
||||||
{
|
|
||||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
|
||||||
u64 now = cfs_rq_clock_task(cfs_rq);
|
|
||||||
struct rq *rq = rq_of(cfs_rq);
|
struct rq *rq = rq_of(cfs_rq);
|
||||||
int cpu = cpu_of(rq);
|
int cpu = cpu_of(rq);
|
||||||
|
|
||||||
/*
|
|
||||||
* Track task load average for carrying it to new CPU after migrated, and
|
|
||||||
* track group sched_entity load average for task_h_load calc in migration
|
|
||||||
*/
|
|
||||||
__update_load_avg(now, cpu, &se->avg,
|
|
||||||
se->on_rq * scale_load_down(se->load.weight),
|
|
||||||
cfs_rq->curr == se, NULL);
|
|
||||||
|
|
||||||
if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
|
|
||||||
update_tg_load_avg(cfs_rq, 0);
|
|
||||||
|
|
||||||
if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
|
if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
|
||||||
unsigned long max = rq->cpu_capacity_orig;
|
unsigned long max = rq->cpu_capacity_orig;
|
||||||
|
|
||||||
@ -2894,6 +2904,61 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
|
||||||
|
static inline int
|
||||||
|
update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
|
||||||
|
{
|
||||||
|
struct sched_avg *sa = &cfs_rq->avg;
|
||||||
|
int decayed, removed_load = 0, removed_util = 0;
|
||||||
|
|
||||||
|
if (atomic_long_read(&cfs_rq->removed_load_avg)) {
|
||||||
|
s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
|
||||||
|
sa->load_avg = max_t(long, sa->load_avg - r, 0);
|
||||||
|
sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
|
||||||
|
removed_load = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (atomic_long_read(&cfs_rq->removed_util_avg)) {
|
||||||
|
long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
|
||||||
|
sa->util_avg = max_t(long, sa->util_avg - r, 0);
|
||||||
|
sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
|
||||||
|
removed_util = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
|
||||||
|
scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
|
||||||
|
|
||||||
|
#ifndef CONFIG_64BIT
|
||||||
|
smp_wmb();
|
||||||
|
cfs_rq->load_last_update_time_copy = sa->last_update_time;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (update_freq && (decayed || removed_util))
|
||||||
|
cfs_rq_util_change(cfs_rq);
|
||||||
|
|
||||||
|
return decayed || removed_load;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update task and its cfs_rq load average */
|
||||||
|
static inline void update_load_avg(struct sched_entity *se, int update_tg)
|
||||||
|
{
|
||||||
|
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||||
|
u64 now = cfs_rq_clock_task(cfs_rq);
|
||||||
|
struct rq *rq = rq_of(cfs_rq);
|
||||||
|
int cpu = cpu_of(rq);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Track task load average for carrying it to new CPU after migrated, and
|
||||||
|
* track group sched_entity load average for task_h_load calc in migration
|
||||||
|
*/
|
||||||
|
__update_load_avg(now, cpu, &se->avg,
|
||||||
|
se->on_rq * scale_load_down(se->load.weight),
|
||||||
|
cfs_rq->curr == se, NULL);
|
||||||
|
|
||||||
|
if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
|
||||||
|
update_tg_load_avg(cfs_rq, 0);
|
||||||
|
}
|
||||||
|
|
||||||
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||||
{
|
{
|
||||||
if (!sched_feat(ATTACH_AGE_LOAD))
|
if (!sched_feat(ATTACH_AGE_LOAD))
|
||||||
@ -2919,6 +2984,8 @@ skip_aging:
|
|||||||
cfs_rq->avg.load_sum += se->avg.load_sum;
|
cfs_rq->avg.load_sum += se->avg.load_sum;
|
||||||
cfs_rq->avg.util_avg += se->avg.util_avg;
|
cfs_rq->avg.util_avg += se->avg.util_avg;
|
||||||
cfs_rq->avg.util_sum += se->avg.util_sum;
|
cfs_rq->avg.util_sum += se->avg.util_sum;
|
||||||
|
|
||||||
|
cfs_rq_util_change(cfs_rq);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||||
@ -2931,6 +2998,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
|
|||||||
cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
|
cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
|
||||||
cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
|
cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
|
||||||
cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
|
cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
|
||||||
|
|
||||||
|
cfs_rq_util_change(cfs_rq);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add the load generated by se into cfs_rq's load average */
|
/* Add the load generated by se into cfs_rq's load average */
|
||||||
@ -2948,7 +3017,7 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|||||||
cfs_rq->curr == se, NULL);
|
cfs_rq->curr == se, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
decayed = update_cfs_rq_load_avg(now, cfs_rq);
|
decayed = update_cfs_rq_load_avg(now, cfs_rq, !migrated);
|
||||||
|
|
||||||
cfs_rq->runnable_load_avg += sa->load_avg;
|
cfs_rq->runnable_load_avg += sa->load_avg;
|
||||||
cfs_rq->runnable_load_sum += sa->load_sum;
|
cfs_rq->runnable_load_sum += sa->load_sum;
|
||||||
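For reference, the __accumulated_sum_N32[] values introduced a few hunks above can be re-derived from the defining property y^32 = 1/2. The user-space sketch below is illustrative only; it recomputes the partial sums in floating point, so the printed numbers land within a few counts of the rolled-down table entries (23371, 35056, ...). Link with -lm.

#include <stdio.h>
#include <math.h>

int main(void)
{
	const double y = pow(0.5, 1.0 / 32.0);	/* y^32 == 1/2 */
	double sum = 0.0;

	for (int n = 1; n <= 11 * 32; n++) {
		sum += 1024.0 * pow(y, n);	/* same 2^10 unit as the table */
		if (n % 32 == 0)
			printf("n = %3d: ~%5.0f\n", n, sum);
	}
	return 0;
}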
@ -3185,20 +3254,61 @@ static inline void check_schedstat_required(void)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MIGRATION
|
||||||
|
*
|
||||||
|
* dequeue
|
||||||
|
* update_curr()
|
||||||
|
* update_min_vruntime()
|
||||||
|
* vruntime -= min_vruntime
|
||||||
|
*
|
||||||
|
* enqueue
|
||||||
|
* update_curr()
|
||||||
|
* update_min_vruntime()
|
||||||
|
* vruntime += min_vruntime
|
||||||
|
*
|
||||||
|
* this way the vruntime transition between RQs is done when both
|
||||||
|
* min_vruntime are up-to-date.
|
||||||
|
*
|
||||||
|
* WAKEUP (remote)
|
||||||
|
*
|
||||||
|
* ->migrate_task_rq_fair() (p->state == TASK_WAKING)
|
||||||
|
* vruntime -= min_vruntime
|
||||||
|
*
|
||||||
|
* enqueue
|
||||||
|
* update_curr()
|
||||||
|
* update_min_vruntime()
|
||||||
|
* vruntime += min_vruntime
|
||||||
|
*
|
||||||
|
* this way we don't have the most up-to-date min_vruntime on the originating
|
||||||
|
* CPU and an up-to-date min_vruntime on the destination CPU.
|
||||||
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||||
{
|
{
|
||||||
/*
|
bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
|
||||||
* Update the normalized vruntime before updating min_vruntime
|
bool curr = cfs_rq->curr == se;
|
||||||
* through calling update_curr().
|
|
||||||
*/
|
|
||||||
if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
|
|
||||||
se->vruntime += cfs_rq->min_vruntime;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Update run-time statistics of the 'current'.
|
* If we're the current task, we must renormalise before calling
|
||||||
|
* update_curr().
|
||||||
*/
|
*/
|
||||||
|
if (renorm && curr)
|
||||||
|
se->vruntime += cfs_rq->min_vruntime;
|
||||||
|
|
||||||
update_curr(cfs_rq);
|
update_curr(cfs_rq);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Otherwise, renormalise after, such that we're placed at the current
|
||||||
|
* moment in time, instead of some random moment in the past. Being
|
||||||
|
* placed in the past could significantly boost this task to the
|
||||||
|
* fairness detriment of existing tasks.
|
||||||
|
*/
|
||||||
|
if (renorm && !curr)
|
||||||
|
se->vruntime += cfs_rq->min_vruntime;
|
||||||
|
|
||||||
enqueue_entity_load_avg(cfs_rq, se);
|
enqueue_entity_load_avg(cfs_rq, se);
|
||||||
account_entity_enqueue(cfs_rq, se);
|
account_entity_enqueue(cfs_rq, se);
|
||||||
update_cfs_shares(cfs_rq);
|
update_cfs_shares(cfs_rq);
|
||||||
@ -3214,7 +3324,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|||||||
update_stats_enqueue(cfs_rq, se);
|
update_stats_enqueue(cfs_rq, se);
|
||||||
check_spread(cfs_rq, se);
|
check_spread(cfs_rq, se);
|
||||||
}
|
}
|
||||||
if (se != cfs_rq->curr)
|
if (!curr)
|
||||||
__enqueue_entity(cfs_rq, se);
|
__enqueue_entity(cfs_rq, se);
|
||||||
se->on_rq = 1;
|
se->on_rq = 1;
|
||||||
|
|
||||||
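A toy model of the renormalisation described in the MIGRATION comment above, in plain user-space C with made-up numbers: the task keeps its lag relative to min_vruntime, which is subtracted on the source runqueue and added back on the destination.

#include <stdio.h>

int main(void)
{
	unsigned long long src_min = 1000000ULL;	/* source rq min_vruntime */
	unsigned long long dst_min = 5000000ULL;	/* destination rq min_vruntime */
	unsigned long long vruntime = 1003000ULL;	/* 3us ahead of the source min */

	unsigned long long lag = vruntime - src_min;	 /* dequeue / migrate side */
	unsigned long long new_vruntime = dst_min + lag; /* enqueue on the new rq */

	printf("lag carried across the move: %llu (new vruntime %llu)\n",
	       lag, new_vruntime);
	return 0;
}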
@ -4422,7 +4532,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
#ifdef CONFIG_NO_HZ_COMMON
|
||||||
/*
|
/*
|
||||||
* per rq 'load' arrray crap; XXX kill this.
|
* per rq 'load' arrray crap; XXX kill this.
|
||||||
*/
|
*/
|
||||||
@ -4488,13 +4598,13 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
|
|||||||
}
|
}
|
||||||
return load;
|
return load;
|
||||||
}
|
}
|
||||||
|
#endif /* CONFIG_NO_HZ_COMMON */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* __update_cpu_load - update the rq->cpu_load[] statistics
|
* __cpu_load_update - update the rq->cpu_load[] statistics
|
||||||
* @this_rq: The rq to update statistics for
|
* @this_rq: The rq to update statistics for
|
||||||
* @this_load: The current load
|
* @this_load: The current load
|
||||||
* @pending_updates: The number of missed updates
|
* @pending_updates: The number of missed updates
|
||||||
* @active: !0 for NOHZ_FULL
|
|
||||||
*
|
*
|
||||||
* Update rq->cpu_load[] statistics. This function is usually called every
|
* Update rq->cpu_load[] statistics. This function is usually called every
|
||||||
* scheduler tick (TICK_NSEC).
|
* scheduler tick (TICK_NSEC).
|
||||||
@ -4523,12 +4633,12 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
|
|||||||
* load[i]_n = (1 - 1/2^i)^n * load[i]_0
|
* load[i]_n = (1 - 1/2^i)^n * load[i]_0
|
||||||
*
|
*
|
||||||
* see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra
|
* see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra
|
||||||
* term. See the @active paramter.
|
* term.
|
||||||
*/
|
*/
|
||||||
static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
|
static void cpu_load_update(struct rq *this_rq, unsigned long this_load,
|
||||||
unsigned long pending_updates, int active)
|
unsigned long pending_updates)
|
||||||
{
|
{
|
||||||
unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0;
|
unsigned long __maybe_unused tickless_load = this_rq->cpu_load[0];
|
||||||
int i, scale;
|
int i, scale;
|
||||||
|
|
||||||
this_rq->nr_load_updates++;
|
this_rq->nr_load_updates++;
|
||||||
@ -4541,6 +4651,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
|
|||||||
/* scale is effectively 1 << i now, and >> i divides by scale */
|
/* scale is effectively 1 << i now, and >> i divides by scale */
|
||||||
|
|
||||||
old_load = this_rq->cpu_load[i];
|
old_load = this_rq->cpu_load[i];
|
||||||
|
#ifdef CONFIG_NO_HZ_COMMON
|
||||||
old_load = decay_load_missed(old_load, pending_updates - 1, i);
|
old_load = decay_load_missed(old_load, pending_updates - 1, i);
|
||||||
if (tickless_load) {
|
if (tickless_load) {
|
||||||
old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
|
old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
|
||||||
@ -4551,6 +4662,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
|
|||||||
*/
|
*/
|
||||||
old_load += tickless_load;
|
old_load += tickless_load;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
new_load = this_load;
|
new_load = this_load;
|
||||||
/*
|
/*
|
||||||
* Round up the averaging division if load is increasing. This
|
* Round up the averaging division if load is increasing. This
|
||||||
@ -4573,10 +4685,23 @@ static unsigned long weighted_cpuload(const int cpu)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_NO_HZ_COMMON
|
#ifdef CONFIG_NO_HZ_COMMON
|
||||||
static void __update_cpu_load_nohz(struct rq *this_rq,
|
/*
|
||||||
unsigned long curr_jiffies,
|
* There is no sane way to deal with nohz on smp when using jiffies because the
|
||||||
unsigned long load,
|
* cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
|
||||||
int active)
|
* causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
|
||||||
|
*
|
||||||
|
* Therefore we need to avoid the delta approach from the regular tick when
|
||||||
|
* possible since that would seriously skew the load calculation. This is why we
|
||||||
|
* use cpu_load_update_periodic() for CPUs out of nohz. However we'll rely on
|
||||||
|
* jiffies deltas for updates happening while in nohz mode (idle ticks, idle
|
||||||
|
* loop exit, nohz_idle_balance, nohz full exit...)
|
||||||
|
*
|
||||||
|
* This means we might still be one tick off for nohz periods.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void cpu_load_update_nohz(struct rq *this_rq,
|
||||||
|
unsigned long curr_jiffies,
|
||||||
|
unsigned long load)
|
||||||
{
|
{
|
||||||
unsigned long pending_updates;
|
unsigned long pending_updates;
|
||||||
|
|
||||||
@ -4588,28 +4713,15 @@ static void __update_cpu_load_nohz(struct rq *this_rq,
|
|||||||
* In the NOHZ_FULL case, we were non-idle, we should consider
|
* In the NOHZ_FULL case, we were non-idle, we should consider
|
||||||
* its weighted load.
|
* its weighted load.
|
||||||
*/
|
*/
|
||||||
__update_cpu_load(this_rq, load, pending_updates, active);
|
cpu_load_update(this_rq, load, pending_updates);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* There is no sane way to deal with nohz on smp when using jiffies because the
|
|
||||||
* cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
|
|
||||||
* causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
|
|
||||||
*
|
|
||||||
* Therefore we cannot use the delta approach from the regular tick since that
|
|
||||||
* would seriously skew the load calculation. However we'll make do for those
|
|
||||||
* updates happening while idle (nohz_idle_balance) or coming out of idle
|
|
||||||
* (tick_nohz_idle_exit).
|
|
||||||
*
|
|
||||||
* This means we might still be one tick off for nohz periods.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Called from nohz_idle_balance() to update the load ratings before doing the
|
* Called from nohz_idle_balance() to update the load ratings before doing the
|
||||||
* idle balance.
|
* idle balance.
|
||||||
*/
|
*/
|
||||||
static void update_cpu_load_idle(struct rq *this_rq)
|
static void cpu_load_update_idle(struct rq *this_rq)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* bail if there's load or we're actually up-to-date.
|
* bail if there's load or we're actually up-to-date.
|
||||||
@ -4617,38 +4729,71 @@ static void update_cpu_load_idle(struct rq *this_rq)
|
|||||||
if (weighted_cpuload(cpu_of(this_rq)))
|
if (weighted_cpuload(cpu_of(this_rq)))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
__update_cpu_load_nohz(this_rq, READ_ONCE(jiffies), 0, 0);
|
cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
|
* Record CPU load on nohz entry so we know the tickless load to account
|
||||||
|
* on nohz exit. cpu_load[0] happens then to be updated more frequently
|
||||||
|
* than other cpu_load[idx] but it should be fine as cpu_load readers
|
||||||
|
* shouldn't rely into synchronized cpu_load[*] updates.
|
||||||
*/
|
*/
|
||||||
void update_cpu_load_nohz(int active)
|
void cpu_load_update_nohz_start(void)
|
||||||
{
|
{
|
||||||
struct rq *this_rq = this_rq();
|
struct rq *this_rq = this_rq();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is all lockless but should be fine. If weighted_cpuload changes
|
||||||
|
* concurrently we'll exit nohz. And cpu_load write can race with
|
||||||
|
* cpu_load_update_idle() but both updater would be writing the same.
|
||||||
|
*/
|
||||||
|
this_rq->cpu_load[0] = weighted_cpuload(cpu_of(this_rq));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Account the tickless load in the end of a nohz frame.
|
||||||
|
*/
|
||||||
|
void cpu_load_update_nohz_stop(void)
|
||||||
|
{
|
||||||
unsigned long curr_jiffies = READ_ONCE(jiffies);
|
unsigned long curr_jiffies = READ_ONCE(jiffies);
|
||||||
unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
|
struct rq *this_rq = this_rq();
|
||||||
|
unsigned long load;
|
||||||
|
|
||||||
if (curr_jiffies == this_rq->last_load_update_tick)
|
if (curr_jiffies == this_rq->last_load_update_tick)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
load = weighted_cpuload(cpu_of(this_rq));
|
||||||
raw_spin_lock(&this_rq->lock);
|
raw_spin_lock(&this_rq->lock);
|
||||||
__update_cpu_load_nohz(this_rq, curr_jiffies, load, active);
|
update_rq_clock(this_rq);
|
||||||
|
cpu_load_update_nohz(this_rq, curr_jiffies, load);
|
||||||
raw_spin_unlock(&this_rq->lock);
|
raw_spin_unlock(&this_rq->lock);
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_NO_HZ */
|
#else /* !CONFIG_NO_HZ_COMMON */
|
||||||
|
static inline void cpu_load_update_nohz(struct rq *this_rq,
|
||||||
|
unsigned long curr_jiffies,
|
||||||
|
unsigned long load) { }
|
||||||
|
#endif /* CONFIG_NO_HZ_COMMON */
|
||||||
|
|
||||||
|
static void cpu_load_update_periodic(struct rq *this_rq, unsigned long load)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_NO_HZ_COMMON
|
||||||
|
/* See the mess around cpu_load_update_nohz(). */
|
||||||
|
this_rq->last_load_update_tick = READ_ONCE(jiffies);
|
||||||
|
#endif
|
||||||
|
cpu_load_update(this_rq, load, 1);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Called from scheduler_tick()
|
* Called from scheduler_tick()
|
||||||
*/
|
*/
|
||||||
void update_cpu_load_active(struct rq *this_rq)
|
void cpu_load_update_active(struct rq *this_rq)
|
||||||
{
|
{
|
||||||
unsigned long load = weighted_cpuload(cpu_of(this_rq));
|
unsigned long load = weighted_cpuload(cpu_of(this_rq));
|
||||||
/*
|
|
||||||
* See the mess around update_cpu_load_idle() / update_cpu_load_nohz().
|
if (tick_nohz_tick_stopped())
|
||||||
*/
|
cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load);
|
||||||
this_rq->last_load_update_tick = jiffies;
|
else
|
||||||
__update_cpu_load(this_rq, load, 1, 1);
|
cpu_load_update_periodic(this_rq, load);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
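The decay applied to cpu_load[] for missed ticks, load[i]_n = (1 - 1/2^i)^n * load[i]_0, can be sketched with plain integer arithmetic. The loop below is illustrative user-space C only; the kernel uses a precomputed lookup table instead of iterating once per missed tick, but the result is the same shape.

#include <stdio.h>

/* load[i] after 'missed' skipped ticks: multiply by (1 - 1/2^idx) each tick */
static unsigned long decay_missed(unsigned long load, unsigned int missed, int idx)
{
	while (missed--)
		load -= load >> idx;
	return load;
}

int main(void)
{
	unsigned long load = 2048;

	for (int idx = 1; idx <= 4; idx++)
		printf("idx=%d: %lu -> %lu after 10 missed ticks\n",
		       idx, load, decay_missed(load, 10, idx));
	return 0;
}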
@ -4706,46 +4851,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void record_wakee(struct task_struct *p)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Rough decay (wiping) for cost saving, don't worry
|
|
||||||
* about the boundary, really active task won't care
|
|
||||||
* about the loss.
|
|
||||||
*/
|
|
||||||
if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
|
|
||||||
current->wakee_flips >>= 1;
|
|
||||||
current->wakee_flip_decay_ts = jiffies;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (current->last_wakee != p) {
|
|
||||||
current->last_wakee = p;
|
|
||||||
current->wakee_flips++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void task_waking_fair(struct task_struct *p)
|
|
||||||
{
|
|
||||||
struct sched_entity *se = &p->se;
|
|
||||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
|
||||||
u64 min_vruntime;
|
|
||||||
|
|
||||||
#ifndef CONFIG_64BIT
|
|
||||||
u64 min_vruntime_copy;
|
|
||||||
|
|
||||||
do {
|
|
||||||
min_vruntime_copy = cfs_rq->min_vruntime_copy;
|
|
||||||
smp_rmb();
|
|
||||||
min_vruntime = cfs_rq->min_vruntime;
|
|
||||||
} while (min_vruntime != min_vruntime_copy);
|
|
||||||
#else
|
|
||||||
min_vruntime = cfs_rq->min_vruntime;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
se->vruntime -= min_vruntime;
|
|
||||||
record_wakee(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||||
/*
|
/*
|
||||||
* effective_load() calculates the load change as seen from the root_task_group
|
* effective_load() calculates the load change as seen from the root_task_group
|
||||||
@ -4861,17 +4966,39 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static void record_wakee(struct task_struct *p)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Only decay a single time; tasks that have less then 1 wakeup per
|
||||||
|
* jiffy will not have built up many flips.
|
||||||
|
*/
|
||||||
|
if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
|
||||||
|
current->wakee_flips >>= 1;
|
||||||
|
current->wakee_flip_decay_ts = jiffies;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (current->last_wakee != p) {
|
||||||
|
current->last_wakee = p;
|
||||||
|
current->wakee_flips++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Detect M:N waker/wakee relationships via a switching-frequency heuristic.
|
* Detect M:N waker/wakee relationships via a switching-frequency heuristic.
|
||||||
|
*
|
||||||
* A waker of many should wake a different task than the one last awakened
|
* A waker of many should wake a different task than the one last awakened
|
||||||
* at a frequency roughly N times higher than one of its wakees. In order
|
* at a frequency roughly N times higher than one of its wakees.
|
||||||
* to determine whether we should let the load spread vs consolodating to
|
*
|
||||||
* shared cache, we look for a minimum 'flip' frequency of llc_size in one
|
* In order to determine whether we should let the load spread vs consolidating
|
||||||
* partner, and a factor of lls_size higher frequency in the other. With
|
* to shared cache, we look for a minimum 'flip' frequency of llc_size in one
|
||||||
* both conditions met, we can be relatively sure that the relationship is
|
* partner, and a factor of lls_size higher frequency in the other.
|
||||||
* non-monogamous, with partner count exceeding socket size. Waker/wakee
|
*
|
||||||
* being client/server, worker/dispatcher, interrupt source or whatever is
|
* With both conditions met, we can be relatively sure that the relationship is
|
||||||
* irrelevant, spread criteria is apparent partner count exceeds socket size.
|
* non-monogamous, with partner count exceeding socket size.
|
||||||
|
*
|
||||||
|
* Waker/wakee being client/server, worker/dispatcher, interrupt source or
|
||||||
|
* whatever is irrelevant, spread criteria is apparent partner count exceeds
|
||||||
|
* socket size.
|
||||||
*/
|
*/
|
||||||
static int wake_wide(struct task_struct *p)
|
static int wake_wide(struct task_struct *p)
|
||||||
{
|
{
|
||||||
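A stand-alone sketch of the switching-frequency test described in the comment above. The llc_size value and flip counts are made-up inputs; in the kernel the factor comes from the LLC size of the waking CPU and the flip counters live in task_struct.

#include <stdio.h>

/* mirrors the wake_wide() test: spread only for clear M:N relationships */
static int wake_wide_toy(unsigned int master_flips, unsigned int slave_flips,
			 unsigned int llc_size)
{
	unsigned int master = master_flips, slave = slave_flips;

	if (master < slave) {
		unsigned int tmp = master;
		master = slave;
		slave = tmp;
	}
	if (slave < llc_size || master < slave * llc_size)
		return 0;	/* looks ~1:1, try to consolidate on shared cache */
	return 1;		/* many apparent partners, let the load spread */
}

int main(void)
{
	printf("%d\n", wake_wide_toy(400, 20, 8));	/* 1: spread */
	printf("%d\n", wake_wide_toy(30, 25, 8));	/* 0: consolidate */
	return 0;
}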
@ -5176,8 +5303,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
|
|||||||
int want_affine = 0;
|
int want_affine = 0;
|
||||||
int sync = wake_flags & WF_SYNC;
|
int sync = wake_flags & WF_SYNC;
|
||||||
|
|
||||||
if (sd_flag & SD_BALANCE_WAKE)
|
if (sd_flag & SD_BALANCE_WAKE) {
|
||||||
|
record_wakee(p);
|
||||||
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
|
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
|
||||||
|
}
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
for_each_domain(cpu, tmp) {
|
for_each_domain(cpu, tmp) {
|
||||||
@ -5256,6 +5385,32 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
|
|||||||
*/
|
*/
|
||||||
static void migrate_task_rq_fair(struct task_struct *p)
|
static void migrate_task_rq_fair(struct task_struct *p)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* As blocked tasks retain absolute vruntime the migration needs to
|
||||||
|
* deal with this by subtracting the old and adding the new
|
||||||
|
* min_vruntime -- the latter is done by enqueue_entity() when placing
|
||||||
|
* the task on the new runqueue.
|
||||||
|
*/
|
||||||
|
if (p->state == TASK_WAKING) {
|
||||||
|
struct sched_entity *se = &p->se;
|
||||||
|
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||||
|
u64 min_vruntime;
|
||||||
|
|
||||||
|
#ifndef CONFIG_64BIT
|
||||||
|
u64 min_vruntime_copy;
|
||||||
|
|
||||||
|
do {
|
||||||
|
min_vruntime_copy = cfs_rq->min_vruntime_copy;
|
||||||
|
smp_rmb();
|
||||||
|
min_vruntime = cfs_rq->min_vruntime;
|
||||||
|
} while (min_vruntime != min_vruntime_copy);
|
||||||
|
#else
|
||||||
|
min_vruntime = cfs_rq->min_vruntime;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
se->vruntime -= min_vruntime;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We are supposed to update the task to "current" time, then its up to date
|
* We are supposed to update the task to "current" time, then its up to date
|
||||||
* and ready to go to new CPU/cfs_rq. But we have difficulty in getting
|
* and ready to go to new CPU/cfs_rq. But we have difficulty in getting
|
||||||
@ -5439,7 +5594,7 @@ preempt:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static struct task_struct *
|
static struct task_struct *
|
||||||
pick_next_task_fair(struct rq *rq, struct task_struct *prev)
|
pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
|
||||||
{
|
{
|
||||||
struct cfs_rq *cfs_rq = &rq->cfs;
|
struct cfs_rq *cfs_rq = &rq->cfs;
|
||||||
struct sched_entity *se;
|
struct sched_entity *se;
|
||||||
@ -5552,9 +5707,9 @@ idle:
|
|||||||
* further scheduler activity on it and we're being very careful to
|
* further scheduler activity on it and we're being very careful to
|
||||||
* re-start the picking loop.
|
* re-start the picking loop.
|
||||||
*/
|
*/
|
||||||
lockdep_unpin_lock(&rq->lock);
|
lockdep_unpin_lock(&rq->lock, cookie);
|
||||||
new_tasks = idle_balance(rq);
|
new_tasks = idle_balance(rq);
|
||||||
lockdep_pin_lock(&rq->lock);
|
lockdep_repin_lock(&rq->lock, cookie);
|
||||||
/*
|
/*
|
||||||
* Because idle_balance() releases (and re-acquires) rq->lock, it is
|
* Because idle_balance() releases (and re-acquires) rq->lock, it is
|
||||||
* possible for any higher priority task to appear. In that case we
|
* possible for any higher priority task to appear. In that case we
|
||||||
@ -5653,7 +5808,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
|
|||||||
* W_i,0 = \Sum_j w_i,j (2)
|
* W_i,0 = \Sum_j w_i,j (2)
|
||||||
*
|
*
|
||||||
* Where w_i,j is the weight of the j-th runnable task on cpu i. This weight
|
* Where w_i,j is the weight of the j-th runnable task on cpu i. This weight
|
||||||
* is derived from the nice value as per prio_to_weight[].
|
* is derived from the nice value as per sched_prio_to_weight[].
|
||||||
*
|
*
|
||||||
* The weight average is an exponential decay average of the instantaneous
|
* The weight average is an exponential decay average of the instantaneous
|
||||||
* weight:
|
* weight:
|
||||||
@ -6155,7 +6310,7 @@ static void update_blocked_averages(int cpu)
|
|||||||
if (throttled_hierarchy(cfs_rq))
|
if (throttled_hierarchy(cfs_rq))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
|
if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
|
||||||
update_tg_load_avg(cfs_rq, 0);
|
update_tg_load_avg(cfs_rq, 0);
|
||||||
}
|
}
|
||||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||||
@ -6216,7 +6371,7 @@ static inline void update_blocked_averages(int cpu)
|
|||||||
|
|
||||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||||
update_rq_clock(rq);
|
update_rq_clock(rq);
|
||||||
update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
|
update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true);
|
||||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6625,6 +6780,9 @@ static bool update_sd_pick_busiest(struct lb_env *env,
|
|||||||
if (!(env->sd->flags & SD_ASYM_PACKING))
|
if (!(env->sd->flags & SD_ASYM_PACKING))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
/* No ASYM_PACKING if target cpu is already busy */
|
||||||
|
if (env->idle == CPU_NOT_IDLE)
|
||||||
|
return true;
|
||||||
/*
|
/*
|
||||||
* ASYM_PACKING needs to move all the work to the lowest
|
* ASYM_PACKING needs to move all the work to the lowest
|
||||||
* numbered CPUs in the group, therefore mark all groups
|
* numbered CPUs in the group, therefore mark all groups
|
||||||
@ -6634,7 +6792,8 @@ static bool update_sd_pick_busiest(struct lb_env *env,
|
|||||||
if (!sds->busiest)
|
if (!sds->busiest)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (group_first_cpu(sds->busiest) > group_first_cpu(sg))
|
/* Prefer to move from highest possible cpu's work */
|
||||||
|
if (group_first_cpu(sds->busiest) < group_first_cpu(sg))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6780,6 +6939,9 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
|
|||||||
if (!(env->sd->flags & SD_ASYM_PACKING))
|
if (!(env->sd->flags & SD_ASYM_PACKING))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
if (env->idle == CPU_NOT_IDLE)
|
||||||
|
return 0;
|
||||||
|
|
||||||
if (!sds->busiest)
|
if (!sds->busiest)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -6888,9 +7050,10 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In the presence of smp nice balancing, certain scenarios can have
|
* Avg load of busiest sg can be less and avg load of local sg can
|
||||||
* max load less than avg load(as we skip the groups at or below
|
* be greater than avg load across all sgs of sd because avg load
|
||||||
* its cpu_capacity, while calculating max_load..)
|
* factors in sg capacity and sgs with smaller group_type are
|
||||||
|
* skipped when updating the busiest sg:
|
||||||
*/
|
*/
|
||||||
if (busiest->avg_load <= sds->avg_load ||
|
if (busiest->avg_load <= sds->avg_load ||
|
||||||
local->avg_load >= sds->avg_load) {
|
local->avg_load >= sds->avg_load) {
|
||||||
@ -6903,11 +7066,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
|||||||
*/
|
*/
|
||||||
if (busiest->group_type == group_overloaded &&
|
if (busiest->group_type == group_overloaded &&
|
||||||
local->group_type == group_overloaded) {
|
local->group_type == group_overloaded) {
|
||||||
load_above_capacity = busiest->sum_nr_running *
|
load_above_capacity = busiest->sum_nr_running * SCHED_CAPACITY_SCALE;
|
||||||
SCHED_LOAD_SCALE;
|
if (load_above_capacity > busiest->group_capacity) {
|
||||||
if (load_above_capacity > busiest->group_capacity)
|
|
||||||
load_above_capacity -= busiest->group_capacity;
|
load_above_capacity -= busiest->group_capacity;
|
||||||
else
|
load_above_capacity *= NICE_0_LOAD;
|
||||||
|
load_above_capacity /= busiest->group_capacity;
|
||||||
|
} else
|
||||||
load_above_capacity = ~0UL;
|
load_above_capacity = ~0UL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6915,9 +7079,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
|||||||
* We're trying to get all the cpus to the average_load, so we don't
|
* We're trying to get all the cpus to the average_load, so we don't
|
||||||
* want to push ourselves above the average load, nor do we wish to
|
* want to push ourselves above the average load, nor do we wish to
|
||||||
* reduce the max loaded cpu below the average load. At the same time,
|
* reduce the max loaded cpu below the average load. At the same time,
|
||||||
* we also don't want to reduce the group load below the group capacity
|
* we also don't want to reduce the group load below the group
|
||||||
* (so that we can implement power-savings policies etc). Thus we look
|
* capacity. Thus we look for the minimum possible imbalance.
|
||||||
* for the minimum possible imbalance.
|
|
||||||
*/
|
*/
|
||||||
max_pull = min(busiest->avg_load - sds->avg_load, load_above_capacity);
|
max_pull = min(busiest->avg_load - sds->avg_load, load_above_capacity);
|
||||||
|
|
||||||
@ -6941,10 +7104,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* find_busiest_group - Returns the busiest group within the sched_domain
|
* find_busiest_group - Returns the busiest group within the sched_domain
|
||||||
* if there is an imbalance. If there isn't an imbalance, and
|
* if there is an imbalance.
|
||||||
* the user has opted for power-savings, it returns a group whose
|
|
||||||
* CPUs can be put to idle by rebalancing those tasks elsewhere, if
|
|
||||||
* such a group exists.
|
|
||||||
*
|
*
|
||||||
* Also calculates the amount of weighted load which should be moved
|
* Also calculates the amount of weighted load which should be moved
|
||||||
* to restore balance.
|
* to restore balance.
|
||||||
@ -6952,9 +7112,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
|||||||
* @env: The load balancing environment.
|
* @env: The load balancing environment.
|
||||||
*
|
*
|
||||||
* Return: - The busiest group if imbalance exists.
|
* Return: - The busiest group if imbalance exists.
|
||||||
* - If no imbalance and user has opted for power-savings balance,
|
|
||||||
* return the least loaded group whose CPUs can be
|
|
||||||
* put to idle by rebalancing its tasks onto our group.
|
|
||||||
*/
|
*/
|
||||||
static struct sched_group *find_busiest_group(struct lb_env *env)
|
static struct sched_group *find_busiest_group(struct lb_env *env)
|
||||||
{
|
{
|
||||||
@ -6972,8 +7129,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
|
|||||||
busiest = &sds.busiest_stat;
|
busiest = &sds.busiest_stat;
|
||||||
|
|
||||||
/* ASYM feature bypasses nice load balance check */
|
/* ASYM feature bypasses nice load balance check */
|
||||||
if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) &&
|
if (check_asym_packing(env, &sds))
|
||||||
check_asym_packing(env, &sds))
|
|
||||||
return sds.busiest;
|
return sds.busiest;
|
||||||
|
|
||||||
/* There is no busy sibling group to pull tasks from */
|
/* There is no busy sibling group to pull tasks from */
|
||||||
@ -7398,10 +7554,7 @@ more_balance:
|
|||||||
&busiest->active_balance_work);
|
&busiest->active_balance_work);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* We've kicked active balancing, force task migration. */
|
||||||
* We've kicked active balancing, reset the failure
|
|
||||||
* counter.
|
|
||||||
*/
|
|
||||||
sd->nr_balance_failed = sd->cache_nice_tries+1;
|
sd->nr_balance_failed = sd->cache_nice_tries+1;
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
@ -7636,10 +7789,13 @@ static int active_load_balance_cpu_stop(void *data)
|
|||||||
schedstat_inc(sd, alb_count);
|
schedstat_inc(sd, alb_count);
|
||||||
|
|
||||||
p = detach_one_task(&env);
|
p = detach_one_task(&env);
|
||||||
if (p)
|
if (p) {
|
||||||
schedstat_inc(sd, alb_pushed);
|
schedstat_inc(sd, alb_pushed);
|
||||||
else
|
/* Active balancing done, reset the failure counter. */
|
||||||
|
sd->nr_balance_failed = 0;
|
||||||
|
} else {
|
||||||
schedstat_inc(sd, alb_failed);
|
schedstat_inc(sd, alb_failed);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
out_unlock:
|
out_unlock:
|
||||||
@ -7710,7 +7866,7 @@ static void nohz_balancer_kick(void)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void nohz_balance_exit_idle(int cpu)
|
void nohz_balance_exit_idle(unsigned int cpu)
|
||||||
{
|
{
|
||||||
if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
|
if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
|
||||||
/*
|
/*
|
||||||
@ -7783,18 +7939,6 @@ void nohz_balance_enter_idle(int cpu)
|
|||||||
atomic_inc(&nohz.nr_cpus);
|
atomic_inc(&nohz.nr_cpus);
|
||||||
set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
|
set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int sched_ilb_notifier(struct notifier_block *nfb,
|
|
||||||
unsigned long action, void *hcpu)
|
|
||||||
{
|
|
||||||
switch (action & ~CPU_TASKS_FROZEN) {
|
|
||||||
case CPU_DYING:
|
|
||||||
nohz_balance_exit_idle(smp_processor_id());
|
|
||||||
return NOTIFY_OK;
|
|
||||||
default:
|
|
||||||
return NOTIFY_DONE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static DEFINE_SPINLOCK(balancing);
|
static DEFINE_SPINLOCK(balancing);
|
||||||
@ -7956,7 +8100,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
|
|||||||
if (time_after_eq(jiffies, rq->next_balance)) {
|
if (time_after_eq(jiffies, rq->next_balance)) {
|
||||||
raw_spin_lock_irq(&rq->lock);
|
raw_spin_lock_irq(&rq->lock);
|
||||||
update_rq_clock(rq);
|
update_rq_clock(rq);
|
||||||
update_cpu_load_idle(rq);
|
cpu_load_update_idle(rq);
|
||||||
raw_spin_unlock_irq(&rq->lock);
|
raw_spin_unlock_irq(&rq->lock);
|
||||||
rebalance_domains(rq, CPU_IDLE);
|
rebalance_domains(rq, CPU_IDLE);
|
||||||
}
|
}
|
||||||
@@ -8381,6 +8525,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 		init_entity_runnable_average(se);
+		post_init_entity_util_avg(se);
 	}

 	return 1;
@@ -8537,7 +8682,6 @@ const struct sched_class fair_sched_class = {
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,

-	.task_waking		= task_waking_fair,
 	.task_dead		= task_dead_fair,
 	.set_cpus_allowed	= set_cpus_allowed_common,
 #endif
@@ -8599,7 +8743,6 @@ __init void init_sched_fair_class(void)
 #ifdef CONFIG_NO_HZ_COMMON
 	nohz.next_balance = jiffies;
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
-	cpu_notifier(sched_ilb_notifier, 0);
 #endif
 #endif /* SMP */

@@ -24,7 +24,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
 }

 static struct task_struct *
-pick_next_task_idle(struct rq *rq, struct task_struct *prev)
+pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 {
 	put_prev_task(rq, prev);

@@ -99,10 +99,13 @@ long calc_load_fold_active(struct rq *this_rq)
 static unsigned long
 calc_load(unsigned long load, unsigned long exp, unsigned long active)
 {
-	load *= exp;
-	load += active * (FIXED_1 - exp);
-	load += 1UL << (FSHIFT - 1);
-	return load >> FSHIFT;
+	unsigned long newload;
+
+	newload = load * exp + active * (FIXED_1 - exp);
+	if (active >= load)
+		newload += FIXED_1-1;
+
+	return newload / FIXED_1;
 }

 #ifdef CONFIG_NO_HZ_COMMON
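
Note (editorial, not part of this hunk): the calc_load() change above replaces round-to-nearest with "round up only while the average is rising", so a fully idle system can actually decay to 0 and a fully loaded one can reach its ceiling. A stand-alone, user-space sketch of the effect; the constants mirror the kernel's fixed-point definitions (FSHIFT = 11, FIXED_1 = 1 << FSHIFT, EXP_1 = 1884) but the program itself is illustrative only:

#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1UL << FSHIFT)
#define EXP_1	1884	/* 1/exp(5s/1min) in fixed-point */

static unsigned long calc_load_old(unsigned long load, unsigned long exp,
				   unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	load += 1UL << (FSHIFT - 1);	/* round to nearest */
	return load >> FSHIFT;
}

static unsigned long calc_load_new(unsigned long load, unsigned long exp,
				   unsigned long active)
{
	unsigned long newload;

	newload = load * exp + active * (FIXED_1 - exp);
	if (active >= load)
		newload += FIXED_1 - 1;	/* round up only while ramping up */
	return newload / FIXED_1;
}

int main(void)
{
	unsigned long a = 1, b = 1;	/* a tiny residual load in fixed-point */
	int i;

	for (i = 0; i < 100; i++) {	/* fully idle system: active == 0 every tick */
		a = calc_load_old(a, EXP_1, 0);
		b = calc_load_new(b, EXP_1, 0);
	}
	/* The old rounding never lets the average reach 0; the new one does. */
	printf("old: %lu  new: %lu\n", a, b);
	return 0;
}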
@@ -334,7 +334,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

 	rt_rq->rt_nr_total++;
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		rt_rq->rt_nr_migratory++;

 	update_rt_migration(rt_rq);
@@ -351,7 +351,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

 	rt_rq->rt_nr_total--;
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		rt_rq->rt_nr_migratory--;

 	update_rt_migration(rt_rq);
@@ -953,14 +953,14 @@ static void update_curr_rt(struct rq *rq)
 	if (curr->sched_class != &rt_sched_class)
 		return;

-	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
-	if (cpu_of(rq) == smp_processor_id())
-		cpufreq_trigger_update(rq_clock(rq));
-
 	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
 	if (unlikely((s64)delta_exec <= 0))
 		return;

+	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
+	if (cpu_of(rq) == smp_processor_id())
+		cpufreq_trigger_update(rq_clock(rq));
+
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));

@@ -1324,7 +1324,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)

 	enqueue_rt_entity(rt_se, flags);

-	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+	if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_task(rq, p);
 }

@@ -1413,7 +1413,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 	 * will have to sort it out.
 	 */
 	if (curr && unlikely(rt_task(curr)) &&
-	    (curr->nr_cpus_allowed < 2 ||
+	    (tsk_nr_cpus_allowed(curr) < 2 ||
 	     curr->prio <= p->prio)) {
 		int target = find_lowest_rq(p);

@@ -1437,7 +1437,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 	 * Current can't be migrated, useless to reschedule,
 	 * let's hope p can move out.
 	 */
-	if (rq->curr->nr_cpus_allowed == 1 ||
+	if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
 	    !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
 		return;

@@ -1445,7 +1445,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 	 * p is migratable, so let's not schedule it and
 	 * see if it is pushed or pulled somewhere else.
 	 */
-	if (p->nr_cpus_allowed != 1
+	if (tsk_nr_cpus_allowed(p) != 1
 	    && cpupri_find(&rq->rd->cpupri, p, NULL))
 		return;

@@ -1524,7 +1524,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 }

 static struct task_struct *
-pick_next_task_rt(struct rq *rq, struct task_struct *prev)
+pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 {
 	struct task_struct *p;
 	struct rt_rq *rt_rq = &rq->rt;
@@ -1536,9 +1536,9 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
 	 * disabled avoiding further scheduler activity on it and we're
 	 * being very careful to re-start the picking loop.
 	 */
-	lockdep_unpin_lock(&rq->lock);
+	lockdep_unpin_lock(&rq->lock, cookie);
 	pull_rt_task(rq);
-	lockdep_pin_lock(&rq->lock);
+	lockdep_repin_lock(&rq->lock, cookie);
 	/*
 	 * pull_rt_task() can drop (and re-acquire) rq->lock; this
 	 * means a dl or stop task can slip in, in which case we need
@@ -1579,7 +1579,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
 	 */
-	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
+	if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_task(rq, p);
 }

@@ -1629,7 +1629,7 @@ static int find_lowest_rq(struct task_struct *task)
 	if (unlikely(!lowest_mask))
 		return -1;

-	if (task->nr_cpus_allowed == 1)
+	if (tsk_nr_cpus_allowed(task) == 1)
 		return -1; /* No other targets possible */

 	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1762,7 +1762,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)

 	BUG_ON(rq->cpu != task_cpu(p));
 	BUG_ON(task_current(rq, p));
-	BUG_ON(p->nr_cpus_allowed <= 1);
+	BUG_ON(tsk_nr_cpus_allowed(p) <= 1);

 	BUG_ON(!task_on_rq_queued(p));
 	BUG_ON(!rt_task(p));
@@ -2122,9 +2122,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
 {
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
-	    p->nr_cpus_allowed > 1 &&
+	    tsk_nr_cpus_allowed(p) > 1 &&
 	    (dl_task(rq->curr) || rt_task(rq->curr)) &&
-	    (rq->curr->nr_cpus_allowed < 2 ||
+	    (tsk_nr_cpus_allowed(rq->curr) < 2 ||
 	     rq->curr->prio <= p->prio))
 		push_rt_tasks(rq);
 }
@@ -2197,7 +2197,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 	 */
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
-		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
+		if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
 			queue_push_tasks(rq);
 #else
 		if (p->prio < rq->curr->prio)

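Note (editorial, not part of this hunk): every open-coded p->nr_cpus_allowed test above now goes through a helper introduced elsewhere in this merge. A sketch of what that helper is assumed to look like today (a trivial wrapper, kernel context assumed via <linux/sched.h>):

/* Sketch of the assumed helper; it currently just forwards to the field. */
static inline int tsk_nr_cpus_allowed(struct task_struct *p)
{
	return p->nr_cpus_allowed;
}
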
@@ -31,9 +31,9 @@ extern void calc_global_load_tick(struct rq *this_rq);
 extern long calc_load_fold_active(struct rq *this_rq);

 #ifdef CONFIG_SMP
-extern void update_cpu_load_active(struct rq *this_rq);
+extern void cpu_load_update_active(struct rq *this_rq);
 #else
-static inline void update_cpu_load_active(struct rq *this_rq) { }
+static inline void cpu_load_update_active(struct rq *this_rq) { }
 #endif

 /*
@@ -49,25 +49,32 @@ static inline void update_cpu_load_active(struct rq *this_rq) { }
  * and does not change the user-interface for setting shares/weights.
  *
  * We increase resolution only if we have enough bits to allow this increased
- * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution
- * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the
- * increased costs.
+ * resolution (i.e. 64bit). The costs for increasing resolution when 32bit are
+ * pretty high and the returns do not justify the increased costs.
+ *
+ * Really only required when CONFIG_FAIR_GROUP_SCHED is also set, but to
+ * increase coverage and consistency always enable it on 64bit platforms.
  */
-#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */
-# define SCHED_LOAD_RESOLUTION	10
-# define scale_load(w)		((w) << SCHED_LOAD_RESOLUTION)
-# define scale_load_down(w)	((w) >> SCHED_LOAD_RESOLUTION)
+#ifdef CONFIG_64BIT
+# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
+# define scale_load(w)		((w) << SCHED_FIXEDPOINT_SHIFT)
+# define scale_load_down(w)	((w) >> SCHED_FIXEDPOINT_SHIFT)
 #else
-# define SCHED_LOAD_RESOLUTION	0
+# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT)
 # define scale_load(w)		(w)
 # define scale_load_down(w)	(w)
 #endif

-#define SCHED_LOAD_SHIFT	(10 + SCHED_LOAD_RESOLUTION)
-#define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)
-
-#define NICE_0_LOAD		SCHED_LOAD_SCALE
-#define NICE_0_SHIFT		SCHED_LOAD_SHIFT
+/*
+ * Task weight (visible to users) and its load (invisible to users) have
+ * independent resolution, but they should be well calibrated. We use
+ * scale_load() and scale_load_down(w) to convert between them. The
+ * following must be true:
+ *
+ *  scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
+ *
+ */
+#define NICE_0_LOAD		(1L << NICE_0_LOAD_SHIFT)

 /*
  * Single value that decides SCHED_DEADLINE internal math precision.
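
Note (editorial, not part of this hunk): with the change above, load weights keep the user-visible 10-bit resolution (SCHED_FIXEDPOINT_SHIFT is 10) but are carried internally with 20 bits on 64-bit builds. A stand-alone sketch of the invariant called out in the new comment; the macro copies below are for illustration only, not the kernel's header:

#include <stdio.h>

#define SCHED_FIXEDPOINT_SHIFT	10

/* Mirror of the 64-bit branch above, for demonstration purposes. */
#define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
#define scale_load(w)		((w) << SCHED_FIXEDPOINT_SHIFT)
#define scale_load_down(w)	((w) >> SCHED_FIXEDPOINT_SHIFT)
#define NICE_0_LOAD		(1L << NICE_0_LOAD_SHIFT)

int main(void)
{
	long nice_0_weight = 1024;	/* the sched_prio_to_weight[] entry for nice 0 */

	/* scale_load(<nice-0 weight>) must equal NICE_0_LOAD ... */
	printf("invariant holds: %d\n", scale_load(nice_0_weight) == NICE_0_LOAD);
	/* ... and scale_load_down() undoes the extra resolution again. */
	printf("round trip: %ld\n", scale_load_down(scale_load(nice_0_weight)));
	return 0;
}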
@@ -585,11 +592,13 @@ struct rq {
 #endif
 	#define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
-	unsigned long last_load_update_tick;
 #ifdef CONFIG_NO_HZ_COMMON
+#ifdef CONFIG_SMP
+	unsigned long last_load_update_tick;
+#endif /* CONFIG_SMP */
 	u64 nohz_stamp;
 	unsigned long nohz_flags;
-#endif
+#endif /* CONFIG_NO_HZ_COMMON */
 #ifdef CONFIG_NO_HZ_FULL
 	unsigned long last_sched_tick;
 #endif
@@ -854,7 +863,7 @@ DECLARE_PER_CPU(struct sched_domain *, sd_asym);
 struct sched_group_capacity {
 	atomic_t ref;
 	/*
-	 * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
+	 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
 	 * for a single CPU.
 	 */
 	unsigned int capacity;
@@ -1159,7 +1168,7 @@ extern const u32 sched_prio_to_wmult[40];
 *
 * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
 * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
-* ENQUEUE_WAKING    - sched_class::task_waking was called
+* ENQUEUE_MIGRATED  - the task was migrated during wakeup
 *
 */

@@ -1174,9 +1183,9 @@ extern const u32 sched_prio_to_wmult[40];
 #define ENQUEUE_HEAD		0x08
 #define ENQUEUE_REPLENISH	0x10
 #ifdef CONFIG_SMP
-#define ENQUEUE_WAKING		0x20
+#define ENQUEUE_MIGRATED	0x20
 #else
-#define ENQUEUE_WAKING		0x00
+#define ENQUEUE_MIGRATED	0x00
 #endif

 #define RETRY_TASK		((void *)-1UL)
@@ -1200,14 +1209,14 @@ struct sched_class {
 	 * tasks.
 	 */
 	struct task_struct * (*pick_next_task) (struct rq *rq,
-						struct task_struct *prev);
+						struct task_struct *prev,
+						struct pin_cookie cookie);
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);

 #ifdef CONFIG_SMP
 	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
 	void (*migrate_task_rq)(struct task_struct *p);

-	void (*task_waking) (struct task_struct *task);
 	void (*task_woken) (struct rq *this_rq, struct task_struct *task);

 	void (*set_cpus_allowed)(struct task_struct *p,
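
Note (editorial, not part of this hunk): every pick_next_task() implementation (fair, rt, dl, idle, stop — see the class hunks elsewhere in this diff) now receives the lockdep pin cookie the core scheduler obtained when it pinned rq->lock. A sketch of the pattern a class is assumed to follow if it must drop rq->lock inside its pick path; the function name is made up, the unpin/repin calls mirror the rt.c hunk above:

/* Illustrative sketch of a sched_class pick path that temporarily drops rq->lock. */
static struct task_struct *
pick_next_task_foo(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{
	/* rq->lock is held and pinned by the caller with @cookie. */
	lockdep_unpin_lock(&rq->lock, cookie);	/* about to drop rq->lock */
	/* ... pull work from another runqueue; rq->lock may be dropped/re-taken ... */
	lockdep_repin_lock(&rq->lock, cookie);	/* lock is held and pinned again */

	put_prev_task(rq, prev);
	return NULL;				/* nothing runnable in this class */
}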
@@ -1313,6 +1322,7 @@ extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
 unsigned long to_ratio(u64 period, u64 runtime);

 extern void init_entity_runnable_average(struct sched_entity *se);
+extern void post_init_entity_util_avg(struct sched_entity *se);

 #ifdef CONFIG_NO_HZ_FULL
 extern bool sched_can_stop_tick(struct rq *rq);
@@ -1448,86 +1458,32 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
 static inline void sched_avg_update(struct rq *rq) { }
 #endif

-/*
- * __task_rq_lock - lock the rq @p resides on.
- */
-static inline struct rq *__task_rq_lock(struct task_struct *p)
-	__acquires(rq->lock)
-{
-	struct rq *rq;
-
-	lockdep_assert_held(&p->pi_lock);
-
-	for (;;) {
-		rq = task_rq(p);
-		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
-			lockdep_pin_lock(&rq->lock);
-			return rq;
-		}
-		raw_spin_unlock(&rq->lock);
-
-		while (unlikely(task_on_rq_migrating(p)))
-			cpu_relax();
-	}
-}
+struct rq_flags {
+	unsigned long flags;
+	struct pin_cookie cookie;
+};

-/*
- * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
- */
-static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+	__acquires(rq->lock);
+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 	__acquires(p->pi_lock)
-	__acquires(rq->lock)
-{
-	struct rq *rq;
+	__acquires(rq->lock);

-	for (;;) {
-		raw_spin_lock_irqsave(&p->pi_lock, *flags);
-		rq = task_rq(p);
-		raw_spin_lock(&rq->lock);
-		/*
-		 * move_queued_task()		task_rq_lock()
-		 *
-		 *	ACQUIRE (rq->lock)
-		 *	[S] ->on_rq = MIGRATING		[L] rq = task_rq()
-		 *	WMB (__set_task_cpu())		ACQUIRE (rq->lock);
-		 *	[S] ->cpu = new_cpu		[L] task_rq()
-		 *					[L] ->on_rq
-		 *	RELEASE (rq->lock)
-		 *
-		 * If we observe the old cpu in task_rq_lock, the acquire of
-		 * the old rq->lock will fully serialize against the stores.
-		 *
-		 * If we observe the new cpu in task_rq_lock, the acquire will
-		 * pair with the WMB to ensure we must then also see migrating.
-		 */
-		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
-			lockdep_pin_lock(&rq->lock);
-			return rq;
-		}
-		raw_spin_unlock(&rq->lock);
-		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-
-		while (unlikely(task_on_rq_migrating(p)))
-			cpu_relax();
-	}
-}
-
-static inline void __task_rq_unlock(struct rq *rq)
+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
 	__releases(rq->lock)
 {
-	lockdep_unpin_lock(&rq->lock);
+	lockdep_unpin_lock(&rq->lock, rf->cookie);
 	raw_spin_unlock(&rq->lock);
 }

 static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
 	__releases(rq->lock)
 	__releases(p->pi_lock)
 {
-	lockdep_unpin_lock(&rq->lock);
+	lockdep_unpin_lock(&rq->lock, rf->cookie);
 	raw_spin_unlock(&rq->lock);
-	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
 }

 #ifdef CONFIG_SMP
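
Note (editorial, not part of this hunk): callers of task_rq_lock()/task_rq_unlock() now carry a struct rq_flags instead of a bare unsigned long, so the lockdep pin cookie travels together with the saved IRQ flags. A sketch of the expected call-site shape; the function name is made up for illustration:

/* Illustrative sketch of a task_rq_lock() caller after this change. */
static void frob_task(struct task_struct *p)
{
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(p, &rf);	/* takes p->pi_lock and rq->lock, pins rq->lock */
	/* ... operate on p's runqueue; rf.cookie proves the lock stayed pinned ... */
	task_rq_unlock(rq, p, &rf);	/* unpins and releases both locks */
}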
@@ -1743,6 +1699,10 @@ enum rq_nohz_flag_bits {
 };

 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
+
+extern void nohz_balance_exit_idle(unsigned int cpu);
+#else
+static inline void nohz_balance_exit_idle(unsigned int cpu) { }
 #endif

 #ifdef CONFIG_IRQ_TIME_ACCOUNTING

@@ -24,7 +24,7 @@ check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
 }

 static struct task_struct *
-pick_next_task_stop(struct rq *rq, struct task_struct *prev)
+pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 {
 	struct task_struct *stop = rq->stop;

@@ -776,6 +776,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	if (!ts->tick_stopped) {
 		nohz_balance_enter_idle(cpu);
 		calc_load_enter_idle();
+		cpu_load_update_nohz_start();

 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 		ts->tick_stopped = 1;
@@ -802,11 +803,11 @@ out:
 	return tick;
 }

-static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active)
+static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 {
 	/* Update jiffies first */
 	tick_do_update_jiffies64(now);
-	update_cpu_load_nohz(active);
+	cpu_load_update_nohz_stop();

 	calc_load_exit_idle();
 	touch_softlockup_watchdog_sched();
@@ -833,7 +834,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
 	if (can_stop_full_tick(ts))
 		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
 	else if (ts->tick_stopped)
-		tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
+		tick_nohz_restart_sched_tick(ts, ktime_get());
 #endif
 }

@@ -1024,7 +1025,7 @@ void tick_nohz_idle_exit(void)
 	tick_nohz_stop_idle(ts, now);

 	if (ts->tick_stopped) {
-		tick_nohz_restart_sched_tick(ts, now, 0);
+		tick_nohz_restart_sched_tick(ts, now);
 		tick_nohz_account_idle_ticks(ts);
 	}

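Note (editorial, not part of this hunk): update_cpu_load_nohz(active) used to be told by the tick code whether the restart was for an active CPU; with this merge the accounting is split into an explicit start/stop pair, so no flag has to be threaded through. A sketch of the assumed pairing, with the wrapper functions invented purely for illustration:

/* Illustrative sketch: the nohz load hooks now bracket the tick-stopped window. */
static void stop_tick_example(int cpu)
{
	nohz_balance_enter_idle(cpu);
	calc_load_enter_idle();
	cpu_load_update_nohz_start();	/* called just before the tick is stopped */
}

static void restart_tick_example(ktime_t now)
{
	tick_do_update_jiffies64(now);
	cpu_load_update_nohz_stop();	/* called when the tick is restarted */
	calc_load_exit_idle();
}
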
@@ -4,9 +4,9 @@
  */

 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/mmu_context.h>
 #include <linux/export.h>
-#include <linux/sched.h>

 #include <asm/mmu_context.h>
