Merge tag 'sched-core-2022-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - Cleanups for SCHED_DEADLINE

 - Tracing updates/fixes

 - CPU Accounting fixes

 - First wave of changes to optimize the overhead of the scheduler
   build, from the fast-headers tree, including placeholder *_api.h
   headers for later header split-ups

 - Preempt-dynamic using static_branch() for ARM64

 - Isolation housekeeping mask rework; preparatory for further changes

 - NUMA-balancing: deal with CPU-less nodes

 - NUMA-balancing: tune systems that have multiple LLC cache domains
   per node (e.g. AMD)

 - Updates to RSEQ UAPI in preparation for glibc usage

 - Lots of RSEQ/selftests, for same

 - Add Suren as PSI co-maintainer

Signed-off-by: Ingo Molnar <mingo@kernel.org>

* tag 'sched-core-2022-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (81 commits)
  sched/headers: ARM needs asm/paravirt_api_clock.h too
  sched/numa: Fix boot crash on arm64 systems
  headers/prep: Fix header to build standalone: <linux/psi.h>
  sched/headers: Only include <linux/entry-common.h> when CONFIG_GENERIC_ENTRY=y
  cgroup: Fix suspicious rcu_dereference_check() usage warning
  sched/preempt: Tell about PREEMPT_DYNAMIC on kernel headers
  sched/topology: Remove redundant variable and fix incorrect type in build_sched_domains
  sched/deadline,rt: Remove unused parameter from pick_next_[rt|dl]_entity()
  sched/deadline,rt: Remove unused functions for !CONFIG_SMP
  sched/deadline: Use __node_2_[pdl|dle]() and rb_first_cached() consistently
  sched/deadline: Merge dl_task_can_attach() and dl_cpu_busy()
  sched/deadline: Move bandwidth mgmt and reclaim functions into sched class source file
  sched/deadline: Remove unused def_dl_bandwidth
  sched/tracing: Report TASK_RTLOCK_WAIT tasks as TASK_UNINTERRUPTIBLE
  sched/tracing: Don't re-read p->state when emitting sched_switch event
  sched/rt: Plug rt_mutex_setprio() vs push_rt_task() race
  sched/cpuacct: Remove redundant RCU read lock
  sched/cpuacct: Optimize away RCU read lock
  sched/cpuacct: Fix charge percpu cpuusage
  sched/headers: Reorganize, clean up and optimize kernel/sched/sched.h dependencies
  ...
This commit is contained in: commit 3fe2f7446f
@@ -609,51 +609,7 @@ be migrated to a local memory node.

The unmapping of pages and trapping faults incur additional overhead that
ideally is offset by improved memory locality but there is no universal
guarantee. If the target workload is already bound to NUMA nodes then this
feature should be disabled. Otherwise, if the system overhead from the
feature is too high then the rate the kernel samples for NUMA hinting
faults may be controlled by the `numa_balancing_scan_period_min_ms,
numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
numa_balancing_scan_size_mb`_, and numa_balancing_settle_count sysctls.

numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
===============================================================================================================================

Automatic NUMA balancing scans tasks address space and unmaps pages to
detect if pages are properly placed or if the data should be migrated to a
memory node local to where the task is running. Every "scan delay" the task
scans the next "scan size" number of pages in its address space. When the
end of the address space is reached the scanner restarts from the beginning.

In combination, the "scan delay" and "scan size" determine the scan rate.
When "scan delay" decreases, the scan rate increases. The scan delay and
hence the scan rate of every task is adaptive and depends on historical
behaviour. If pages are properly placed then the scan delay increases,
otherwise the scan delay decreases. The "scan size" is not adaptive but
the higher the "scan size", the higher the scan rate.

Higher scan rates incur higher system overhead as page faults must be
trapped and potentially data must be migrated. However, the higher the scan
rate, the more quickly a tasks memory is migrated to a local node if the
workload pattern changes and minimises performance impact due to remote
memory accesses. These sysctls control the thresholds for scan delays and
the number of pages scanned.

``numa_balancing_scan_period_min_ms`` is the minimum time in milliseconds to
scan a tasks virtual memory. It effectively controls the maximum scanning
rate for each task.

``numa_balancing_scan_delay_ms`` is the starting "scan delay" used for a task
when it initially forks.

``numa_balancing_scan_period_max_ms`` is the maximum time in milliseconds to
scan a tasks virtual memory. It effectively controls the minimum scanning
rate for each task.

``numa_balancing_scan_size_mb`` is how many megabytes worth of pages are
scanned for a given scan.

feature should be disabled.

oops_all_cpu_backtrace
======================
@@ -18,6 +18,7 @@ Linux Scheduler

    sched-nice-design
    sched-rt-group
    sched-stats
    sched-debug

    text_files

Documentation/scheduler/sched-debug.rst (new file, 54 lines)
@@ -0,0 +1,54 @@
=================
Scheduler debugfs
=================

Booting a kernel with CONFIG_SCHED_DEBUG=y will give access to
scheduler specific debug files under /sys/kernel/debug/sched. Some of
those files are described below.

numa_balancing
==============

`numa_balancing` directory is used to hold files to control NUMA
balancing feature. If the system overhead from the feature is too
high then the rate the kernel samples for NUMA hinting faults may be
controlled by the `scan_period_min_ms, scan_delay_ms,
scan_period_max_ms, scan_size_mb` files.

scan_period_min_ms, scan_delay_ms, scan_period_max_ms, scan_size_mb
-------------------------------------------------------------------

Automatic NUMA balancing scans tasks address space and unmaps pages to
detect if pages are properly placed or if the data should be migrated to a
memory node local to where the task is running. Every "scan delay" the task
scans the next "scan size" number of pages in its address space. When the
end of the address space is reached the scanner restarts from the beginning.

In combination, the "scan delay" and "scan size" determine the scan rate.
When "scan delay" decreases, the scan rate increases. The scan delay and
hence the scan rate of every task is adaptive and depends on historical
behaviour. If pages are properly placed then the scan delay increases,
otherwise the scan delay decreases. The "scan size" is not adaptive but
the higher the "scan size", the higher the scan rate.

Higher scan rates incur higher system overhead as page faults must be
trapped and potentially data must be migrated. However, the higher the scan
rate, the more quickly a tasks memory is migrated to a local node if the
workload pattern changes and minimises performance impact due to remote
memory accesses. These files control the thresholds for scan delays and
the number of pages scanned.

``scan_period_min_ms`` is the minimum time in milliseconds to scan a
tasks virtual memory. It effectively controls the maximum scanning
rate for each task.

``scan_delay_ms`` is the starting "scan delay" used for a task when it
initially forks.

``scan_period_max_ms`` is the maximum time in milliseconds to scan a
tasks virtual memory. It effectively controls the minimum scanning
rate for each task.

``scan_size_mb`` is how many megabytes worth of pages are scanned for
a given scan.
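A minimal user-space sketch of reading one of these knobs, assuming CONFIG_SCHED_DEBUG=y, debugfs mounted at /sys/kernel/debug, and root privileges (the path follows the documentation above):

/* Sketch: read the NUMA-balancing scan_period_min_ms knob described above. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/sched/numa_balancing/scan_period_min_ms";
	char buf[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);	/* needs root and a CONFIG_SCHED_DEBUG=y kernel */
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("scan_period_min_ms = %s", buf);
	fclose(f);
	return 0;
}

Writing a new value works the same way, by opening the file for writing and printing a decimal number into it.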
@@ -15566,6 +15566,7 @@ F: drivers/net/ppp/pptp.c

PRESSURE STALL INFORMATION (PSI)
M: Johannes Weiner <hannes@cmpxchg.org>
M: Suren Baghdasaryan <surenb@google.com>
S: Maintained
F: include/linux/psi*
F: kernel/sched/psi.c
arch/Kconfig (37 changed lines)
@@ -1293,12 +1293,41 @@ config HAVE_STATIC_CALL_INLINE

config HAVE_PREEMPT_DYNAMIC
	bool

config HAVE_PREEMPT_DYNAMIC_CALL
	bool
	depends on HAVE_STATIC_CALL
	depends on GENERIC_ENTRY
	select HAVE_PREEMPT_DYNAMIC
	help
	  Select this if the architecture support boot time preempt setting
	  on top of static calls. It is strongly advised to support inline
	  static call to avoid any overhead.
	  An architecture should select this if it can handle the preemption
	  model being selected at boot time using static calls.

	  Where an architecture selects HAVE_STATIC_CALL_INLINE, any call to a
	  preemption function will be patched directly.

	  Where an architecture does not select HAVE_STATIC_CALL_INLINE, any
	  call to a preemption function will go through a trampoline, and the
	  trampoline will be patched.

	  It is strongly advised to support inline static call to avoid any
	  overhead.

config HAVE_PREEMPT_DYNAMIC_KEY
	bool
	depends on HAVE_ARCH_JUMP_LABEL && CC_HAS_ASM_GOTO
	select HAVE_PREEMPT_DYNAMIC
	help
	  An architecture should select this if it can handle the preemption
	  model being selected at boot time using static keys.

	  Each preemption function will be given an early return based on a
	  static key. This should have slightly lower overhead than non-inline
	  static calls, as this effectively inlines each trampoline into the
	  start of its callee. This may avoid redundant work, and may
	  integrate better with CFI schemes.

	  This will have greater overhead than using inline static calls as
	  the call to the preemption function cannot be entirely elided.

config ARCH_WANT_LD_ORPHAN_WARN
	bool
arch/arm/include/asm/paravirt_api_clock.h (new file, 1 line)
@@ -0,0 +1 @@
#include <asm/paravirt.h>

@@ -194,6 +194,7 @@ config ARM64
	select HAVE_PERF_EVENTS
	select HAVE_PERF_REGS
	select HAVE_PERF_USER_STACK_DUMP
	select HAVE_PREEMPT_DYNAMIC_KEY
	select HAVE_REGS_AND_STACK_ACCESS_API
	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
	select HAVE_FUNCTION_ARG_ACCESS_API

arch/arm64/include/asm/paravirt_api_clock.h (new file, 1 line)
@@ -0,0 +1 @@
#include <asm/paravirt.h>
@@ -2,6 +2,7 @@
#ifndef __ASM_PREEMPT_H
#define __ASM_PREEMPT_H

#include <linux/jump_label.h>
#include <linux/thread_info.h>

#define PREEMPT_NEED_RESCHED	BIT(32)

@@ -80,10 +81,24 @@ static inline bool should_resched(int preempt_offset)
}

#ifdef CONFIG_PREEMPTION

void preempt_schedule(void);
#define __preempt_schedule() preempt_schedule()
void preempt_schedule_notrace(void);
#define __preempt_schedule_notrace() preempt_schedule_notrace()

#ifdef CONFIG_PREEMPT_DYNAMIC

DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_preempt_schedule(void);
#define __preempt_schedule() dynamic_preempt_schedule()
void dynamic_preempt_schedule_notrace(void);
#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace()

#else /* CONFIG_PREEMPT_DYNAMIC */

#define __preempt_schedule() preempt_schedule()
#define __preempt_schedule_notrace() preempt_schedule_notrace()

#endif /* CONFIG_PREEMPT_DYNAMIC */
#endif /* CONFIG_PREEMPTION */

#endif /* __ASM_PREEMPT_H */
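The arm64 header above gives each dynamic preemption entry point an early return keyed off a static branch. A rough user-space analogue of the control flow (assumption: a plain boolean stands in for the patched static key, so this models only the dispatch shape, not the runtime code patching):

/* User-space analogue of the static-key early-return dispatch. */
#include <stdbool.h>
#include <stdio.h>

static bool sk_dynamic_preempt = true;	/* stands in for the static key */

static void real_preempt_schedule(void)
{
	puts("preempt_schedule() called");
}

static void dynamic_preempt_schedule(void)
{
	if (__builtin_expect(!sk_dynamic_preempt, 0))
		return;			/* key disabled: no-op */
	real_preempt_schedule();
}

int main(void)
{
	dynamic_preempt_schedule();	/* dispatched to the real routine */
	sk_dynamic_preempt = false;	/* e.g. a boot-time "preempt=none" choice */
	dynamic_preempt_schedule();	/* early return */
	return 0;
}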
@ -223,9 +223,26 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
|
||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
|
||||
#define need_irq_preemption() \
|
||||
(static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
|
||||
#else
|
||||
#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
|
||||
#endif
|
||||
|
||||
static void __sched arm64_preempt_schedule_irq(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
if (!need_irq_preemption())
|
||||
return;
|
||||
|
||||
/*
|
||||
* Note: thread_info::preempt_count includes both thread_info::count
|
||||
* and thread_info::need_resched, and is not equivalent to
|
||||
* preempt_count().
|
||||
*/
|
||||
if (READ_ONCE(current_thread_info()->preempt_count) != 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
|
||||
@ -441,14 +458,7 @@ static __always_inline void __el1_irq(struct pt_regs *regs,
|
||||
do_interrupt_handler(regs, handler);
|
||||
irq_exit_rcu();
|
||||
|
||||
/*
|
||||
* Note: thread_info::preempt_count includes both thread_info::count
|
||||
* and thread_info::need_resched, and is not equivalent to
|
||||
* preempt_count().
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_PREEMPTION) &&
|
||||
READ_ONCE(current_thread_info()->preempt_count) == 0)
|
||||
arm64_preempt_schedule_irq();
|
||||
arm64_preempt_schedule_irq();
|
||||
|
||||
exit_to_kernel_mode(regs);
|
||||
}
|
||||
|
@ -248,7 +248,7 @@ config X86
|
||||
select HAVE_STACK_VALIDATION if X86_64
|
||||
select HAVE_STATIC_CALL
|
||||
select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION
|
||||
select HAVE_PREEMPT_DYNAMIC
|
||||
select HAVE_PREEMPT_DYNAMIC_CALL
|
||||
select HAVE_RSEQ
|
||||
select HAVE_SYSCALL_TRACEPOINTS
|
||||
select HAVE_UNSTABLE_SCHED_CLOCK
|
||||
|
1
arch/x86/include/asm/paravirt_api_clock.h
Normal file
1
arch/x86/include/asm/paravirt_api_clock.h
Normal file
@ -0,0 +1 @@
|
||||
#include <asm/paravirt.h>
|
@ -108,16 +108,18 @@ static __always_inline bool should_resched(int preempt_offset)
|
||||
extern asmlinkage void preempt_schedule(void);
|
||||
extern asmlinkage void preempt_schedule_thunk(void);
|
||||
|
||||
#define __preempt_schedule_func preempt_schedule_thunk
|
||||
#define preempt_schedule_dynamic_enabled preempt_schedule_thunk
|
||||
#define preempt_schedule_dynamic_disabled NULL
|
||||
|
||||
extern asmlinkage void preempt_schedule_notrace(void);
|
||||
extern asmlinkage void preempt_schedule_notrace_thunk(void);
|
||||
|
||||
#define __preempt_schedule_notrace_func preempt_schedule_notrace_thunk
|
||||
#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk
|
||||
#define preempt_schedule_notrace_dynamic_disabled NULL
|
||||
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
|
||||
DECLARE_STATIC_CALL(preempt_schedule, __preempt_schedule_func);
|
||||
DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
|
||||
|
||||
#define __preempt_schedule() \
|
||||
do { \
|
||||
@ -125,7 +127,7 @@ do { \
|
||||
asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \
|
||||
} while (0)
|
||||
|
||||
DECLARE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func);
|
||||
DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
|
||||
|
||||
#define __preempt_schedule_notrace() \
|
||||
do { \
|
||||
|
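For contrast with the arm64 static-key variant, the x86 header above keeps the static-call flavour, where the call target itself is switched at boot. A rough user-space analogue (assumption: a function pointer stands in for the patched call site; the real static_call() rewrites the call instruction, so the enabled case stays a direct call):

/* User-space analogue of static-call style PREEMPT_DYNAMIC dispatch. */
#include <stdio.h>

static void preempt_schedule_thunk(void)
{
	puts("preempt_schedule_thunk() called");
}

static void preempt_disabled_stub(void)
{
	/* models the *_dynamic_disabled = NULL target: do nothing */
}

static void (*preempt_schedule_call)(void) = preempt_schedule_thunk;

int main(void)
{
	preempt_schedule_call();			/* "enabled": real thunk */
	preempt_schedule_call = preempt_disabled_stub;	/* boot-time override */
	preempt_schedule_call();			/* "disabled": no-op */
	return 0;
}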
@ -91,7 +91,7 @@ unsigned int aperfmperf_get_khz(int cpu)
|
||||
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
|
||||
return 0;
|
||||
|
||||
if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
|
||||
if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
|
||||
return 0;
|
||||
|
||||
if (rcu_is_idle_cpu(cpu))
|
||||
@ -114,7 +114,7 @@ void arch_freq_prepare_all(void)
|
||||
return;
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
|
||||
if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
|
||||
continue;
|
||||
if (rcu_is_idle_cpu(cpu))
|
||||
continue; /* Idle CPUs are completely uninteresting. */
|
||||
@ -136,7 +136,7 @@ unsigned int arch_freq_get_on_cpu(int cpu)
|
||||
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
|
||||
return 0;
|
||||
|
||||
if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
|
||||
if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
|
||||
return 0;
|
||||
|
||||
if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
|
||||
|
@ -8853,7 +8853,7 @@ int kvm_arch_init(void *opaque)
|
||||
}
|
||||
|
||||
if (pi_inject_timer == -1)
|
||||
pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
|
||||
pi_inject_timer = housekeeping_enabled(HK_TYPE_TIMER);
|
||||
#ifdef CONFIG_X86_64
|
||||
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
|
||||
|
||||
|
@ -275,7 +275,7 @@ static ssize_t print_cpus_isolated(struct device *dev,
|
||||
return -ENOMEM;
|
||||
|
||||
cpumask_andnot(isolated, cpu_possible_mask,
|
||||
housekeeping_cpumask(HK_FLAG_DOMAIN));
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
len = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated));
|
||||
|
||||
free_cpumask_var(isolated);
|
||||
|
@ -350,7 +350,6 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
|
||||
const struct pci_device_id *id)
|
||||
{
|
||||
int error, node, cpu;
|
||||
int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
|
||||
struct drv_dev_and_id ddi = { drv, dev, id };
|
||||
|
||||
/*
|
||||
@ -368,17 +367,29 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
|
||||
* device is probed from work_on_cpu() of the Physical device.
|
||||
*/
|
||||
if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
|
||||
pci_physfn_is_probed(dev))
|
||||
pci_physfn_is_probed(dev)) {
|
||||
cpu = nr_cpu_ids;
|
||||
else
|
||||
} else {
|
||||
cpumask_var_t wq_domain_mask;
|
||||
|
||||
if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) {
|
||||
error = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
cpumask_and(wq_domain_mask,
|
||||
housekeeping_cpumask(HK_TYPE_WQ),
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
|
||||
cpu = cpumask_any_and(cpumask_of_node(node),
|
||||
housekeeping_cpumask(hk_flags));
|
||||
wq_domain_mask);
|
||||
free_cpumask_var(wq_domain_mask);
|
||||
}
|
||||
|
||||
if (cpu < nr_cpu_ids)
|
||||
error = work_on_cpu(cpu, local_pci_probe, &ddi);
|
||||
else
|
||||
error = local_pci_probe(&ddi);
|
||||
|
||||
out:
|
||||
dev->is_probed = 0;
|
||||
cpu_hotplug_enable();
|
||||
return error;
|
||||
|
@ -450,6 +450,7 @@ extern struct mutex cgroup_mutex;
|
||||
extern spinlock_t css_set_lock;
|
||||
#define task_css_set_check(task, __c) \
|
||||
rcu_dereference_check((task)->cgroups, \
|
||||
rcu_read_lock_sched_held() || \
|
||||
lockdep_is_held(&cgroup_mutex) || \
|
||||
lockdep_is_held(&css_set_lock) || \
|
||||
((task)->flags & PF_EXITING) || (__c))
|
||||
@ -791,11 +792,9 @@ static inline void cgroup_account_cputime(struct task_struct *task,
|
||||
|
||||
cpuacct_charge(task, delta_exec);
|
||||
|
||||
rcu_read_lock();
|
||||
cgrp = task_dfl_cgroup(task);
|
||||
if (cgroup_parent(cgrp))
|
||||
__cgroup_account_cputime(cgrp, delta_exec);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static inline void cgroup_account_cputime_field(struct task_struct *task,
|
||||
@ -806,11 +805,9 @@ static inline void cgroup_account_cputime_field(struct task_struct *task,
|
||||
|
||||
cpuacct_account_field(task, index, delta_exec);
|
||||
|
||||
rcu_read_lock();
|
||||
cgrp = task_dfl_cgroup(task);
|
||||
if (cgroup_parent(cgrp))
|
||||
__cgroup_account_cputime_field(cgrp, index, delta_exec);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
#else /* CONFIG_CGROUPS */
|
||||
|
1
include/linux/cgroup_api.h
Normal file
1
include/linux/cgroup_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/cgroup.h>
|
1
include/linux/cpumask_api.h
Normal file
1
include/linux/cpumask_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/cpumask.h>
|
@ -454,10 +454,21 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
|
||||
*
|
||||
* Conditional reschedule with additional sanity checks.
|
||||
*/
|
||||
void irqentry_exit_cond_resched(void);
|
||||
void raw_irqentry_exit_cond_resched(void);
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
|
||||
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||
#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched
|
||||
#define irqentry_exit_cond_resched_dynamic_disabled NULL
|
||||
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
|
||||
#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)()
|
||||
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||
DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
|
||||
void dynamic_irqentry_exit_cond_resched(void);
|
||||
#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched()
|
||||
#endif
|
||||
#else /* CONFIG_PREEMPT_DYNAMIC */
|
||||
#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched()
|
||||
#endif /* CONFIG_PREEMPT_DYNAMIC */
|
||||
|
||||
/**
|
||||
* irqentry_exit - Handle return from exception that used irqentry_enter()
|
||||
|
1
include/linux/fs_api.h
Normal file
1
include/linux/fs_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/fs.h>
|
1
include/linux/gfp_api.h
Normal file
1
include/linux/gfp_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/gfp.h>
|
1
include/linux/hashtable_api.h
Normal file
1
include/linux/hashtable_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/hashtable.h>
|
1
include/linux/hrtimer_api.h
Normal file
1
include/linux/hrtimer_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/hrtimer.h>
|
@ -99,7 +99,7 @@ struct user;
|
||||
extern int __cond_resched(void);
|
||||
# define might_resched() __cond_resched()
|
||||
|
||||
#elif defined(CONFIG_PREEMPT_DYNAMIC)
|
||||
#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||
|
||||
extern int __cond_resched(void);
|
||||
|
||||
@ -110,6 +110,11 @@ static __always_inline void might_resched(void)
|
||||
static_call_mod(might_resched)();
|
||||
}
|
||||
|
||||
#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||
|
||||
extern int dynamic_might_resched(void);
|
||||
# define might_resched() dynamic_might_resched()
|
||||
|
||||
#else
|
||||
|
||||
# define might_resched() do { } while (0)
|
||||
|
1
include/linux/kobject_api.h
Normal file
1
include/linux/kobject_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/kobject.h>
|
1
include/linux/kref_api.h
Normal file
1
include/linux/kref_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/kref.h>
|
1
include/linux/ktime_api.h
Normal file
1
include/linux/ktime_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/ktime.h>
|
1
include/linux/llist_api.h
Normal file
1
include/linux/llist_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/llist.h>
|
1
include/linux/lockdep_api.h
Normal file
1
include/linux/lockdep_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/lockdep.h>
|
1
include/linux/mm_api.h
Normal file
1
include/linux/mm_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/mm.h>
|
1
include/linux/mutex_api.h
Normal file
1
include/linux/mutex_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/mutex.h>
|
1
include/linux/perf_event_api.h
Normal file
1
include/linux/perf_event_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/perf_event.h>
|
1
include/linux/pgtable_api.h
Normal file
1
include/linux/pgtable_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/pgtable.h>
|
@ -6,6 +6,7 @@
|
||||
#include <linux/psi_types.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/cgroup-defs.h>
|
||||
|
||||
struct seq_file;
|
||||
struct css_set;
|
||||
|
@ -141,6 +141,9 @@ struct psi_trigger {
|
||||
* events to one per window
|
||||
*/
|
||||
u64 last_event_time;
|
||||
|
||||
/* Deferred event(s) from previous ratelimit window */
|
||||
bool pending_event;
|
||||
};
|
||||
|
||||
struct psi_group {
|
||||
|
1
include/linux/ptrace_api.h
Normal file
1
include/linux/ptrace_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/ptrace.h>
|
1
include/linux/rcuwait_api.h
Normal file
1
include/linux/rcuwait_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/rcuwait.h>
|
1
include/linux/refcount_api.h
Normal file
1
include/linux/refcount_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/refcount.h>
|
@ -1626,19 +1626,32 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
|
||||
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
|
||||
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
|
||||
|
||||
static inline unsigned int task_state_index(struct task_struct *tsk)
|
||||
static inline unsigned int __task_state_index(unsigned int tsk_state,
|
||||
unsigned int tsk_exit_state)
|
||||
{
|
||||
unsigned int tsk_state = READ_ONCE(tsk->__state);
|
||||
unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
|
||||
unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT;
|
||||
|
||||
BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
|
||||
|
||||
if (tsk_state == TASK_IDLE)
|
||||
state = TASK_REPORT_IDLE;
|
||||
|
||||
/*
|
||||
* We're lying here, but rather than expose a completely new task state
|
||||
* to userspace, we can make this appear as if the task has gone through
|
||||
* a regular rt_mutex_lock() call.
|
||||
*/
|
||||
if (tsk_state == TASK_RTLOCK_WAIT)
|
||||
state = TASK_UNINTERRUPTIBLE;
|
||||
|
||||
return fls(state);
|
||||
}
|
||||
|
||||
static inline unsigned int task_state_index(struct task_struct *tsk)
|
||||
{
|
||||
return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state);
|
||||
}
|
||||
|
||||
static inline char task_index_to_char(unsigned int state)
|
||||
{
|
||||
static const char state_char[] = "RSDTtXZPI";
|
||||
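To make the mapping above concrete, a small user-space sketch (a simplification for illustration, not kernel code) of how a reported state bitmask turns into the single letter exposed through task_index_to_char()'s "RSDTtXZPI" table:

/* Sketch: fls() of the report-state mask indexes the state-letter table.
 * fls(0) == 0 -> 'R', fls(0x1) == 1 -> 'S', fls(0x2) == 2 -> 'D', etc. */
#include <stdio.h>

static unsigned int fls_u32(unsigned int x)
{
	return x ? 32 - (unsigned int)__builtin_clz(x) : 0;
}

int main(void)
{
	static const char state_char[] = "RSDTtXZPI";
	unsigned int states[] = { 0x0, 0x1, 0x2, 0x4 };	/* R, S, D, T */

	for (unsigned int i = 0; i < 4; i++)
		printf("state 0x%x -> '%c'\n", states[i],
		       state_char[fls_u32(states[i])]);
	return 0;
}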
@ -2021,7 +2034,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
|
||||
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
|
||||
extern int __cond_resched(void);
|
||||
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||
|
||||
DECLARE_STATIC_CALL(cond_resched, __cond_resched);
|
||||
|
||||
@ -2030,6 +2043,14 @@ static __always_inline int _cond_resched(void)
|
||||
return static_call_mod(cond_resched)();
|
||||
}
|
||||
|
||||
#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||
extern int dynamic_cond_resched(void);
|
||||
|
||||
static __always_inline int _cond_resched(void)
|
||||
{
|
||||
return dynamic_cond_resched();
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline int _cond_resched(void)
|
||||
|
1
include/linux/sched/affinity.h
Normal file
1
include/linux/sched/affinity.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/sched.h>
|
1
include/linux/sched/cond_resched.h
Normal file
1
include/linux/sched/cond_resched.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/sched.h>
|
@ -6,6 +6,8 @@
|
||||
* NORMAL/BATCH tasks.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
|
||||
#define MAX_DL_PRIO 0
|
||||
|
||||
static inline int dl_prio(int prio)
|
||||
|
@ -5,54 +5,55 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/tick.h>
|
||||
|
||||
enum hk_flags {
|
||||
HK_FLAG_TIMER = 1,
|
||||
HK_FLAG_RCU = (1 << 1),
|
||||
HK_FLAG_MISC = (1 << 2),
|
||||
HK_FLAG_SCHED = (1 << 3),
|
||||
HK_FLAG_TICK = (1 << 4),
|
||||
HK_FLAG_DOMAIN = (1 << 5),
|
||||
HK_FLAG_WQ = (1 << 6),
|
||||
HK_FLAG_MANAGED_IRQ = (1 << 7),
|
||||
HK_FLAG_KTHREAD = (1 << 8),
|
||||
enum hk_type {
|
||||
HK_TYPE_TIMER,
|
||||
HK_TYPE_RCU,
|
||||
HK_TYPE_MISC,
|
||||
HK_TYPE_SCHED,
|
||||
HK_TYPE_TICK,
|
||||
HK_TYPE_DOMAIN,
|
||||
HK_TYPE_WQ,
|
||||
HK_TYPE_MANAGED_IRQ,
|
||||
HK_TYPE_KTHREAD,
|
||||
HK_TYPE_MAX
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CPU_ISOLATION
|
||||
DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
|
||||
extern int housekeeping_any_cpu(enum hk_flags flags);
|
||||
extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags);
|
||||
extern bool housekeeping_enabled(enum hk_flags flags);
|
||||
extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags);
|
||||
extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags);
|
||||
extern int housekeeping_any_cpu(enum hk_type type);
|
||||
extern const struct cpumask *housekeeping_cpumask(enum hk_type type);
|
||||
extern bool housekeeping_enabled(enum hk_type type);
|
||||
extern void housekeeping_affine(struct task_struct *t, enum hk_type type);
|
||||
extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
|
||||
extern void __init housekeeping_init(void);
|
||||
|
||||
#else
|
||||
|
||||
static inline int housekeeping_any_cpu(enum hk_flags flags)
|
||||
static inline int housekeeping_any_cpu(enum hk_type type)
|
||||
{
|
||||
return smp_processor_id();
|
||||
}
|
||||
|
||||
static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
|
||||
static inline const struct cpumask *housekeeping_cpumask(enum hk_type type)
|
||||
{
|
||||
return cpu_possible_mask;
|
||||
}
|
||||
|
||||
static inline bool housekeeping_enabled(enum hk_flags flags)
|
||||
static inline bool housekeeping_enabled(enum hk_type type)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void housekeeping_affine(struct task_struct *t,
|
||||
enum hk_flags flags) { }
|
||||
enum hk_type type) { }
|
||||
static inline void housekeeping_init(void) { }
|
||||
#endif /* CONFIG_CPU_ISOLATION */
|
||||
|
||||
static inline bool housekeeping_cpu(int cpu, enum hk_flags flags)
|
||||
static inline bool housekeeping_cpu(int cpu, enum hk_type type)
|
||||
{
|
||||
#ifdef CONFIG_CPU_ISOLATION
|
||||
if (static_branch_unlikely(&housekeeping_overridden))
|
||||
return housekeeping_test_cpu(cpu, flags);
|
||||
return housekeeping_test_cpu(cpu, type);
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
1
include/linux/sched/posix-timers.h
Normal file
1
include/linux/sched/posix-timers.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/posix-timers.h>
|
1
include/linux/sched/rseq_api.h
Normal file
1
include/linux/sched/rseq_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/rseq.h>
|
@ -45,10 +45,6 @@ extern unsigned int sysctl_sched_uclamp_util_min_rt_default;
|
||||
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_AUTOGROUP
|
||||
extern unsigned int sysctl_sched_autogroup_enabled;
|
||||
#endif
|
||||
|
||||
extern int sysctl_sched_rr_timeslice;
|
||||
extern int sched_rr_timeslice;
|
||||
|
||||
|
1
include/linux/sched/task_flags.h
Normal file
1
include/linux/sched/task_flags.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/sched.h>
|
1
include/linux/sched/thread_info_api.h
Normal file
1
include/linux/sched/thread_info_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/thread_info.h>
|
@ -93,6 +93,7 @@ struct sched_domain {
|
||||
unsigned int busy_factor; /* less balancing by factor if busy */
|
||||
unsigned int imbalance_pct; /* No balance until over watermark */
|
||||
unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
|
||||
unsigned int imb_numa_nr; /* Nr running tasks that allows a NUMA imbalance */
|
||||
|
||||
int nohz_idle; /* NOHZ IDLE status */
|
||||
int flags; /* See SD_* */
|
||||
|
@ -5,6 +5,8 @@
|
||||
#ifndef LINUX_SCHED_CLOCK
|
||||
#define LINUX_SCHED_CLOCK
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#ifdef CONFIG_GENERIC_SCHED_CLOCK
|
||||
/**
|
||||
* struct clock_read_data - data required to read from sched_clock()
|
||||
|
1
include/linux/seqlock_api.h
Normal file
1
include/linux/seqlock_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/seqlock.h>
|
1
include/linux/softirq.h
Normal file
1
include/linux/softirq.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/interrupt.h>
|
1
include/linux/spinlock_api.h
Normal file
1
include/linux/spinlock_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/spinlock.h>
|
1
include/linux/swait_api.h
Normal file
1
include/linux/swait_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/swait.h>
|
1
include/linux/syscalls_api.h
Normal file
1
include/linux/syscalls_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/syscalls.h>
|
1
include/linux/u64_stats_sync_api.h
Normal file
1
include/linux/u64_stats_sync_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/u64_stats_sync.h>
|
1
include/linux/wait_api.h
Normal file
1
include/linux/wait_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/wait.h>
|
1
include/linux/workqueue_api.h
Normal file
1
include/linux/workqueue_api.h
Normal file
@ -0,0 +1 @@
|
||||
#include <linux/workqueue.h>
|
@ -187,7 +187,9 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
|
||||
TP_ARGS(p));
|
||||
|
||||
#ifdef CREATE_TRACE_POINTS
|
||||
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
|
||||
static inline long __trace_sched_switch_state(bool preempt,
|
||||
unsigned int prev_state,
|
||||
struct task_struct *p)
|
||||
{
|
||||
unsigned int state;
|
||||
|
||||
@ -208,7 +210,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
|
||||
* it for left shift operation to get the correct task->state
|
||||
* mapping.
|
||||
*/
|
||||
state = task_state_index(p);
|
||||
state = __task_state_index(prev_state, p->exit_state);
|
||||
|
||||
return state ? (1 << (state - 1)) : state;
|
||||
}
|
||||
@ -220,10 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
|
||||
TRACE_EVENT(sched_switch,
|
||||
|
||||
TP_PROTO(bool preempt,
|
||||
unsigned int prev_state,
|
||||
struct task_struct *prev,
|
||||
struct task_struct *next),
|
||||
|
||||
TP_ARGS(preempt, prev, next),
|
||||
TP_ARGS(preempt, prev_state, prev, next),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array( char, prev_comm, TASK_COMM_LEN )
|
||||
@ -239,7 +242,7 @@ TRACE_EVENT(sched_switch,
|
||||
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
|
||||
__entry->prev_pid = prev->pid;
|
||||
__entry->prev_prio = prev->prio;
|
||||
__entry->prev_state = __trace_sched_switch_state(preempt, prev);
|
||||
__entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev);
|
||||
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
|
||||
__entry->next_pid = next->pid;
|
||||
__entry->next_prio = next->prio;
|
||||
|
@ -105,23 +105,11 @@ struct rseq {
|
||||
* Read and set by the kernel. Set by user-space with single-copy
|
||||
* atomicity semantics. This field should only be updated by the
|
||||
* thread which registered this data structure. Aligned on 64-bit.
|
||||
*
|
||||
* 32-bit architectures should update the low order bits of the
|
||||
* rseq_cs field, leaving the high order bits initialized to 0.
|
||||
*/
|
||||
union {
|
||||
__u64 ptr64;
|
||||
#ifdef __LP64__
|
||||
__u64 ptr;
|
||||
#else
|
||||
struct {
|
||||
#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN)
|
||||
__u32 padding; /* Initialized to zero. */
|
||||
__u32 ptr32;
|
||||
#else /* LITTLE */
|
||||
__u32 ptr32;
|
||||
__u32 padding; /* Initialized to zero. */
|
||||
#endif /* ENDIAN */
|
||||
} ptr;
|
||||
#endif
|
||||
} rseq_cs;
|
||||
__u64 rseq_cs;
|
||||
|
||||
/*
|
||||
* Restartable sequences flags field.
|
||||
|
@ -31,7 +31,8 @@ quiet_cmd_compile.h = CHK $@
|
||||
cmd_compile.h = \
|
||||
$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
|
||||
"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT_BUILD)" \
|
||||
"$(CONFIG_PREEMPT_RT)" "$(CONFIG_CC_VERSION_TEXT)" "$(LD)"
|
||||
"$(CONFIG_PREEMPT_DYNAMIC)" "$(CONFIG_PREEMPT_RT)" \
|
||||
"$(CONFIG_CC_VERSION_TEXT)" "$(LD)"
|
||||
|
||||
include/generated/compile.h: FORCE
|
||||
$(call cmd,compile.h)
|
||||
|
@ -96,8 +96,9 @@ config PREEMPTION
|
||||
config PREEMPT_DYNAMIC
|
||||
bool "Preemption behaviour defined on boot"
|
||||
depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
|
||||
select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY
|
||||
select PREEMPT_BUILD
|
||||
default y
|
||||
default y if HAVE_PREEMPT_DYNAMIC_CALL
|
||||
help
|
||||
This option allows to define the preemption model on the kernel
|
||||
command line parameter and thus override the default preemption
|
||||
|
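To see which model a PREEMPT_DYNAMIC kernel is currently running, a small sketch (assumption: such builds expose a "preempt" file under /sys/kernel/debug/sched and debugfs is mounted; treat the exact path as an assumption):

/* Sketch: print the currently selected dynamic preemption model. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/sched/preempt", "r");
	char line[128];

	if (!f) {
		perror("open");	/* usually needs root */
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "none voluntary (full)" */
	fclose(f);
	return 0;
}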
@ -833,7 +833,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
|
||||
update_domain_attr_tree(dattr, &top_cpuset);
|
||||
}
|
||||
cpumask_and(doms[0], top_cpuset.effective_cpus,
|
||||
housekeeping_cpumask(HK_FLAG_DOMAIN));
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
|
||||
goto done;
|
||||
}
|
||||
@ -863,7 +863,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
|
||||
if (!cpumask_empty(cp->cpus_allowed) &&
|
||||
!(is_sched_load_balance(cp) &&
|
||||
cpumask_intersects(cp->cpus_allowed,
|
||||
housekeeping_cpumask(HK_FLAG_DOMAIN))))
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN))))
|
||||
continue;
|
||||
|
||||
if (root_load_balance &&
|
||||
@ -952,7 +952,7 @@ restart:
|
||||
|
||||
if (apn == b->pn) {
|
||||
cpumask_or(dp, dp, b->effective_cpus);
|
||||
cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN));
|
||||
cpumask_and(dp, dp, housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
if (dattr)
|
||||
update_domain_attr_tree(dattr + nslot, b);
|
||||
|
||||
|
@ -1489,8 +1489,8 @@ int freeze_secondary_cpus(int primary)
|
||||
cpu_maps_update_begin();
|
||||
if (primary == -1) {
|
||||
primary = cpumask_first(cpu_online_mask);
|
||||
if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
|
||||
primary = housekeeping_any_cpu(HK_FLAG_TIMER);
|
||||
if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
|
||||
primary = housekeeping_any_cpu(HK_TYPE_TIMER);
|
||||
} else {
|
||||
if (!cpu_online(primary))
|
||||
primary = cpumask_first(cpu_online_mask);
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/entry-common.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/livepatch.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/tick.h>
|
||||
@ -394,7 +395,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
|
||||
return ret;
|
||||
}
|
||||
|
||||
void irqentry_exit_cond_resched(void)
|
||||
void raw_irqentry_exit_cond_resched(void)
|
||||
{
|
||||
if (!preempt_count()) {
|
||||
/* Sanity check RCU and thread stack */
|
||||
@ -406,7 +407,17 @@ void irqentry_exit_cond_resched(void)
|
||||
}
|
||||
}
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
|
||||
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
|
||||
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
|
||||
void dynamic_irqentry_exit_cond_resched(void)
{
	if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
		return;
	raw_irqentry_exit_cond_resched();
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
|
||||
@ -434,13 +445,9 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
|
||||
}
|
||||
|
||||
instrumentation_begin();
|
||||
if (IS_ENABLED(CONFIG_PREEMPTION)) {
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
static_call(irqentry_exit_cond_resched)();
|
||||
#else
|
||||
if (IS_ENABLED(CONFIG_PREEMPTION))
|
||||
irqentry_exit_cond_resched();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Covers both tracing and lockdep */
|
||||
trace_hardirqs_on();
|
||||
instrumentation_end();
|
||||
|
@ -176,10 +176,10 @@ static bool hk_should_isolate(struct irq_data *data, unsigned int cpu)
|
||||
{
|
||||
const struct cpumask *hk_mask;
|
||||
|
||||
if (!housekeeping_enabled(HK_FLAG_MANAGED_IRQ))
|
||||
if (!housekeeping_enabled(HK_TYPE_MANAGED_IRQ))
|
||||
return false;
|
||||
|
||||
hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
|
||||
hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
|
||||
if (cpumask_subset(irq_data_get_effective_affinity_mask(data), hk_mask))
|
||||
return false;
|
||||
|
||||
|
@ -247,13 +247,13 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
|
||||
* online.
|
||||
*/
|
||||
if (irqd_affinity_is_managed(data) &&
|
||||
housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) {
|
||||
housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) {
|
||||
const struct cpumask *hk_mask, *prog_mask;
|
||||
|
||||
static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
|
||||
static struct cpumask tmp_mask;
|
||||
|
||||
hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
|
||||
hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
|
||||
|
||||
raw_spin_lock(&tmp_mask_lock);
|
||||
cpumask_and(&tmp_mask, mask, hk_mask);
|
||||
|
@ -356,7 +356,7 @@ static int kthread(void *_create)
|
||||
* back to default in case they have been changed.
|
||||
*/
|
||||
sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
|
||||
set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD));
|
||||
set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
|
||||
|
||||
/* OK, tell user we're spawned, wait for stop or wakeup */
|
||||
__set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
@ -722,7 +722,7 @@ int kthreadd(void *unused)
|
||||
/* Setup a clean context for our children to inherit. */
|
||||
set_task_comm(tsk, "kthreadd");
|
||||
ignore_signals(tsk);
|
||||
set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_FLAG_KTHREAD));
|
||||
set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));
|
||||
set_mems_allowed(node_states[N_MEMORY]);
|
||||
|
||||
current->flags |= PF_NOFREEZE;
|
||||
|
@ -496,7 +496,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
|
||||
struct rcu_tasks *rtp = arg;
|
||||
|
||||
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
|
||||
housekeeping_affine(current, HK_FLAG_RCU);
|
||||
housekeeping_affine(current, HK_TYPE_RCU);
|
||||
WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
|
||||
|
||||
/*
|
||||
|
@ -1218,9 +1218,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
|
||||
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
|
||||
cpu != outgoingcpu)
|
||||
cpumask_set_cpu(cpu, cm);
|
||||
cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU));
|
||||
cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
|
||||
if (cpumask_empty(cm))
|
||||
cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU));
|
||||
cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
|
||||
set_cpus_allowed_ptr(t, cm);
|
||||
mutex_unlock(&rnp->boost_kthread_mutex);
|
||||
free_cpumask_var(cm);
|
||||
@ -1296,7 +1296,7 @@ static void rcu_bind_gp_kthread(void)
|
||||
{
|
||||
if (!tick_nohz_full_enabled())
|
||||
return;
|
||||
housekeeping_affine(current, HK_FLAG_RCU);
|
||||
housekeeping_affine(current, HK_TYPE_RCU);
|
||||
}
|
||||
|
||||
/* Record the current task on dyntick-idle entry. */
|
||||
|
@ -128,10 +128,10 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
|
||||
int ret;
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
if (get_user(ptr, &t->rseq->rseq_cs.ptr64))
|
||||
if (get_user(ptr, &t->rseq->rseq_cs))
|
||||
return -EFAULT;
|
||||
#else
|
||||
if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
|
||||
if (copy_from_user(&ptr, &t->rseq->rseq_cs, sizeof(ptr)))
|
||||
return -EFAULT;
|
||||
#endif
|
||||
if (!ptr) {
|
||||
@ -217,9 +217,9 @@ static int clear_rseq_cs(struct task_struct *t)
|
||||
* Set rseq_cs to NULL.
|
||||
*/
|
||||
#ifdef CONFIG_64BIT
|
||||
return put_user(0UL, &t->rseq->rseq_cs.ptr64);
|
||||
return put_user(0UL, &t->rseq->rseq_cs);
|
||||
#else
|
||||
if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
|
||||
if (clear_user(&t->rseq->rseq_cs, sizeof(t->rseq->rseq_cs)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
#endif
|
||||
|
@ -1,7 +1,4 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
ifdef CONFIG_FUNCTION_TRACER
|
||||
CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE)
|
||||
endif
|
||||
|
||||
# The compilers are complaining about unused variables inside an if(0) scope
|
||||
# block. This is daft, shut them up.
|
||||
@ -25,18 +22,13 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
|
||||
CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
|
||||
endif
|
||||
|
||||
obj-y += core.o loadavg.o clock.o cputime.o
|
||||
obj-y += idle.o fair.o rt.o deadline.o
|
||||
obj-y += wait.o wait_bit.o swait.o completion.o
|
||||
|
||||
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
|
||||
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
|
||||
obj-$(CONFIG_SCHEDSTATS) += stats.o
|
||||
obj-$(CONFIG_SCHED_DEBUG) += debug.o
|
||||
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
|
||||
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
|
||||
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
|
||||
obj-$(CONFIG_MEMBARRIER) += membarrier.o
|
||||
obj-$(CONFIG_CPU_ISOLATION) += isolation.o
|
||||
obj-$(CONFIG_PSI) += psi.o
|
||||
obj-$(CONFIG_SCHED_CORE) += core_sched.o
|
||||
#
|
||||
# Build efficiency:
|
||||
#
|
||||
# These compilation units have roughly the same size and complexity - so their
|
||||
# build parallelizes well and finishes roughly at once:
|
||||
#
|
||||
obj-y += core.o
|
||||
obj-y += fair.o
|
||||
obj-y += build_policy.o
|
||||
obj-y += build_utility.o
|
||||
|
@ -1,14 +1,35 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Auto-group scheduling implementation:
|
||||
*/
|
||||
#include <linux/nospec.h>
|
||||
#include "sched.h"
|
||||
|
||||
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
|
||||
static struct autogroup autogroup_default;
|
||||
static atomic_t autogroup_seq_nr;
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
static struct ctl_table sched_autogroup_sysctls[] = {
|
||||
{
|
||||
.procname = "sched_autogroup_enabled",
|
||||
.data = &sysctl_sched_autogroup_enabled,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static void __init sched_autogroup_sysctl_init(void)
|
||||
{
|
||||
register_sysctl_init("kernel", sched_autogroup_sysctls);
|
||||
}
|
||||
#else
|
||||
#define sched_autogroup_sysctl_init() do { } while (0)
|
||||
#endif
|
||||
|
||||
void __init autogroup_init(struct task_struct *init_task)
|
||||
{
|
||||
autogroup_default.tg = &root_task_group;
|
||||
@ -198,6 +219,7 @@ void sched_autogroup_exit(struct signal_struct *sig)
|
||||
static int __init setup_autogroup(char *str)
|
||||
{
|
||||
sysctl_sched_autogroup_enabled = 0;
|
||||
sched_autogroup_sysctl_init();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -1,4 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _KERNEL_SCHED_AUTOGROUP_H
|
||||
#define _KERNEL_SCHED_AUTOGROUP_H
|
||||
|
||||
#ifdef CONFIG_SCHED_AUTOGROUP
|
||||
|
||||
struct autogroup {
|
||||
@ -27,6 +30,7 @@ extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
|
||||
static inline struct task_group *
|
||||
autogroup_task_group(struct task_struct *p, struct task_group *tg)
|
||||
{
|
||||
extern unsigned int sysctl_sched_autogroup_enabled;
|
||||
int enabled = READ_ONCE(sysctl_sched_autogroup_enabled);
|
||||
|
||||
if (enabled && task_wants_autogroup(p, tg))
|
||||
@ -58,3 +62,5 @@ static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SCHED_AUTOGROUP */
|
||||
|
||||
#endif /* _KERNEL_SCHED_AUTOGROUP_H */
|
||||
|
52
kernel/sched/build_policy.c
Normal file
52
kernel/sched/build_policy.c
Normal file
@ -0,0 +1,52 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* These are the scheduling policy related scheduler files, built
|
||||
* in a single compilation unit for build efficiency reasons.
|
||||
*
|
||||
* ( Incidentally, the size of the compilation unit is roughly
|
||||
* comparable to core.c and fair.c, the other two big
|
||||
* compilation units. This helps balance build time, while
|
||||
* coalescing source files to amortize header inclusion
|
||||
* cost. )
|
||||
*
|
||||
* core.c and fair.c are built separately.
|
||||
*/
|
||||
|
||||
/* Headers: */
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/cputime.h>
|
||||
#include <linux/sched/posix-timers.h>
|
||||
#include <linux/sched/rt.h>
|
||||
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/livepatch.h>
|
||||
#include <linux/psi.h>
|
||||
#include <linux/seqlock_api.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/tsacct_kern.h>
|
||||
#include <linux/vtime.h>
|
||||
|
||||
#include <uapi/linux/sched/types.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
#include "autogroup.h"
|
||||
#include "stats.h"
|
||||
#include "pelt.h"
|
||||
|
||||
/* Source code modules: */
|
||||
|
||||
#include "idle.c"
|
||||
|
||||
#include "rt.c"
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
# include "cpudeadline.c"
|
||||
# include "pelt.c"
|
||||
#endif
|
||||
|
||||
#include "cputime.c"
|
||||
#include "deadline.c"
|
||||
|
109
kernel/sched/build_utility.c
Normal file
109
kernel/sched/build_utility.c
Normal file
@ -0,0 +1,109 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* These are various utility functions of the scheduler,
|
||||
* built in a single compilation unit for build efficiency reasons.
|
||||
*
|
||||
* ( Incidentally, the size of the compilation unit is roughly
|
||||
* comparable to core.c, fair.c, smp.c and policy.c, the other
|
||||
* big compilation units. This helps balance build time, while
|
||||
* coalescing source files to amortize header inclusion
|
||||
* cost. )
|
||||
*/
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/cputime.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/sched/loadavg.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/sched/rseq_api.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/cpumask_api.h>
|
||||
#include <linux/cpuset.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/energy_model.h>
|
||||
#include <linux/hashtable_api.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/kobject_api.h>
|
||||
#include <linux/membarrier.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/nospec.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/psi.h>
|
||||
#include <linux/psi.h>
|
||||
#include <linux/ptrace_api.h>
|
||||
#include <linux/sched_clock.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/spinlock_api.h>
|
||||
#include <linux/swait_api.h>
|
||||
#include <linux/timex.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/wait_api.h>
|
||||
#include <linux/workqueue_api.h>
|
||||
|
||||
#include <uapi/linux/prctl.h>
|
||||
#include <uapi/linux/sched/types.h>
|
||||
|
||||
#include <asm/switch_to.h>
|
||||
|
||||
#include "sched.h"
|
||||
#include "sched-pelt.h"
|
||||
#include "stats.h"
|
||||
#include "autogroup.h"
|
||||
|
||||
#include "clock.c"
|
||||
|
||||
#ifdef CONFIG_CGROUP_CPUACCT
|
||||
# include "cpuacct.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ
|
||||
# include "cpufreq.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
|
||||
# include "cpufreq_schedutil.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
# include "debug.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
# include "stats.c"
|
||||
#endif
|
||||
|
||||
#include "loadavg.c"
|
||||
#include "completion.c"
|
||||
#include "swait.c"
|
||||
#include "wait_bit.c"
|
||||
#include "wait.c"
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
# include "cpupri.c"
|
||||
# include "stop_task.c"
|
||||
# include "topology.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_CORE
|
||||
# include "core_sched.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PSI
|
||||
# include "psi.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MEMBARRIER
|
||||
# include "membarrier.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_ISOLATION
|
||||
# include "isolation.c"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_AUTOGROUP
|
||||
# include "autogroup.c"
|
||||
#endif
|
@ -53,15 +53,13 @@
|
||||
* that is otherwise invisible (TSC gets stopped).
|
||||
*
|
||||
*/
|
||||
#include "sched.h"
|
||||
#include <linux/sched_clock.h>
|
||||
|
||||
/*
|
||||
* Scheduler clock - returns current time in nanosec units.
|
||||
* This is default implementation.
|
||||
* Architectures and sub-architectures can override this.
|
||||
*/
|
||||
unsigned long long __weak sched_clock(void)
|
||||
notrace unsigned long long __weak sched_clock(void)
|
||||
{
|
||||
return (unsigned long long)(jiffies - INITIAL_JIFFIES)
|
||||
* (NSEC_PER_SEC / HZ);
|
||||
@ -95,28 +93,28 @@ struct sched_clock_data {
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
|
||||
|
||||
static inline struct sched_clock_data *this_scd(void)
|
||||
notrace static inline struct sched_clock_data *this_scd(void)
|
||||
{
|
||||
return this_cpu_ptr(&sched_clock_data);
|
||||
}
|
||||
|
||||
static inline struct sched_clock_data *cpu_sdc(int cpu)
|
||||
notrace static inline struct sched_clock_data *cpu_sdc(int cpu)
|
||||
{
|
||||
return &per_cpu(sched_clock_data, cpu);
|
||||
}
|
||||
|
||||
int sched_clock_stable(void)
|
||||
notrace int sched_clock_stable(void)
|
||||
{
|
||||
return static_branch_likely(&__sched_clock_stable);
|
||||
}
|
||||
|
||||
static void __scd_stamp(struct sched_clock_data *scd)
|
||||
notrace static void __scd_stamp(struct sched_clock_data *scd)
|
||||
{
|
||||
scd->tick_gtod = ktime_get_ns();
|
||||
scd->tick_raw = sched_clock();
|
||||
}
|
||||
|
||||
static void __set_sched_clock_stable(void)
|
||||
notrace static void __set_sched_clock_stable(void)
|
||||
{
|
||||
struct sched_clock_data *scd;
|
||||
|
||||
@ -151,7 +149,7 @@ static void __set_sched_clock_stable(void)
|
||||
* The only way to fully avoid random clock jumps is to boot with:
|
||||
* "tsc=unstable".
|
||||
*/
|
||||
static void __sched_clock_work(struct work_struct *work)
|
||||
notrace static void __sched_clock_work(struct work_struct *work)
|
||||
{
|
||||
struct sched_clock_data *scd;
|
||||
int cpu;
|
||||
@ -177,7 +175,7 @@ static void __sched_clock_work(struct work_struct *work)
|
||||
|
||||
static DECLARE_WORK(sched_clock_work, __sched_clock_work);
|
||||
|
||||
static void __clear_sched_clock_stable(void)
|
||||
notrace static void __clear_sched_clock_stable(void)
|
||||
{
|
||||
if (!sched_clock_stable())
|
||||
return;
|
||||
@ -186,7 +184,7 @@ static void __clear_sched_clock_stable(void)
|
||||
schedule_work(&sched_clock_work);
|
||||
}
|
||||
|
||||
void clear_sched_clock_stable(void)
|
||||
notrace void clear_sched_clock_stable(void)
|
||||
{
|
||||
__sched_clock_stable_early = 0;
|
||||
|
||||
@ -196,7 +194,7 @@ void clear_sched_clock_stable(void)
|
||||
__clear_sched_clock_stable();
|
||||
}
|
||||
|
||||
static void __sched_clock_gtod_offset(void)
|
||||
notrace static void __sched_clock_gtod_offset(void)
|
||||
{
|
||||
struct sched_clock_data *scd = this_scd();
|
||||
|
||||
@ -246,12 +244,12 @@ late_initcall(sched_clock_init_late);
 * min, max except they take wrapping into account
 */

static inline u64 wrap_min(u64 x, u64 y)
notrace static inline u64 wrap_min(u64 x, u64 y)
{
	return (s64)(x - y) < 0 ? x : y;
}

static inline u64 wrap_max(u64 x, u64 y)
notrace static inline u64 wrap_max(u64 x, u64 y)
{
	return (s64)(x - y) > 0 ? x : y;
}
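
The two helpers above rely on the signed-difference trick: subtract as u64, reinterpret as s64, and the sign says which value is "later" even if the counter has wrapped. A small self-contained user-space demonstration of just that arithmetic (not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Same idea as wrap_min()/wrap_max(): order values via the signed difference
 * so that a value which has wrapped past 2^64 still compares as later. */
static uint64_t wrap_min(uint64_t x, uint64_t y)
{
	return (int64_t)(x - y) < 0 ? x : y;
}

static uint64_t wrap_max(uint64_t x, uint64_t y)
{
	return (int64_t)(x - y) > 0 ? x : y;
}

int main(void)
{
	uint64_t before = UINT64_MAX - 5;	/* just before the wrap */
	uint64_t after  = 10;			/* just after the wrap */

	printf("wrap_min = %llu\n", (unsigned long long)wrap_min(before, after));
	printf("wrap_max = %llu\n", (unsigned long long)wrap_max(before, after));
	return 0;
}

Run it and wrap_max() picks the post-wrap value 10 as the later one, which a plain unsigned comparison would get backwards.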
|
||||
@ -262,7 +260,7 @@ static inline u64 wrap_max(u64 x, u64 y)
|
||||
* - filter out backward motion
|
||||
* - use the GTOD tick value to create a window to filter crazy TSC values
|
||||
*/
|
||||
static u64 sched_clock_local(struct sched_clock_data *scd)
|
||||
notrace static u64 sched_clock_local(struct sched_clock_data *scd)
|
||||
{
|
||||
u64 now, clock, old_clock, min_clock, max_clock, gtod;
|
||||
s64 delta;
|
||||
@ -295,7 +293,7 @@ again:
|
||||
return clock;
|
||||
}
|
||||
|
||||
static u64 sched_clock_remote(struct sched_clock_data *scd)
|
||||
notrace static u64 sched_clock_remote(struct sched_clock_data *scd)
|
||||
{
|
||||
struct sched_clock_data *my_scd = this_scd();
|
||||
u64 this_clock, remote_clock;
|
||||
@ -362,7 +360,7 @@ again:
|
||||
*
|
||||
* See cpu_clock().
|
||||
*/
|
||||
u64 sched_clock_cpu(int cpu)
|
||||
notrace u64 sched_clock_cpu(int cpu)
|
||||
{
|
||||
struct sched_clock_data *scd;
|
||||
u64 clock;
|
||||
@ -386,7 +384,7 @@ u64 sched_clock_cpu(int cpu)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_clock_cpu);
|
||||
|
||||
void sched_clock_tick(void)
|
||||
notrace void sched_clock_tick(void)
|
||||
{
|
||||
struct sched_clock_data *scd;
|
||||
|
||||
@ -403,7 +401,7 @@ void sched_clock_tick(void)
|
||||
sched_clock_local(scd);
|
||||
}
|
||||
|
||||
void sched_clock_tick_stable(void)
|
||||
notrace void sched_clock_tick_stable(void)
|
||||
{
|
||||
if (!sched_clock_stable())
|
||||
return;
|
||||
@ -423,7 +421,7 @@ void sched_clock_tick_stable(void)
|
||||
/*
|
||||
* We are going deep-idle (irqs are disabled):
|
||||
*/
|
||||
void sched_clock_idle_sleep_event(void)
|
||||
notrace void sched_clock_idle_sleep_event(void)
|
||||
{
|
||||
sched_clock_cpu(smp_processor_id());
|
||||
}
|
||||
@ -432,7 +430,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
|
||||
/*
|
||||
* We just idled; resync with ktime.
|
||||
*/
|
||||
void sched_clock_idle_wakeup_event(void)
|
||||
notrace void sched_clock_idle_wakeup_event(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
@ -458,7 +456,7 @@ void __init sched_clock_init(void)
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
u64 sched_clock_cpu(int cpu)
|
||||
notrace u64 sched_clock_cpu(int cpu)
|
||||
{
|
||||
if (!static_branch_likely(&sched_clock_running))
|
||||
return 0;
|
||||
@ -476,7 +474,7 @@ u64 sched_clock_cpu(int cpu)
|
||||
* On bare metal this function should return the same as local_clock.
|
||||
* Architectures and sub-architectures can override this.
|
||||
*/
|
||||
u64 __weak running_clock(void)
|
||||
notrace u64 __weak running_clock(void)
|
||||
{
|
||||
return local_clock();
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0

/*
 * Generic wait-for-completion handler;
 *
@ -11,7 +12,6 @@
 * typically be used for exclusion which gives rise to priority inversion.
 * Waiting for completion is a typically sync point, but not an exclusion point.
 */
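
As a reminder of the API this file provides, here is a minimal sketch of the usual pattern (illustrative only, not part of this patch): one context signals that an event happened, another sleeps until it has.

#include <linux/completion.h>
#include <linux/err.h>
#include <linux/kthread.h>

static int worker_fn(void *data)
{
	struct completion *done = data;

	/* ... do the actual work here ... */
	complete(done);			/* wake up one waiter */
	return 0;
}

static int run_and_wait(void)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct task_struct *tsk;

	tsk = kthread_run(worker_fn, &done, "completion-demo");
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	wait_for_completion(&done);	/* sleep until worker_fn() completes it */
	return 0;
}

This is a synchronization point rather than an exclusion point, exactly as the comment above warns: nothing here protects shared data.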
|
||||
#include "sched.h"
|
||||
|
||||
/**
|
||||
* complete: - signals a single thread waiting on this completion
|
||||
|
@ -6,27 +6,91 @@
|
||||
*
|
||||
* Copyright (C) 1991-2002 Linus Torvalds
|
||||
*/
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/sched.h>
|
||||
#undef CREATE_TRACE_POINTS
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/hrtimer_api.h>
|
||||
#include <linux/ktime_api.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/syscalls_api.h>
|
||||
#include <linux/debug_locks.h>
|
||||
#include <linux/prefetch.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/pgtable_api.h>
|
||||
#include <linux/wait_bit.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/spinlock_api.h>
|
||||
#include <linux/cpumask_api.h>
|
||||
#include <linux/lockdep_api.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/softirq.h>
|
||||
#include <linux/refcount_api.h>
|
||||
#include <linux/topology.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/cond_resched.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/sched/loadavg.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/sched/nohz.h>
|
||||
#include <linux/sched/rseq_api.h>
|
||||
#include <linux/sched/rt.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
#include <linux/nospec.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/cpuset.h>
|
||||
#include <linux/delayacct.h>
|
||||
#include <linux/init_task.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/ioprio.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/kcov.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/llist_api.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/mutex_api.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/nospec.h>
|
||||
#include <linux/perf_event_api.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/psi.h>
|
||||
#include <linux/rcuwait_api.h>
|
||||
#include <linux/sched/wake_q.h>
|
||||
#include <linux/scs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/vtime.h>
|
||||
#include <linux/wait_api.h>
|
||||
#include <linux/workqueue_api.h>
|
||||
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
# ifdef CONFIG_GENERIC_ENTRY
|
||||
# include <linux/entry-common.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <uapi/linux/sched/types.h>
|
||||
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/tlb.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <linux/sched/rseq_api.h>
|
||||
#include <trace/events/sched.h>
|
||||
#undef CREATE_TRACE_POINTS
|
||||
|
||||
#include "sched.h"
|
||||
#include "stats.h"
|
||||
#include "autogroup.h"
|
||||
|
||||
#include "autogroup.h"
|
||||
#include "pelt.h"
|
||||
#include "smp.h"
|
||||
#include "stats.h"
|
||||
|
||||
#include "../workqueue_internal.h"
|
||||
#include "../../fs/io-wq.h"
|
||||
#include "../smpboot.h"
|
||||
|
||||
#include "pelt.h"
|
||||
#include "smp.h"
|
||||
|
||||
/*
|
||||
* Export tracepoints that act as a bare tracehook (ie: have no trace event
|
||||
* associated with them) to allow external modules to probe them.
|
||||
@ -36,6 +100,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
|
||||
@ -1024,13 +1089,13 @@ int get_nohz_timer_target(void)
|
||||
struct sched_domain *sd;
|
||||
const struct cpumask *hk_mask;
|
||||
|
||||
if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_TIMER)) {
|
||||
if (!idle_cpu(cpu))
|
||||
return cpu;
|
||||
default_cpu = cpu;
|
||||
}
|
||||
|
||||
hk_mask = housekeeping_cpumask(HK_FLAG_TIMER);
|
||||
hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_domain(cpu, sd) {
|
||||
@ -1046,7 +1111,7 @@ int get_nohz_timer_target(void)
|
||||
}
|
||||
|
||||
if (default_cpu == -1)
|
||||
default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
|
||||
default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
|
||||
cpu = default_cpu;
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
@ -4834,7 +4899,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
|
||||
{
|
||||
struct rq *rq = this_rq();
|
||||
struct mm_struct *mm = rq->prev_mm;
|
||||
long prev_state;
|
||||
unsigned int prev_state;
|
||||
|
||||
/*
|
||||
* The previous task will have left us with a preempt_count of 2
|
||||
@ -5379,7 +5444,7 @@ static void sched_tick_start(int cpu)
|
||||
int os;
|
||||
struct tick_work *twork;
|
||||
|
||||
if (housekeeping_cpu(cpu, HK_FLAG_TICK))
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_TICK))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!tick_work_cpu);
|
||||
@ -5400,7 +5465,7 @@ static void sched_tick_stop(int cpu)
|
||||
struct tick_work *twork;
|
||||
int os;
|
||||
|
||||
if (housekeeping_cpu(cpu, HK_FLAG_TICK))
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_TICK))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!tick_work_cpu);
|
||||
@ -6298,7 +6363,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
|
||||
migrate_disable_switch(rq, prev);
|
||||
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
|
||||
|
||||
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next);
|
||||
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
|
||||
|
||||
/* Also unlocks the rq: */
|
||||
rq = context_switch(rq, prev, next, &rf);
|
||||
@ -6490,17 +6555,31 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
|
||||
*/
|
||||
if (likely(!preemptible()))
|
||||
return;
|
||||
|
||||
preempt_schedule_common();
|
||||
}
|
||||
NOKPROBE_SYMBOL(preempt_schedule);
|
||||
EXPORT_SYMBOL(preempt_schedule);
|
||||
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func);
|
||||
EXPORT_STATIC_CALL_TRAMP(preempt_schedule);
|
||||
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||
#ifndef preempt_schedule_dynamic_enabled
|
||||
#define preempt_schedule_dynamic_enabled preempt_schedule
|
||||
#define preempt_schedule_dynamic_disabled NULL
|
||||
#endif
|
||||
DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
|
||||
EXPORT_STATIC_CALL_TRAMP(preempt_schedule);
|
||||
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||
static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule);
|
||||
void __sched notrace dynamic_preempt_schedule(void)
|
||||
{
|
||||
if (!static_branch_unlikely(&sk_dynamic_preempt_schedule))
|
||||
return;
|
||||
preempt_schedule();
|
||||
}
|
||||
NOKPROBE_SYMBOL(dynamic_preempt_schedule);
|
||||
EXPORT_SYMBOL(dynamic_preempt_schedule);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* preempt_schedule_notrace - preempt_schedule called by tracing
|
||||
@ -6555,148 +6634,28 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
|
||||
EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
|
||||
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func);
|
||||
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||
#ifndef preempt_schedule_notrace_dynamic_enabled
|
||||
#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace
|
||||
#define preempt_schedule_notrace_dynamic_disabled NULL
|
||||
#endif
|
||||
DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
|
||||
EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace);
|
||||
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||
static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule_notrace);
|
||||
void __sched notrace dynamic_preempt_schedule_notrace(void)
|
||||
{
|
||||
if (!static_branch_unlikely(&sk_dynamic_preempt_schedule_notrace))
|
||||
return;
|
||||
preempt_schedule_notrace();
|
||||
}
|
||||
NOKPROBE_SYMBOL(dynamic_preempt_schedule_notrace);
|
||||
EXPORT_SYMBOL(dynamic_preempt_schedule_notrace);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_PREEMPTION */
|
||||
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
|
||||
#include <linux/entry-common.h>
|
||||
|
||||
/*
|
||||
* SC:cond_resched
|
||||
* SC:might_resched
|
||||
* SC:preempt_schedule
|
||||
* SC:preempt_schedule_notrace
|
||||
* SC:irqentry_exit_cond_resched
|
||||
*
|
||||
*
|
||||
* NONE:
|
||||
* cond_resched <- __cond_resched
|
||||
* might_resched <- RET0
|
||||
* preempt_schedule <- NOP
|
||||
* preempt_schedule_notrace <- NOP
|
||||
* irqentry_exit_cond_resched <- NOP
|
||||
*
|
||||
* VOLUNTARY:
|
||||
* cond_resched <- __cond_resched
|
||||
* might_resched <- __cond_resched
|
||||
* preempt_schedule <- NOP
|
||||
* preempt_schedule_notrace <- NOP
|
||||
* irqentry_exit_cond_resched <- NOP
|
||||
*
|
||||
* FULL:
|
||||
* cond_resched <- RET0
|
||||
* might_resched <- RET0
|
||||
* preempt_schedule <- preempt_schedule
|
||||
* preempt_schedule_notrace <- preempt_schedule_notrace
|
||||
* irqentry_exit_cond_resched <- irqentry_exit_cond_resched
|
||||
*/
|
||||
|
||||
enum {
|
||||
preempt_dynamic_undefined = -1,
|
||||
preempt_dynamic_none,
|
||||
preempt_dynamic_voluntary,
|
||||
preempt_dynamic_full,
|
||||
};
|
||||
|
||||
int preempt_dynamic_mode = preempt_dynamic_undefined;
|
||||
|
||||
int sched_dynamic_mode(const char *str)
|
||||
{
|
||||
if (!strcmp(str, "none"))
|
||||
return preempt_dynamic_none;
|
||||
|
||||
if (!strcmp(str, "voluntary"))
|
||||
return preempt_dynamic_voluntary;
|
||||
|
||||
if (!strcmp(str, "full"))
|
||||
return preempt_dynamic_full;
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
void sched_dynamic_update(int mode)
|
||||
{
|
||||
/*
|
||||
* Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
|
||||
* the ZERO state, which is invalid.
|
||||
*/
|
||||
static_call_update(cond_resched, __cond_resched);
|
||||
static_call_update(might_resched, __cond_resched);
|
||||
static_call_update(preempt_schedule, __preempt_schedule_func);
|
||||
static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func);
|
||||
static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
|
||||
|
||||
switch (mode) {
|
||||
case preempt_dynamic_none:
|
||||
static_call_update(cond_resched, __cond_resched);
|
||||
static_call_update(might_resched, (void *)&__static_call_return0);
|
||||
static_call_update(preempt_schedule, NULL);
|
||||
static_call_update(preempt_schedule_notrace, NULL);
|
||||
static_call_update(irqentry_exit_cond_resched, NULL);
|
||||
pr_info("Dynamic Preempt: none\n");
|
||||
break;
|
||||
|
||||
case preempt_dynamic_voluntary:
|
||||
static_call_update(cond_resched, __cond_resched);
|
||||
static_call_update(might_resched, __cond_resched);
|
||||
static_call_update(preempt_schedule, NULL);
|
||||
static_call_update(preempt_schedule_notrace, NULL);
|
||||
static_call_update(irqentry_exit_cond_resched, NULL);
|
||||
pr_info("Dynamic Preempt: voluntary\n");
|
||||
break;
|
||||
|
||||
case preempt_dynamic_full:
|
||||
static_call_update(cond_resched, (void *)&__static_call_return0);
|
||||
static_call_update(might_resched, (void *)&__static_call_return0);
|
||||
static_call_update(preempt_schedule, __preempt_schedule_func);
|
||||
static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func);
|
||||
static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
|
||||
pr_info("Dynamic Preempt: full\n");
|
||||
break;
|
||||
}
|
||||
|
||||
preempt_dynamic_mode = mode;
|
||||
}
|
||||
|
||||
static int __init setup_preempt_mode(char *str)
|
||||
{
|
||||
int mode = sched_dynamic_mode(str);
|
||||
if (mode < 0) {
|
||||
pr_warn("Dynamic Preempt: unsupported mode: %s\n", str);
|
||||
return 0;
|
||||
}
|
||||
|
||||
sched_dynamic_update(mode);
|
||||
return 1;
|
||||
}
|
||||
__setup("preempt=", setup_preempt_mode);
|
||||
|
||||
static void __init preempt_dynamic_init(void)
|
||||
{
|
||||
if (preempt_dynamic_mode == preempt_dynamic_undefined) {
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_NONE)) {
|
||||
sched_dynamic_update(preempt_dynamic_none);
|
||||
} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
|
||||
sched_dynamic_update(preempt_dynamic_voluntary);
|
||||
} else {
|
||||
/* Default static call setting, nothing to do */
|
||||
WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
|
||||
preempt_dynamic_mode = preempt_dynamic_full;
|
||||
pr_info("Dynamic Preempt: full\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !CONFIG_PREEMPT_DYNAMIC */
|
||||
|
||||
static inline void preempt_dynamic_init(void) { }
|
||||
|
||||
#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
|
||||
|
||||
/*
|
||||
* This is the entry point to schedule() from kernel preemption
|
||||
* off of irq context.
|
||||
@ -8202,11 +8161,35 @@ EXPORT_SYMBOL(__cond_resched);
#endif

#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define cond_resched_dynamic_enabled __cond_resched
#define cond_resched_dynamic_disabled ((void *)&__static_call_return0)
DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched);
EXPORT_STATIC_CALL_TRAMP(cond_resched);

#define might_resched_dynamic_enabled __cond_resched
#define might_resched_dynamic_disabled ((void *)&__static_call_return0)
DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched);
EXPORT_STATIC_CALL_TRAMP(might_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
int __sched dynamic_cond_resched(void)
{
	if (!static_branch_unlikely(&sk_dynamic_cond_resched))
		return 0;
	return __cond_resched();
}
EXPORT_SYMBOL(dynamic_cond_resched);

static DEFINE_STATIC_KEY_FALSE(sk_dynamic_might_resched);
int __sched dynamic_might_resched(void)
{
	if (!static_branch_unlikely(&sk_dynamic_might_resched))
		return 0;
	return __cond_resched();
}
EXPORT_SYMBOL(dynamic_might_resched);
#endif
#endif
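
For context, a hedged sketch of the kind of caller these stubs serve: long-running kernel loops call cond_resched(), and PREEMPT_DYNAMIC decides at boot whether that resolves to a real __cond_resched() or to a return-0 stub via the static call / static key machinery above. The item type and per-item helper below are invented for the example.

#include <linux/sched.h>

struct item;				/* hypothetical work item */
void process_one(struct item *it);	/* hypothetical per-item work */

static void process_many_items(struct item **items, unsigned long nr)
{
	unsigned long i;

	for (i = 0; i < nr; i++) {
		process_one(items[i]);
		cond_resched();		/* real reschedule check under preempt=none/voluntary,
					 * a return-0 stub under preempt=full */
	}
}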
|
||||
|
||||
/*
|
||||
@ -8271,6 +8254,154 @@ int __cond_resched_rwlock_write(rwlock_t *lock)
|
||||
}
|
||||
EXPORT_SYMBOL(__cond_resched_rwlock_write);
|
||||
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
|
||||
#ifdef CONFIG_GENERIC_ENTRY
|
||||
#include <linux/entry-common.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* SC:cond_resched
|
||||
* SC:might_resched
|
||||
* SC:preempt_schedule
|
||||
* SC:preempt_schedule_notrace
|
||||
* SC:irqentry_exit_cond_resched
|
||||
*
|
||||
*
|
||||
* NONE:
|
||||
* cond_resched <- __cond_resched
|
||||
* might_resched <- RET0
|
||||
* preempt_schedule <- NOP
|
||||
* preempt_schedule_notrace <- NOP
|
||||
* irqentry_exit_cond_resched <- NOP
|
||||
*
|
||||
* VOLUNTARY:
|
||||
* cond_resched <- __cond_resched
|
||||
* might_resched <- __cond_resched
|
||||
* preempt_schedule <- NOP
|
||||
* preempt_schedule_notrace <- NOP
|
||||
* irqentry_exit_cond_resched <- NOP
|
||||
*
|
||||
* FULL:
|
||||
* cond_resched <- RET0
|
||||
* might_resched <- RET0
|
||||
* preempt_schedule <- preempt_schedule
|
||||
* preempt_schedule_notrace <- preempt_schedule_notrace
|
||||
* irqentry_exit_cond_resched <- irqentry_exit_cond_resched
|
||||
*/
|
||||
|
||||
enum {
|
||||
preempt_dynamic_undefined = -1,
|
||||
preempt_dynamic_none,
|
||||
preempt_dynamic_voluntary,
|
||||
preempt_dynamic_full,
|
||||
};
|
||||
|
||||
int preempt_dynamic_mode = preempt_dynamic_undefined;
|
||||
|
||||
int sched_dynamic_mode(const char *str)
|
||||
{
|
||||
if (!strcmp(str, "none"))
|
||||
return preempt_dynamic_none;
|
||||
|
||||
if (!strcmp(str, "voluntary"))
|
||||
return preempt_dynamic_voluntary;
|
||||
|
||||
if (!strcmp(str, "full"))
|
||||
return preempt_dynamic_full;
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled)
#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled)
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key)
#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key)
#else
#error "Unsupported PREEMPT_DYNAMIC mechanism"
#endif
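
The two branches above correspond to the two switching mechanisms: repointing a static call at a different target, or flipping a static branch around a normal call. A self-contained sketch of the same two primitives, with invented identifiers (this is not code from the patch, just an illustration of the APIs it builds on):

#include <linux/jump_label.h>
#include <linux/static_call.h>

static int fast_op(int x) { return x + 1; }
static int noop_op(int x) { return x; }

DEFINE_STATIC_CALL(my_op, fast_op);
static DEFINE_STATIC_KEY_TRUE(my_op_enabled);

static int call_via_static_call(int x)
{
	return static_call(my_op)(x);		/* direct call, retargetable at runtime */
}

static int call_via_static_key(int x)
{
	if (!static_branch_likely(&my_op_enabled))
		return x;			/* taken via a patched jump once the key is disabled */
	return fast_op(x);
}

static void disable_my_op(void)
{
	static_call_update(my_op, noop_op);	/* the ..._DYNAMIC_CALL flavour */
	static_key_disable(&my_op_enabled.key);	/* the ..._DYNAMIC_KEY flavour */
}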
|
||||
|
||||
void sched_dynamic_update(int mode)
|
||||
{
|
||||
/*
|
||||
* Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
|
||||
* the ZERO state, which is invalid.
|
||||
*/
|
||||
preempt_dynamic_enable(cond_resched);
|
||||
preempt_dynamic_enable(might_resched);
|
||||
preempt_dynamic_enable(preempt_schedule);
|
||||
preempt_dynamic_enable(preempt_schedule_notrace);
|
||||
preempt_dynamic_enable(irqentry_exit_cond_resched);
|
||||
|
||||
switch (mode) {
|
||||
case preempt_dynamic_none:
|
||||
preempt_dynamic_enable(cond_resched);
|
||||
preempt_dynamic_disable(might_resched);
|
||||
preempt_dynamic_disable(preempt_schedule);
|
||||
preempt_dynamic_disable(preempt_schedule_notrace);
|
||||
preempt_dynamic_disable(irqentry_exit_cond_resched);
|
||||
pr_info("Dynamic Preempt: none\n");
|
||||
break;
|
||||
|
||||
case preempt_dynamic_voluntary:
|
||||
preempt_dynamic_enable(cond_resched);
|
||||
preempt_dynamic_enable(might_resched);
|
||||
preempt_dynamic_disable(preempt_schedule);
|
||||
preempt_dynamic_disable(preempt_schedule_notrace);
|
||||
preempt_dynamic_disable(irqentry_exit_cond_resched);
|
||||
pr_info("Dynamic Preempt: voluntary\n");
|
||||
break;
|
||||
|
||||
case preempt_dynamic_full:
|
||||
preempt_dynamic_disable(cond_resched);
|
||||
preempt_dynamic_disable(might_resched);
|
||||
preempt_dynamic_enable(preempt_schedule);
|
||||
preempt_dynamic_enable(preempt_schedule_notrace);
|
||||
preempt_dynamic_enable(irqentry_exit_cond_resched);
|
||||
pr_info("Dynamic Preempt: full\n");
|
||||
break;
|
||||
}
|
||||
|
||||
preempt_dynamic_mode = mode;
|
||||
}
|
||||
|
||||
static int __init setup_preempt_mode(char *str)
|
||||
{
|
||||
int mode = sched_dynamic_mode(str);
|
||||
if (mode < 0) {
|
||||
pr_warn("Dynamic Preempt: unsupported mode: %s\n", str);
|
||||
return 0;
|
||||
}
|
||||
|
||||
sched_dynamic_update(mode);
|
||||
return 1;
|
||||
}
|
||||
__setup("preempt=", setup_preempt_mode);
|
||||
|
||||
static void __init preempt_dynamic_init(void)
|
||||
{
|
||||
if (preempt_dynamic_mode == preempt_dynamic_undefined) {
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_NONE)) {
|
||||
sched_dynamic_update(preempt_dynamic_none);
|
||||
} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
|
||||
sched_dynamic_update(preempt_dynamic_voluntary);
|
||||
} else {
|
||||
/* Default static call setting, nothing to do */
|
||||
WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
|
||||
preempt_dynamic_mode = preempt_dynamic_full;
|
||||
pr_info("Dynamic Preempt: full\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !CONFIG_PREEMPT_DYNAMIC */
|
||||
|
||||
static inline void preempt_dynamic_init(void) { }
|
||||
|
||||
#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
|
||||
|
||||
/**
|
||||
* yield - yield the current processor to other threads.
|
||||
*
|
||||
@ -8706,7 +8837,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur,
|
||||
{
|
||||
int ret = 1;
|
||||
|
||||
if (!cpumask_weight(cur))
|
||||
if (cpumask_empty(cur))
|
||||
return ret;
|
||||
|
||||
ret = dl_cpuset_cpumask_can_shrink(cur, trial);
|
||||
@ -8734,8 +8865,11 @@ int task_can_attach(struct task_struct *p,
|
||||
}
|
||||
|
||||
if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
|
||||
cs_cpus_allowed))
|
||||
ret = dl_task_can_attach(p, cs_cpus_allowed);
|
||||
cs_cpus_allowed)) {
|
||||
int cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
|
||||
|
||||
ret = dl_cpu_busy(cpu, p);
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
@ -9019,8 +9153,10 @@ static void cpuset_cpu_active(void)
|
||||
static int cpuset_cpu_inactive(unsigned int cpu)
|
||||
{
|
||||
if (!cpuhp_tasks_frozen) {
|
||||
if (dl_cpu_busy(cpu))
|
||||
return -EBUSY;
|
||||
int ret = dl_cpu_busy(cpu, NULL);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
cpuset_update_active_cpus();
|
||||
} else {
|
||||
num_cpus_frozen++;
|
||||
@ -9050,6 +9186,7 @@ int sched_cpu_activate(unsigned int cpu)
|
||||
set_cpu_active(cpu, true);
|
||||
|
||||
if (sched_smp_initialized) {
|
||||
sched_update_numa(cpu, true);
|
||||
sched_domains_numa_masks_set(cpu);
|
||||
cpuset_cpu_active();
|
||||
}
|
||||
@ -9128,10 +9265,12 @@ int sched_cpu_deactivate(unsigned int cpu)
|
||||
if (!sched_smp_initialized)
|
||||
return 0;
|
||||
|
||||
sched_update_numa(cpu, false);
|
||||
ret = cpuset_cpu_inactive(cpu);
|
||||
if (ret) {
|
||||
balance_push_set(cpu, false);
|
||||
set_cpu_active(cpu, true);
|
||||
sched_update_numa(cpu, true);
|
||||
return ret;
|
||||
}
|
||||
sched_domains_numa_masks_clear(cpu);
|
||||
@ -9234,7 +9373,7 @@ int sched_cpu_dying(unsigned int cpu)
|
||||
|
||||
void __init sched_init_smp(void)
|
||||
{
|
||||
sched_init_numa();
|
||||
sched_init_numa(NUMA_NO_NODE);
|
||||
|
||||
/*
|
||||
* There's no userspace yet to cause hotplug operations; hence all the
|
||||
@ -9246,7 +9385,7 @@ void __init sched_init_smp(void)
|
||||
mutex_unlock(&sched_domains_mutex);
|
||||
|
||||
/* Move init over to a non-isolated CPU */
|
||||
if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
|
||||
if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_DOMAIN)) < 0)
|
||||
BUG();
|
||||
current->flags &= ~PF_NO_SETAFFINITY;
|
||||
sched_init_granularity();
|
||||
@ -9346,7 +9485,6 @@ void __init sched_init(void)
|
||||
#endif /* CONFIG_CPUMASK_OFFSTACK */
|
||||
|
||||
init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
|
||||
init_dl_bandwidth(&def_dl_bandwidth, global_rt_period(), global_rt_runtime());
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
init_defrootdomain();
|
||||
|
@ -1,8 +1,5 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
#include <linux/prctl.h>
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
* A simple wrapper around refcount. An allocated sched_core_cookie's
|
||||
* address is used to compute the cookie of the task.
|
||||
|
@ -1,12 +1,11 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* CPU accounting code for task groups.
|
||||
*
|
||||
* Based on the work by Paul Menage (menage@google.com) and Balbir Singh
|
||||
* (balbir@in.ibm.com).
|
||||
*/
|
||||
#include <asm/irq_regs.h>
|
||||
#include "sched.h"
|
||||
|
||||
/* Time spent by the tasks of the CPU accounting group executing in ... */
|
||||
enum cpuacct_stat_index {
|
||||
@ -334,14 +333,13 @@ static struct cftype files[] = {
|
||||
*/
|
||||
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
||||
{
|
||||
unsigned int cpu = task_cpu(tsk);
|
||||
struct cpuacct *ca;
|
||||
|
||||
rcu_read_lock();
|
||||
lockdep_assert_rq_held(cpu_rq(cpu));
|
||||
|
||||
for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
|
||||
__this_cpu_add(*ca->cpuusage, cputime);
|
||||
|
||||
rcu_read_unlock();
|
||||
*per_cpu_ptr(ca->cpuusage, cpu) += cputime;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -353,10 +351,8 @@ void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
|
||||
{
|
||||
struct cpuacct *ca;
|
||||
|
||||
rcu_read_lock();
|
||||
for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
|
||||
__this_cpu_add(ca->cpustat->cpustat[index], val);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
struct cgroup_subsys cpuacct_cgrp_subsys = {
|
||||
|
@ -1,12 +1,11 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* kernel/sched/cpudl.c
|
||||
* kernel/sched/cpudeadline.c
|
||||
*
|
||||
* Global CPU deadline management
|
||||
*
|
||||
* Author: Juri Lelli <j.lelli@sssup.it>
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
static inline int parent(int i)
|
||||
{
|
||||
|
@ -5,9 +5,6 @@
|
||||
* Copyright (C) 2016, Intel Corporation
|
||||
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
*/
|
||||
#include <linux/cpufreq.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
DEFINE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
|
||||
|
||||
|
@ -6,13 +6,6 @@
|
||||
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
#include <linux/sched/cpufreq.h>
|
||||
#include <trace/events/power.h>
|
||||
|
||||
#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8)
|
||||
|
||||
struct sugov_tunables {
|
||||
@ -289,6 +282,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
|
||||
* into the same scale so we can compare.
|
||||
*/
|
||||
boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
|
||||
boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
|
||||
if (sg_cpu->util < boost)
|
||||
sg_cpu->util = boost;
|
||||
}
|
||||
@ -348,8 +342,11 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
|
||||
/*
|
||||
* Do not reduce the frequency if the CPU has not been idle
|
||||
* recently, as the reduction is likely to be premature then.
|
||||
*
|
||||
* Except when the rq is capped by uclamp_max.
|
||||
*/
|
||||
if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
|
||||
if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
|
||||
sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
|
||||
next_f = sg_policy->next_freq;
|
||||
|
||||
/* Restore cached freq as next_freq has changed */
|
||||
@ -395,8 +392,11 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
|
||||
/*
|
||||
* Do not reduce the target performance level if the CPU has not been
|
||||
* idle recently, as the reduction is likely to be premature then.
|
||||
*
|
||||
* Except when the rq is capped by uclamp_max.
|
||||
*/
|
||||
if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
|
||||
if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
|
||||
sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
|
||||
sg_cpu->util = prev_util;
|
||||
|
||||
cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
|
||||
|
@ -22,7 +22,6 @@
|
||||
* worst case complexity of O(min(101, nr_domcpus)), though the scenario that
|
||||
* yields the worst case search is fairly contrived.
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
* p->rt_priority p->prio newpri cpupri
|
||||
|
@ -2,7 +2,6 @@
|
||||
/*
|
||||
* Simple CPU accounting cgroup controller
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||
|
||||
|
@ -15,10 +15,6 @@
|
||||
* Michael Trimarchi <michael@amarulasolutions.com>,
|
||||
* Fabio Checconi <fchecconi@gmail.com>
|
||||
*/
|
||||
#include "sched.h"
|
||||
#include "pelt.h"
|
||||
|
||||
struct dl_bandwidth def_dl_bandwidth;
|
||||
|
||||
static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
@ -130,6 +126,21 @@ static inline bool dl_bw_visited(int cpu, u64 gen)
|
||||
rd->visit_gen = gen;
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline
|
||||
void __dl_update(struct dl_bw *dl_b, s64 bw)
|
||||
{
|
||||
struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
|
||||
int i;
|
||||
|
||||
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
|
||||
"sched RCU must be held");
|
||||
for_each_cpu_and(i, rd->span, cpu_active_mask) {
|
||||
struct rq *rq = cpu_rq(i);
|
||||
|
||||
rq->dl.extra_bw += bw;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline struct dl_bw *dl_bw_of(int i)
|
||||
{
|
||||
@ -150,8 +161,37 @@ static inline bool dl_bw_visited(int cpu, u64 gen)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline
|
||||
void __dl_update(struct dl_bw *dl_b, s64 bw)
|
||||
{
|
||||
struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
|
||||
|
||||
dl->extra_bw += bw;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline
void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
	dl_b->total_bw -= tsk_bw;
	__dl_update(dl_b, (s32)tsk_bw / cpus);
}

static inline
void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
	dl_b->total_bw += tsk_bw;
	__dl_update(dl_b, -((s32)tsk_bw / cpus));
}

static inline bool
__dl_overflow(struct dl_bw *dl_b, unsigned long cap, u64 old_bw, u64 new_bw)
{
	return dl_b->bw != -1 &&
	       cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
}
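
To make the overflow test concrete, here is a stand-alone user-space rerun of the arithmetic with simplified constants: bandwidths are fixed-point runtime/period ratios, and a new reservation is rejected when the already-admitted total plus its own exceeds the per-CPU bound scaled by the available capacity. The 95% bound mirrors the default rt_runtime/rt_period pair; the shift values only imitate, and are not copied from, the kernel's fixed-point helpers.

#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT	20	/* bandwidth fixed point (simplified) */
#define CAPACITY_SHIFT	10	/* CPU capacity fixed point (simplified) */

static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << BW_SHIFT) / period;
}

int main(void)
{
	uint64_t bound    = to_ratio(1000000, 950000);	/* 95% per CPU */
	uint64_t cap      = 2 << CAPACITY_SHIFT;	/* two full-capacity CPUs */
	uint64_t reserved = to_ratio(100000, 60000);	/* already admitted: 60% */
	uint64_t new_bw   = to_ratio(10000, 5000);	/* candidate: 5ms every 10ms */

	uint64_t limit = (bound * cap) >> CAPACITY_SHIFT;
	int overflow = limit < reserved + new_bw;

	printf("limit=%llu reserved+new=%llu -> %s\n",
	       (unsigned long long)limit,
	       (unsigned long long)(reserved + new_bw),
	       overflow ? "reject (-EBUSY)" : "admit");
	return 0;
}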
|
||||
|
||||
static inline
|
||||
void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
|
||||
{
|
||||
@ -408,7 +448,7 @@ static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
|
||||
{
|
||||
struct sched_dl_entity *dl_se = &p->dl;
|
||||
|
||||
return dl_rq->root.rb_leftmost == &dl_se->rb_node;
|
||||
return rb_first_cached(&dl_rq->root) == &dl_se->rb_node;
|
||||
}
|
||||
|
||||
static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
|
||||
@ -423,12 +463,10 @@ void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
|
||||
void init_dl_bw(struct dl_bw *dl_b)
|
||||
{
|
||||
raw_spin_lock_init(&dl_b->lock);
|
||||
raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
|
||||
if (global_rt_runtime() == RUNTIME_INF)
|
||||
dl_b->bw = -1;
|
||||
else
|
||||
dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
|
||||
raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
|
||||
dl_b->total_bw = 0;
|
||||
}
|
||||
|
||||
@ -683,15 +721,6 @@ void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void pull_dl_task(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void deadline_queue_push_tasks(struct rq *rq)
|
||||
{
|
||||
}
|
||||
@ -1393,6 +1422,9 @@ void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
|
||||
timer->function = inactive_task_timer;
|
||||
}
|
||||
|
||||
#define __node_2_dle(node) \
|
||||
rb_entry((node), struct sched_dl_entity, rb_node)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
|
||||
@ -1422,10 +1454,9 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
|
||||
cpudl_clear(&rq->rd->cpudl, rq->cpu);
|
||||
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
|
||||
} else {
|
||||
struct rb_node *leftmost = dl_rq->root.rb_leftmost;
|
||||
struct sched_dl_entity *entry;
|
||||
struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
|
||||
struct sched_dl_entity *entry = __node_2_dle(leftmost);
|
||||
|
||||
entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
|
||||
dl_rq->earliest_dl.curr = entry->deadline;
|
||||
cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
|
||||
}
|
||||
@ -1466,9 +1497,6 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
|
||||
dec_dl_migration(dl_se, dl_rq);
|
||||
}
|
||||
|
||||
#define __node_2_dle(node) \
|
||||
rb_entry((node), struct sched_dl_entity, rb_node)
|
||||
|
||||
static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
|
||||
{
|
||||
return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
|
||||
@ -1931,15 +1959,14 @@ static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
|
||||
deadline_queue_push_tasks(rq);
|
||||
}
|
||||
|
||||
static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
|
||||
struct dl_rq *dl_rq)
|
||||
static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
|
||||
{
|
||||
struct rb_node *left = rb_first_cached(&dl_rq->root);
|
||||
|
||||
if (!left)
|
||||
return NULL;
|
||||
|
||||
return rb_entry(left, struct sched_dl_entity, rb_node);
|
||||
return __node_2_dle(left);
|
||||
}
|
||||
|
||||
static struct task_struct *pick_task_dl(struct rq *rq)
|
||||
@ -1951,7 +1978,7 @@ static struct task_struct *pick_task_dl(struct rq *rq)
|
||||
if (!sched_dl_runnable(rq))
|
||||
return NULL;
|
||||
|
||||
dl_se = pick_next_dl_entity(rq, dl_rq);
|
||||
dl_se = pick_next_dl_entity(dl_rq);
|
||||
BUG_ON(!dl_se);
|
||||
p = dl_task_of(dl_se);
|
||||
|
||||
@ -2034,15 +2061,17 @@ static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||
*/
|
||||
static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
|
||||
{
|
||||
struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost;
|
||||
struct task_struct *p = NULL;
|
||||
struct rb_node *next_node;
|
||||
|
||||
if (!has_pushable_dl_tasks(rq))
|
||||
return NULL;
|
||||
|
||||
next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);
|
||||
|
||||
next_node:
|
||||
if (next_node) {
|
||||
p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);
|
||||
p = __node_2_pdl(next_node);
|
||||
|
||||
if (pick_dl_task(rq, p, cpu))
|
||||
return p;
|
||||
@ -2208,8 +2237,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
|
||||
if (!has_pushable_dl_tasks(rq))
|
||||
return NULL;
|
||||
|
||||
p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost,
|
||||
struct task_struct, pushable_dl_tasks);
|
||||
p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
|
||||
|
||||
BUG_ON(rq->cpu != task_cpu(p));
|
||||
BUG_ON(task_current(rq, p));
|
||||
@ -2240,12 +2268,6 @@ static int push_dl_task(struct rq *rq)
|
||||
return 0;
|
||||
|
||||
retry:
|
||||
if (is_migration_disabled(next_task))
|
||||
return 0;
|
||||
|
||||
if (WARN_ON(next_task == rq->curr))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If next_task preempts rq->curr, and rq->curr
|
||||
* can move away, it makes sense to just reschedule
|
||||
@ -2258,6 +2280,12 @@ retry:
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (is_migration_disabled(next_task))
|
||||
return 0;
|
||||
|
||||
if (WARN_ON(next_task == rq->curr))
|
||||
return 0;
|
||||
|
||||
/* We might release rq lock */
|
||||
get_task_struct(next_task);
|
||||
|
||||
@ -2731,9 +2759,6 @@ void sched_dl_do_global(void)
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
|
||||
def_dl_bandwidth.dl_period = global_rt_period();
|
||||
def_dl_bandwidth.dl_runtime = global_rt_runtime();
|
||||
|
||||
if (global_rt_runtime() != RUNTIME_INF)
|
||||
new_bw = to_ratio(global_rt_period(), global_rt_runtime());
|
||||
|
||||
@ -2955,41 +2980,6 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
|
||||
{
|
||||
unsigned long flags, cap;
|
||||
unsigned int dest_cpu;
|
||||
struct dl_bw *dl_b;
|
||||
bool overflow;
|
||||
int ret;
|
||||
|
||||
dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
|
||||
|
||||
rcu_read_lock_sched();
|
||||
dl_b = dl_bw_of(dest_cpu);
|
||||
raw_spin_lock_irqsave(&dl_b->lock, flags);
|
||||
cap = dl_bw_capacity(dest_cpu);
|
||||
overflow = __dl_overflow(dl_b, cap, 0, p->dl.dl_bw);
|
||||
if (overflow) {
|
||||
ret = -EBUSY;
|
||||
} else {
|
||||
/*
|
||||
* We reserve space for this task in the destination
|
||||
* root_domain, as we can't fail after this point.
|
||||
* We will free resources in the source root_domain
|
||||
* later on (see set_cpus_allowed_dl()).
|
||||
*/
|
||||
int cpus = dl_bw_cpus(dest_cpu);
|
||||
|
||||
__dl_add(dl_b, p->dl.dl_bw, cpus);
|
||||
ret = 0;
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
|
||||
rcu_read_unlock_sched();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
|
||||
const struct cpumask *trial)
|
||||
{
|
||||
@ -3011,7 +3001,7 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool dl_cpu_busy(unsigned int cpu)
int dl_cpu_busy(int cpu, struct task_struct *p)
{
	unsigned long flags, cap;
	struct dl_bw *dl_b;
@ -3021,11 +3011,22 @@ bool dl_cpu_busy(unsigned int cpu)
	dl_b = dl_bw_of(cpu);
	raw_spin_lock_irqsave(&dl_b->lock, flags);
	cap = dl_bw_capacity(cpu);
	overflow = __dl_overflow(dl_b, cap, 0, 0);
	overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0);

	if (!overflow && p) {
		/*
		 * We reserve space for this task in the destination
		 * root_domain, as we can't fail after this point.
		 * We will free resources in the source root_domain
		 * later on (see set_cpus_allowed_dl()).
		 */
		__dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu));
	}

	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
	rcu_read_unlock_sched();

	return overflow;
	return overflow ? -EBUSY : 0;
}
#endif
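
The reworked helper now covers both callers shown earlier in this series; restating the two call patterns as a sketch (no new API here, just the usage):

/* CPU hot-unplug: no task involved, only ask whether the CPU still carries
 * DEADLINE bandwidth that would be lost. */
static int hotplug_check(int cpu)
{
	return dl_cpu_busy(cpu, NULL);		/* 0 or -EBUSY */
}

/* cpuset attach: additionally reserve @p's bandwidth on the destination. */
static int attach_check(struct task_struct *p, int dest_cpu)
{
	return dl_cpu_busy(dest_cpu, p);
}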
|
||||
|
||||
|
@ -6,7 +6,6 @@
|
||||
*
|
||||
* Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
* This allows printing both to /proc/sched_debug and
|
||||
@ -931,25 +930,15 @@ void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
|
||||
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
|
||||
{
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
struct mempolicy *pol;
|
||||
|
||||
if (p->mm)
|
||||
P(mm->numa_scan_seq);
|
||||
|
||||
task_lock(p);
|
||||
pol = p->mempolicy;
|
||||
if (pol && !(pol->flags & MPOL_F_MORON))
|
||||
pol = NULL;
|
||||
mpol_get(pol);
|
||||
task_unlock(p);
|
||||
|
||||
P(numa_pages_migrated);
|
||||
P(numa_preferred_nid);
|
||||
P(total_numa_faults);
|
||||
SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
|
||||
task_node(p), task_numa_group_id(p));
|
||||
show_numa_stats(p, m);
|
||||
mpol_put(pol);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,38 @@
|
||||
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
|
||||
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
|
||||
*/
|
||||
#include <linux/energy_model.h>
|
||||
#include <linux/mmap_lock.h>
|
||||
#include <linux/hugetlb_inline.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/mm_api.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/spinlock_api.h>
|
||||
#include <linux/cpumask_api.h>
|
||||
#include <linux/lockdep_api.h>
|
||||
#include <linux/softirq.h>
|
||||
#include <linux/refcount_api.h>
|
||||
#include <linux/topology.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/cond_resched.h>
|
||||
#include <linux/sched/cputime.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mutex_api.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/psi.h>
|
||||
#include <linux/ratelimit.h>
|
||||
|
||||
#include <asm/switch_to.h>
|
||||
|
||||
#include <linux/sched/cond_resched.h>
|
||||
|
||||
#include "sched.h"
|
||||
#include "stats.h"
|
||||
#include "autogroup.h"
|
||||
|
||||
/*
|
||||
* Targeted preemption latency for CPU-bound tasks:
|
||||
@ -1259,10 +1290,10 @@ static bool numa_is_active_node(int nid, struct numa_group *ng)
|
||||
|
||||
/* Handle placement on systems where not all nodes are directly connected. */
|
||||
static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||
int maxdist, bool task)
|
||||
int lim_dist, bool task)
|
||||
{
|
||||
unsigned long score = 0;
|
||||
int node;
|
||||
int node, max_dist;
|
||||
|
||||
/*
|
||||
* All nodes are directly connected, and the same distance
|
||||
@ -1271,6 +1302,8 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||
if (sched_numa_topology_type == NUMA_DIRECT)
|
||||
return 0;
|
||||
|
||||
/* sched_max_numa_distance may be changed in parallel. */
|
||||
max_dist = READ_ONCE(sched_max_numa_distance);
|
||||
/*
|
||||
* This code is called for each node, introducing N^2 complexity,
|
||||
* which should be ok given the number of nodes rarely exceeds 8.
|
||||
@ -1283,7 +1316,7 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||
* The furthest away nodes in the system are not interesting
|
||||
* for placement; nid was already counted.
|
||||
*/
|
||||
if (dist == sched_max_numa_distance || node == nid)
|
||||
if (dist >= max_dist || node == nid)
|
||||
continue;
|
||||
|
||||
/*
|
||||
@ -1293,8 +1326,7 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||
* "hoplimit", only nodes closer by than "hoplimit" are part
|
||||
* of each group. Skip other nodes.
|
||||
*/
|
||||
if (sched_numa_topology_type == NUMA_BACKPLANE &&
|
||||
dist >= maxdist)
|
||||
if (sched_numa_topology_type == NUMA_BACKPLANE && dist >= lim_dist)
|
||||
continue;
|
||||
|
||||
/* Add up the faults from nearby nodes. */
|
||||
@ -1312,8 +1344,8 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||
* This seems to result in good task placement.
|
||||
*/
|
||||
if (sched_numa_topology_type == NUMA_GLUELESS_MESH) {
|
||||
faults *= (sched_max_numa_distance - dist);
|
||||
faults /= (sched_max_numa_distance - LOCAL_DISTANCE);
|
||||
faults *= (max_dist - dist);
|
||||
faults /= (max_dist - LOCAL_DISTANCE);
|
||||
}
|
||||
|
||||
score += faults;
|
||||
@ -1489,6 +1521,7 @@ struct task_numa_env {
|
||||
|
||||
int src_cpu, src_nid;
|
||||
int dst_cpu, dst_nid;
|
||||
int imb_numa_nr;
|
||||
|
||||
struct numa_stats src_stats, dst_stats;
|
||||
|
||||
@ -1503,7 +1536,7 @@ struct task_numa_env {
|
||||
static unsigned long cpu_load(struct rq *rq);
|
||||
static unsigned long cpu_runnable(struct rq *rq);
|
||||
static inline long adjust_numa_imbalance(int imbalance,
|
||||
int dst_running, int dst_weight);
|
||||
int dst_running, int imb_numa_nr);
|
||||
|
||||
static inline enum
|
||||
numa_type numa_classify(unsigned int imbalance_pct,
|
||||
@ -1884,7 +1917,7 @@ static void task_numa_find_cpu(struct task_numa_env *env,
|
||||
dst_running = env->dst_stats.nr_running + 1;
|
||||
imbalance = max(0, dst_running - src_running);
|
||||
imbalance = adjust_numa_imbalance(imbalance, dst_running,
|
||||
env->dst_stats.weight);
|
||||
env->imb_numa_nr);
|
||||
|
||||
/* Use idle CPU if there is no imbalance */
|
||||
if (!imbalance) {
|
||||
@ -1949,8 +1982,10 @@ static int task_numa_migrate(struct task_struct *p)
|
||||
*/
|
||||
rcu_read_lock();
|
||||
sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
|
||||
if (sd)
|
||||
if (sd) {
|
||||
env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
|
||||
env.imb_numa_nr = sd->imb_numa_nr;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
@ -1985,7 +2020,7 @@ static int task_numa_migrate(struct task_struct *p)
|
||||
*/
|
||||
ng = deref_curr_numa_group(p);
|
||||
if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
|
||||
for_each_online_node(nid) {
|
||||
for_each_node_state(nid, N_CPU) {
|
||||
if (nid == env.src_nid || nid == p->numa_preferred_nid)
|
||||
continue;
|
||||
|
||||
@ -2083,13 +2118,13 @@ static void numa_group_count_active_nodes(struct numa_group *numa_group)
|
||||
unsigned long faults, max_faults = 0;
|
||||
int nid, active_nodes = 0;
|
||||
|
||||
for_each_online_node(nid) {
|
||||
for_each_node_state(nid, N_CPU) {
|
||||
faults = group_faults_cpu(numa_group, nid);
|
||||
if (faults > max_faults)
|
||||
max_faults = faults;
|
||||
}
|
||||
|
||||
for_each_online_node(nid) {
|
||||
for_each_node_state(nid, N_CPU) {
|
||||
faults = group_faults_cpu(numa_group, nid);
|
||||
if (faults * ACTIVE_NODE_FRACTION > max_faults)
|
||||
active_nodes++;
|
||||
@ -2243,7 +2278,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
|
||||
|
||||
dist = sched_max_numa_distance;
|
||||
|
||||
for_each_online_node(node) {
|
||||
for_each_node_state(node, N_CPU) {
|
||||
score = group_weight(p, node, dist);
|
||||
if (score > max_score) {
|
||||
max_score = score;
|
||||
@ -2262,7 +2297,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
|
||||
* inside the highest scoring group of nodes. The nodemask tricks
|
||||
* keep the complexity of the search down.
|
||||
*/
|
||||
nodes = node_online_map;
|
||||
nodes = node_states[N_CPU];
|
||||
for (dist = sched_max_numa_distance; dist > LOCAL_DISTANCE; dist--) {
|
||||
unsigned long max_faults = 0;
|
||||
nodemask_t max_group = NODE_MASK_NONE;
|
||||
@ -2401,6 +2436,21 @@ static void task_numa_placement(struct task_struct *p)
|
||||
}
|
||||
}
|
||||
|
||||
/* Cannot migrate task to CPU-less node */
|
||||
if (max_nid != NUMA_NO_NODE && !node_state(max_nid, N_CPU)) {
|
||||
int near_nid = max_nid;
|
||||
int distance, near_distance = INT_MAX;
|
||||
|
||||
for_each_node_state(nid, N_CPU) {
|
||||
distance = node_distance(max_nid, nid);
|
||||
if (distance < near_distance) {
|
||||
near_nid = nid;
|
||||
near_distance = distance;
|
||||
}
|
||||
}
|
||||
max_nid = near_nid;
|
||||
}
|
||||
|
||||
if (ng) {
|
||||
numa_group_count_active_nodes(ng);
|
||||
spin_unlock_irq(group_lock);
|
||||
@ -2825,6 +2875,8 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
|
||||
/* Protect against double add, see task_tick_numa and task_numa_work */
|
||||
p->numa_work.next = &p->numa_work;
|
||||
p->numa_faults = NULL;
|
||||
p->numa_pages_migrated = 0;
|
||||
p->total_numa_faults = 0;
|
||||
RCU_INIT_POINTER(p->numa_group, NULL);
|
||||
p->last_task_numa_placement = 0;
|
||||
p->last_sum_exec_runtime = 0;
|
||||
@ -9040,9 +9092,9 @@ static bool update_pick_idlest(struct sched_group *idlest,
 * This is an approximation as the number of running tasks may not be
 * related to the number of busy CPUs due to sched_setaffinity.
 */
static inline bool allow_numa_imbalance(int dst_running, int dst_weight)
static inline bool allow_numa_imbalance(int running, int imb_numa_nr)
{
	return (dst_running < (dst_weight >> 2));
	return running <= imb_numa_nr;
}
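
The old rule allowed an imbalance while fewer than a quarter of the destination's CPUs were running something; the new rule compares against imb_numa_nr, a per-domain value derived from the LLC layout elsewhere in this merge. A small stand-alone comparison with made-up topology numbers (32 CPUs, imb_numa_nr of 4):

#include <stdbool.h>
#include <stdio.h>

static bool old_rule(int dst_running, int dst_weight)
{
	return dst_running < (dst_weight >> 2);	/* under 25% of the CPUs busy */
}

static bool new_rule(int running, int imb_numa_nr)
{
	return running <= imb_numa_nr;		/* bounded by the LLC-derived value */
}

int main(void)
{
	int dst_weight = 32, imb_numa_nr = 4;	/* invented example topology */

	for (int running = 1; running <= 8; running++)
		printf("running=%d old=%d new=%d\n", running,
		       old_rule(running, dst_weight),
		       new_rule(running, imb_numa_nr));
	return 0;
}

With these numbers the old rule keeps allowing an imbalance up to 7 running tasks, while the new rule stops at 4, so a node with few LLC domains packs less aggressively.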
|
||||
|
||||
/*
|
||||
@ -9176,12 +9228,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
|
||||
return idlest;
|
||||
#endif
|
||||
/*
|
||||
* Otherwise, keep the task on this node to stay close
|
||||
* its wakeup source and improve locality. If there is
|
||||
* a real need of migration, periodic load balance will
|
||||
* take care of it.
|
||||
* Otherwise, keep the task close to the wakeup source
|
||||
* and improve locality if the number of running tasks
|
||||
* would remain below threshold where an imbalance is
|
||||
* allowed. If there is a real need of migration,
|
||||
* periodic load balance will take care of it.
|
||||
*/
|
||||
if (allow_numa_imbalance(local_sgs.sum_nr_running, sd->span_weight))
|
||||
if (allow_numa_imbalance(local_sgs.sum_nr_running + 1, sd->imb_numa_nr))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -9273,9 +9326,9 @@ next_group:
|
||||
#define NUMA_IMBALANCE_MIN 2
|
||||
|
||||
static inline long adjust_numa_imbalance(int imbalance,
|
||||
int dst_running, int dst_weight)
|
||||
int dst_running, int imb_numa_nr)
|
||||
{
|
||||
if (!allow_numa_imbalance(dst_running, dst_weight))
|
||||
if (!allow_numa_imbalance(dst_running, imb_numa_nr))
|
||||
return imbalance;
|
||||
|
||||
/*
|
||||
@ -9387,7 +9440,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||
/* Consider allowing a small imbalance between NUMA groups */
|
||||
if (env->sd->flags & SD_NUMA) {
|
||||
env->imbalance = adjust_numa_imbalance(env->imbalance,
|
||||
busiest->sum_nr_running, busiest->group_weight);
|
||||
local->sum_nr_running + 1, env->sd->imb_numa_nr);
|
||||
}
|
||||
|
||||
return;
|
||||
@ -10351,7 +10404,7 @@ static inline int on_null_domain(struct rq *rq)
|
||||
* - When one of the busy CPUs notice that there may be an idle rebalancing
|
||||
* needed, they will kick the idle load balancer, which then does idle
|
||||
* load balancing for all the idle CPUs.
|
||||
* - HK_FLAG_MISC CPUs are used for this task, because HK_FLAG_SCHED not set
|
||||
* - HK_TYPE_MISC CPUs are used for this task, because HK_TYPE_SCHED not set
|
||||
* anywhere yet.
|
||||
*/
|
||||
|
||||
@ -10360,7 +10413,7 @@ static inline int find_new_ilb(void)
|
||||
int ilb;
|
||||
const struct cpumask *hk_mask;
|
||||
|
||||
hk_mask = housekeeping_cpumask(HK_FLAG_MISC);
|
||||
hk_mask = housekeeping_cpumask(HK_TYPE_MISC);
|
||||
|
||||
for_each_cpu_and(ilb, nohz.idle_cpus_mask, hk_mask) {
|
||||
|
||||
@ -10376,7 +10429,7 @@ static inline int find_new_ilb(void)
|
||||
|
||||
/*
|
||||
* Kick a CPU to do the nohz balancing, if it is time for it. We pick any
|
||||
* idle CPU in the HK_FLAG_MISC housekeeping set (if there is one).
|
||||
* idle CPU in the HK_TYPE_MISC housekeeping set (if there is one).
|
||||
*/
|
||||
static void kick_ilb(unsigned int flags)
|
||||
{
|
||||
@ -10589,7 +10642,7 @@ void nohz_balance_enter_idle(int cpu)
|
||||
return;
|
||||
|
||||
/* Spare idle load balancing on CPUs that don't want to be disturbed: */
|
||||
if (!housekeeping_cpu(cpu, HK_FLAG_SCHED))
|
||||
if (!housekeeping_cpu(cpu, HK_TYPE_SCHED))
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -10805,7 +10858,7 @@ static void nohz_newidle_balance(struct rq *this_rq)
|
||||
* This CPU doesn't want to be disturbed by scheduler
|
||||
* housekeeping
|
||||
*/
|
||||
if (!housekeeping_cpu(this_cpu, HK_FLAG_SCHED))
|
||||
if (!housekeeping_cpu(this_cpu, HK_TYPE_SCHED))
|
||||
return;
|
||||
|
||||
/* Will wake up very soon. No time for doing anything else*/
|
||||
|
@ -6,9 +6,6 @@
|
||||
* (NOTE: these are not related to SCHED_IDLE batch scheduled
|
||||
* tasks which are handled in sched/fair.c )
|
||||
*/
|
||||
#include "sched.h"
|
||||
|
||||
#include <trace/events/power.h>
|
||||
|
||||
/* Linker adds these: start and end of __cpuidle functions */
|
||||
extern char __cpuidle_text_start[], __cpuidle_text_end[];
|
||||
|
@@ -7,136 +7,179 @@
* Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
*
*/
#include "sched.h"

enum hk_flags {
HK_FLAG_TIMER = BIT(HK_TYPE_TIMER),
HK_FLAG_RCU = BIT(HK_TYPE_RCU),
HK_FLAG_MISC = BIT(HK_TYPE_MISC),
HK_FLAG_SCHED = BIT(HK_TYPE_SCHED),
HK_FLAG_TICK = BIT(HK_TYPE_TICK),
HK_FLAG_DOMAIN = BIT(HK_TYPE_DOMAIN),
HK_FLAG_WQ = BIT(HK_TYPE_WQ),
HK_FLAG_MANAGED_IRQ = BIT(HK_TYPE_MANAGED_IRQ),
HK_FLAG_KTHREAD = BIT(HK_TYPE_KTHREAD),
};

DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
EXPORT_SYMBOL_GPL(housekeeping_overridden);
static cpumask_var_t housekeeping_mask;
static unsigned int housekeeping_flags;

bool housekeeping_enabled(enum hk_flags flags)
struct housekeeping {
cpumask_var_t cpumasks[HK_TYPE_MAX];
unsigned long flags;
};

static struct housekeeping housekeeping;

bool housekeeping_enabled(enum hk_type type)
{
return !!(housekeeping_flags & flags);
return !!(housekeeping.flags & BIT(type));
}
EXPORT_SYMBOL_GPL(housekeeping_enabled);

int housekeeping_any_cpu(enum hk_flags flags)
int housekeeping_any_cpu(enum hk_type type)
{
int cpu;

if (static_branch_unlikely(&housekeeping_overridden)) {
if (housekeeping_flags & flags) {
cpu = sched_numa_find_closest(housekeeping_mask, smp_processor_id());
if (housekeeping.flags & BIT(type)) {
cpu = sched_numa_find_closest(housekeeping.cpumasks[type], smp_processor_id());
if (cpu < nr_cpu_ids)
return cpu;

return cpumask_any_and(housekeeping_mask, cpu_online_mask);
return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
}
}
return smp_processor_id();
}
EXPORT_SYMBOL_GPL(housekeeping_any_cpu);

const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
const struct cpumask *housekeeping_cpumask(enum hk_type type)
{
if (static_branch_unlikely(&housekeeping_overridden))
if (housekeeping_flags & flags)
return housekeeping_mask;
if (housekeeping.flags & BIT(type))
return housekeeping.cpumasks[type];
return cpu_possible_mask;
}
EXPORT_SYMBOL_GPL(housekeeping_cpumask);

void housekeeping_affine(struct task_struct *t, enum hk_flags flags)
void housekeeping_affine(struct task_struct *t, enum hk_type type)
{
if (static_branch_unlikely(&housekeeping_overridden))
if (housekeeping_flags & flags)
set_cpus_allowed_ptr(t, housekeeping_mask);
if (housekeeping.flags & BIT(type))
set_cpus_allowed_ptr(t, housekeeping.cpumasks[type]);
}
EXPORT_SYMBOL_GPL(housekeeping_affine);

bool housekeeping_test_cpu(int cpu, enum hk_flags flags)
bool housekeeping_test_cpu(int cpu, enum hk_type type)
{
if (static_branch_unlikely(&housekeeping_overridden))
if (housekeeping_flags & flags)
return cpumask_test_cpu(cpu, housekeeping_mask);
if (housekeeping.flags & BIT(type))
return cpumask_test_cpu(cpu, housekeeping.cpumasks[type]);
return true;
}
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);

void __init housekeeping_init(void)
{
if (!housekeeping_flags)
enum hk_type type;

if (!housekeeping.flags)
return;

static_branch_enable(&housekeeping_overridden);

if (housekeeping_flags & HK_FLAG_TICK)
if (housekeeping.flags & HK_FLAG_TICK)
sched_tick_offload_init();

/* We need at least one CPU to handle housekeeping work */
WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) {
/* We need at least one CPU to handle housekeeping work */
WARN_ON_ONCE(cpumask_empty(housekeeping.cpumasks[type]));
}
}

static int __init housekeeping_setup(char *str, enum hk_flags flags)
static void __init housekeeping_setup_type(enum hk_type type,
cpumask_var_t housekeeping_staging)
{
cpumask_var_t non_housekeeping_mask;
cpumask_var_t tmp;

alloc_bootmem_cpumask_var(&housekeeping.cpumasks[type]);
cpumask_copy(housekeeping.cpumasks[type],
housekeeping_staging);
}

static int __init housekeeping_setup(char *str, unsigned long flags)
{
cpumask_var_t non_housekeeping_mask, housekeeping_staging;
int err = 0;

if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) {
pr_warn("Housekeeping: nohz unsupported."
" Build with CONFIG_NO_HZ_FULL\n");
return 0;
}
}

alloc_bootmem_cpumask_var(&non_housekeeping_mask);
if (cpulist_parse(str, non_housekeeping_mask) < 0) {
pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
goto free_non_housekeeping_mask;
}

alloc_bootmem_cpumask_var(&tmp);
if (!housekeeping_flags) {
alloc_bootmem_cpumask_var(&housekeeping_mask);
cpumask_andnot(housekeeping_mask,
cpu_possible_mask, non_housekeeping_mask);
alloc_bootmem_cpumask_var(&housekeeping_staging);
cpumask_andnot(housekeeping_staging,
cpu_possible_mask, non_housekeeping_mask);

cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask);
if (cpumask_empty(tmp)) {
if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) {
__cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
if (!housekeeping.flags) {
pr_warn("Housekeeping: must include one present CPU, "
"using boot CPU:%d\n", smp_processor_id());
__cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
}
}

if (!housekeeping.flags) {
/* First setup call ("nohz_full=" or "isolcpus=") */
enum hk_type type;

for_each_set_bit(type, &flags, HK_TYPE_MAX)
housekeeping_setup_type(type, housekeeping_staging);
} else {
cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask);
if (cpumask_empty(tmp))
__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask);
if (!cpumask_equal(tmp, housekeeping_mask)) {
pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
free_bootmem_cpumask_var(tmp);
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
}
}
free_bootmem_cpumask_var(tmp);
/* Second setup call ("nohz_full=" after "isolcpus=" or the reverse) */
enum hk_type type;
unsigned long iter_flags = flags & housekeeping.flags;

if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) {
if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
tick_nohz_full_setup(non_housekeeping_mask);
} else {
pr_warn("Housekeeping: nohz unsupported."
" Build with CONFIG_NO_HZ_FULL\n");
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
for_each_set_bit(type, &iter_flags, HK_TYPE_MAX) {
if (!cpumask_equal(housekeeping_staging,
housekeeping.cpumasks[type])) {
pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
goto free_housekeeping_staging;
}
}

iter_flags = flags & ~housekeeping.flags;

for_each_set_bit(type, &iter_flags, HK_TYPE_MAX)
housekeeping_setup_type(type, housekeeping_staging);
}

housekeeping_flags |= flags;
if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK))
tick_nohz_full_setup(non_housekeeping_mask);

housekeeping.flags |= flags;
err = 1;

free_housekeeping_staging:
free_bootmem_cpumask_var(housekeeping_staging);
free_non_housekeeping_mask:
free_bootmem_cpumask_var(non_housekeeping_mask);

return 1;
return err;
}

static int __init housekeeping_nohz_full_setup(char *str)
{
unsigned int flags;
unsigned long flags;

flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU |
HK_FLAG_MISC | HK_FLAG_KTHREAD;
@@ -147,7 +190,7 @@ __setup("nohz_full=", housekeeping_nohz_full_setup);

static int __init housekeeping_isolcpus_setup(char *str)
{
unsigned int flags = 0;
unsigned long flags = 0;
bool illegal = false;
char *par;
int len;
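The isolation.c rework above replaces the single housekeeping_mask/housekeeping_flags pair with a struct housekeeping that carries one cpumask per hk_type, so each kind of housekeeping work can be confined to its own set of CPUs. A minimal stand-alone sketch of that per-type lookup pattern, using plain bitmasks in place of the kernel's cpumask_var_t API (all names below are illustrative stand-ins, not the kernel interfaces):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel's hk_type enumeration. */
enum hk_type { HK_TYPE_TIMER, HK_TYPE_RCU, HK_TYPE_TICK, HK_TYPE_MAX };

/* One CPU mask per housekeeping type, plus a flags word recording which
 * types were actually configured on the command line. */
struct housekeeping {
	unsigned long cpumasks[HK_TYPE_MAX];   /* bit N set = CPU N may do this work */
	unsigned long flags;                   /* one bit per active hk_type */
};

static struct housekeeping housekeeping;
static const unsigned long all_cpus = 0xffUL;  /* pretend 8 possible CPUs */

static bool housekeeping_enabled(enum hk_type type)
{
	return housekeeping.flags & (1UL << type);
}

/* Mirrors housekeeping_cpumask(): fall back to "all CPUs" when the given
 * type was never isolated. */
static unsigned long housekeeping_cpumask(enum hk_type type)
{
	if (housekeeping_enabled(type))
		return housekeeping.cpumasks[type];
	return all_cpus;
}

int main(void)
{
	/* "isolcpus=" style setup: only CPUs 2-7 do tick housekeeping. */
	housekeeping.flags |= 1UL << HK_TYPE_TICK;
	housekeeping.cpumasks[HK_TYPE_TICK] = all_cpus & ~0x3UL;

	printf("tick housekeeping mask:  %#lx\n", housekeeping_cpumask(HK_TYPE_TICK));
	printf("timer housekeeping mask: %#lx\n", housekeeping_cpumask(HK_TYPE_TIMER));
	return 0;
}

The fallback to all_cpus mirrors housekeeping_cpumask() returning cpu_possible_mask for any type that was never isolated on the command line.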
@@ -6,7 +6,6 @@
* figure. Its a silly number but people think its important. We go through
* great pains to make it work on big machines and tickless kernels.
*/
#include "sched.h"

/*
* Global load-average calculations

@@ -4,7 +4,6 @@
*
* membarrier system call
*/
#include "sched.h"

/*
* For documentation purposes, here are some membarrier ordering

@@ -24,10 +24,6 @@
* Author: Vincent Guittot <vincent.guittot@linaro.org>
*/

#include <linux/sched.h>
#include "sched.h"
#include "pelt.h"

/*
* Approximate:
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
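The pelt.c comment above describes the load decay val * y^n, with y chosen so that y^32 ≈ 0.5, i.e. a contribution halves roughly every scheduling period. A quick stand-alone check of that constant in plain floating point (the kernel uses precomputed fixed-point tables rather than pow(); compile with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
	/* y is picked so that a contribution halves every 32 periods. */
	double y = pow(0.5, 1.0 / 32.0);
	double val = 1024.0;              /* e.g. a fully-running task's load */

	printf("y        = %.6f\n", y);
	printf("y^32     = %.6f (should be ~0.5)\n", pow(y, 32));
	printf("val*y^32 = %.1f (half of %.1f)\n", val * pow(y, 32), val);
	printf("val*y^64 = %.1f (a quarter)\n", val * pow(y, 64));
	return 0;
}

Run, this prints y ≈ 0.978572 and confirms that a constant contribution decays to half after 32 periods and to a quarter after 64.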
@@ -137,21 +137,6 @@
* sampling of the aggregate task states would be.
*/

#include "../workqueue_internal.h"
#include <linux/sched/loadavg.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/seqlock.h>
#include <linux/uaccess.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/psi.h>
#include "sched.h"

static int psi_bug __read_mostly;

DEFINE_STATIC_KEY_FALSE(psi_disabled);
@@ -523,7 +508,7 @@ static void init_triggers(struct psi_group *group, u64 now)
static u64 update_triggers(struct psi_group *group, u64 now)
{
struct psi_trigger *t;
bool new_stall = false;
bool update_total = false;
u64 *total = group->total[PSI_POLL];

/*
@@ -532,24 +517,35 @@ static u64 update_triggers(struct psi_group *group, u64 now)
*/
list_for_each_entry(t, &group->triggers, node) {
u64 growth;
bool new_stall;

/* Check for stall activity */
if (group->polling_total[t->state] == total[t->state])
new_stall = group->polling_total[t->state] != total[t->state];

/* Check for stall activity or a previous threshold breach */
if (!new_stall && !t->pending_event)
continue;

/*
* Multiple triggers might be looking at the same state,
* remember to update group->polling_total[] once we've
* been through all of them. Also remember to extend the
* polling time if we see new stall activity.
* Check for new stall activity, as well as deferred
* events that occurred in the last window after the
* trigger had already fired (we want to ratelimit
* events without dropping any).
*/
new_stall = true;
if (new_stall) {
/*
* Multiple triggers might be looking at the same state,
* remember to update group->polling_total[] once we've
* been through all of them. Also remember to extend the
* polling time if we see new stall activity.
*/
update_total = true;

/* Calculate growth since last update */
growth = window_update(&t->win, now, total[t->state]);
if (growth < t->threshold)
continue;
/* Calculate growth since last update */
growth = window_update(&t->win, now, total[t->state]);
if (growth < t->threshold)
continue;

t->pending_event = true;
}
/* Limit event signaling to once per window */
if (now < t->last_event_time + t->win.size)
continue;
@@ -558,9 +554,11 @@ static u64 update_triggers(struct psi_group *group, u64 now)
if (cmpxchg(&t->event, 0, 1) == 0)
wake_up_interruptible(&t->event_wait);
t->last_event_time = now;
/* Reset threshold breach flag once event got generated */
t->pending_event = false;
}

if (new_stall)
if (update_total)
memcpy(group->polling_total, total,
sizeof(group->polling_total));

@@ -1124,6 +1122,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
t->event = 0;
t->last_event_time = 0;
init_waitqueue_head(&t->event_wait);
t->pending_event = false;

mutex_lock(&group->trigger_lock);
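The update_triggers() change above separates "a threshold was breached" from "an event may be delivered": a breach only sets t->pending_event, and the wakeup happens the next time the per-trigger window allows it, so events are rate-limited without being dropped. A compact stand-alone sketch of that defer-and-ratelimit pattern (field names echo the psi trigger, but the code below is illustrative, not the psi implementation):

#include <stdbool.h>
#include <stdio.h>

struct trigger {
	unsigned long long threshold;        /* growth needed to fire */
	unsigned long long win_size;         /* minimum spacing between events */
	unsigned long long last_event_time;
	bool pending_event;                  /* breach seen, event not yet sent */
};

/* Called periodically with the growth observed since the last call. */
static void update_trigger(struct trigger *t, unsigned long long growth,
			   unsigned long long now)
{
	if (growth >= t->threshold)
		t->pending_event = true;

	/* Rate-limit: at most one event per window, but never lose a breach. */
	if (!t->pending_event || now < t->last_event_time + t->win_size)
		return;

	printf("event at t=%llu\n", now);
	t->last_event_time = now;
	t->pending_event = false;
}

int main(void)
{
	struct trigger t = { .threshold = 100, .win_size = 1000 };

	update_trigger(&t, 150, 1500);  /* breach -> event fires at t=1500 */
	update_trigger(&t, 200, 1900);  /* breach inside the window -> deferred */
	update_trigger(&t, 0, 2600);    /* window over -> deferred event fires */
	return 0;
}

Running it prints one event for the first breach and a second, deferred event once the window has elapsed; the breach seen inside the window is never lost.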
@@ -3,9 +3,6 @@
* Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
* policies)
*/
#include "sched.h"

#include "pelt.h"

int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
@@ -271,8 +268,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)

#ifdef CONFIG_SMP

static void pull_rt_task(struct rq *this_rq);

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
/* Try to pull RT tasks here if we lower this rq's prio */
@@ -429,15 +424,6 @@ void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
return false;
}

static inline void pull_rt_task(struct rq *this_rq)
{
}

static inline void rt_queue_push_tasks(struct rq *rq)
{
}
@@ -1730,8 +1716,7 @@ static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool f
rt_queue_push_tasks(rq);
}

static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
struct rt_rq *rt_rq)
static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
{
struct rt_prio_array *array = &rt_rq->active;
struct sched_rt_entity *next = NULL;
@@ -1753,7 +1738,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
struct rt_rq *rt_rq = &rq->rt;

do {
rt_se = pick_next_rt_entity(rq, rt_rq);
rt_se = pick_next_rt_entity(rt_rq);
BUG_ON(!rt_se);
rt_rq = group_rt_rq(rt_se);
} while (rt_rq);
@@ -2026,6 +2011,16 @@ static int push_rt_task(struct rq *rq, bool pull)
return 0;

retry:
/*
* It's possible that the next_task slipped in of
* higher priority than current. If that's the case
* just reschedule current.
*/
if (unlikely(next_task->prio < rq->curr->prio)) {
resched_curr(rq);
return 0;
}

if (is_migration_disabled(next_task)) {
struct task_struct *push_task = NULL;
int cpu;
@@ -2033,6 +2028,18 @@ retry:
if (!pull || rq->push_busy)
return 0;

/*
* Invoking find_lowest_rq() on anything but an RT task doesn't
* make sense. Per the above priority check, curr has to
* be of higher priority than next_task, so no need to
* reschedule when bailing out.
*
* Note that the stoppers are masqueraded as SCHED_FIFO
* (cf. sched_set_stop_task()), so we can't rely on rt_task().
*/
if (rq->curr->sched_class != &rt_sched_class)
return 0;

cpu = find_lowest_rq(rq->curr);
if (cpu == -1 || cpu == rq->cpu)
return 0;
@@ -2057,16 +2064,6 @@ retry:
if (WARN_ON(next_task == rq->curr))
return 0;

/*
* It's possible that the next_task slipped in of
* higher priority than current. If that's the case
* just reschedule current.
*/
if (unlikely(next_task->prio < rq->curr->prio)) {
resched_curr(rq);
return 0;
}

/* We might release rq lock */
get_task_struct(next_task);
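The push_rt_task() hunks above hoist the cheap "next_task already outranks curr, just reschedule" test ahead of the migration-disabled path, and only call find_lowest_rq(rq->curr) when curr really is an RT task (stoppers masquerade as SCHED_FIFO, so rt_task() alone is not a reliable test). A much simplified model of that ordering, with toy types standing in for rq and task_struct and lower prio values meaning higher priority, as in the kernel:

#include <stdbool.h>
#include <stdio.h>

struct toy_task { int prio; bool migration_disabled; bool is_rt; };
struct toy_rq   { struct toy_task *curr; };

/* Returns true when a push or reschedule action was taken. */
static bool toy_push_task(struct toy_rq *rq, struct toy_task *next_task)
{
	/* Cheap check first: if next_task already beats curr, just resched. */
	if (next_task->prio < rq->curr->prio) {
		printf("reschedule current\n");
		return true;
	}

	if (next_task->migration_disabled) {
		/* Searching for a lower-priority CPU only makes sense when
		 * curr is an RT task; otherwise bail out quietly. */
		if (!rq->curr->is_rt)
			return false;
		printf("try to push away curr instead\n");
		return true;
	}

	printf("push next_task to another CPU\n");
	return true;
}

int main(void)
{
	struct toy_task curr = { .prio = 10, .is_rt = true };
	struct toy_task next = { .prio = 5 };   /* higher priority than curr */
	struct toy_rq rq = { .curr = &curr };

	toy_push_task(&rq, &next);
	return 0;
}

With the reordered checks, the expensive CPU search is only reached when pushing can actually help, which is the intent of the two hunks above.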
@@ -2,86 +2,98 @@
/*
* Scheduler internal types and methods:
*/
#include <linux/sched.h>
#ifndef _KERNEL_SCHED_SCHED_H
#define _KERNEL_SCHED_SCHED_H

#include <linux/sched/affinity.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/clock.h>
#include <linux/sched/coredump.h>
#include <linux/sched/cpufreq.h>
#include <linux/sched/cputime.h>
#include <linux/sched/deadline.h>
#include <linux/sched/debug.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/idle.h>
#include <linux/sched/init.h>
#include <linux/sched/isolation.h>
#include <linux/sched/jobctl.h>
#include <linux/sched.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/mm.h>
#include <linux/sched/nohz.h>
#include <linux/sched/numa_balancing.h>
#include <linux/sched/prio.h>
#include <linux/sched/rt.h>
#include <linux/sched/rseq_api.h>
#include <linux/sched/signal.h>
#include <linux/sched/smt.h>
#include <linux/sched/stat.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/task_flags.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/topology.h>
#include <linux/sched/user.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/xacct.h>

#include <uapi/linux/sched/types.h>

#include <linux/binfmts.h>
#include <linux/bitops.h>
#include <linux/compat.h>
#include <linux/context_tracking.h>
#include <linux/atomic.h>
#include <linux/bitmap.h>
#include <linux/bug.h>
#include <linux/capability.h>
#include <linux/cgroup_api.h>
#include <linux/cgroup.h>
#include <linux/cpufreq.h>
#include <linux/cpuidle.h>
#include <linux/cpuset.h>
#include <linux/cpumask_api.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/delayacct.h>
#include <linux/energy_model.h>
#include <linux/init_task.h>
#include <linux/kprobes.h>
#include <linux/file.h>
#include <linux/fs_api.h>
#include <linux/hrtimer_api.h>
#include <linux/interrupt.h>
#include <linux/irq_work.h>
#include <linux/jiffies.h>
#include <linux/kref_api.h>
#include <linux/kthread.h>
#include <linux/membarrier.h>
#include <linux/migrate.h>
#include <linux/mmu_context.h>
#include <linux/nmi.h>
#include <linux/ktime_api.h>
#include <linux/lockdep_api.h>
#include <linux/lockdep.h>
#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex_api.h>
#include <linux/plist.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/prefetch.h>
#include <linux/profile.h>
#include <linux/psi.h>
#include <linux/ratelimit.h>
#include <linux/rcupdate_wait.h>
#include <linux/security.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
#include <linux/seqlock.h>
#include <linux/softirq.h>
#include <linux/spinlock_api.h>
#include <linux/static_key.h>
#include <linux/stop_machine.h>
#include <linux/suspend.h>
#include <linux/swait.h>
#include <linux/syscalls_api.h>
#include <linux/syscalls.h>
#include <linux/task_work.h>
#include <linux/tsacct_kern.h>
#include <linux/tick.h>
#include <linux/topology.h>
#include <linux/types.h>
#include <linux/u64_stats_sync_api.h>
#include <linux/uaccess.h>
#include <linux/wait_api.h>
#include <linux/wait_bit.h>
#include <linux/workqueue_api.h>

#include <asm/tlb.h>
#include <trace/events/power.h>
#include <trace/events/sched.h>

#include "../workqueue_internal.h"

#ifdef CONFIG_CGROUP_SCHED
#include <linux/cgroup.h>
#include <linux/psi.h>
#endif

#ifdef CONFIG_SCHED_DEBUG
# include <linux/static_key.h>
#endif

#ifdef CONFIG_PARAVIRT
# include <asm/paravirt.h>
# include <asm/paravirt_api_clock.h>
#endif

#include "cpupri.h"
#include "cpudeadline.h"

#include <trace/events/sched.h>

#ifdef CONFIG_SCHED_DEBUG
# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
#else
# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
#endif

struct rq;
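Several of the new includes above (<linux/mutex_api.h>, <linux/kref_api.h>, <linux/spinlock_api.h>, ...) are the placeholder *_api.h headers intended for later header split-ups. At this stage the placeholder pattern is a plain forwarder; a representative sketch of what such a header looks like (the exact per-header contents live in include/linux/ and may differ):

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MUTEX_API_H
#define _LINUX_MUTEX_API_H

/*
 * Placeholder pattern: until the big header is split into a types part
 * and an API part, the *_api.h name simply forwards to it, so users such
 * as kernel/sched/sched.h can already switch over to the new name.
 */
#include <linux/mutex.h>

#endif /* _LINUX_MUTEX_API_H */

Callers can include the _api.h name today; once the real split happens, only the forwarder's body needs to change, not every user.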
@@ -301,29 +313,6 @@ struct dl_bw {
u64 total_bw;
};

static inline void __dl_update(struct dl_bw *dl_b, s64 bw);

static inline
void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
dl_b->total_bw -= tsk_bw;
__dl_update(dl_b, (s32)tsk_bw / cpus);
}

static inline
void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
dl_b->total_bw += tsk_bw;
__dl_update(dl_b, -((s32)tsk_bw / cpus));
}

static inline bool __dl_overflow(struct dl_bw *dl_b, unsigned long cap,
u64 old_bw, u64 new_bw)
{
return dl_b->bw != -1 &&
cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
}

/*
* Verify the fitness of task @p to run on @cpu taking into account the
* CPU original capacity and the runtime/deadline ratio of the task.
@@ -347,15 +336,11 @@ extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
extern bool __checkparam_dl(const struct sched_attr *attr);
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern bool dl_cpu_busy(unsigned int cpu);
extern int dl_cpu_busy(int cpu, struct task_struct *p);

#ifdef CONFIG_CGROUP_SCHED

#include <linux/cgroup.h>
#include <linux/psi.h>

struct cfs_rq;
struct rt_rq;
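The dl_bw helpers moved out of sched.h above implement the deadline admission test: a task of bandwidth new_bw may replace one of old_bw only if the capacity-scaled limit still covers the new total. A stand-alone arithmetic sketch of that check, with plain integer math standing in for cap_scale() and the kernel's fixed-point bandwidth units (the unlimited bw == -1 special case is omitted):

#include <stdbool.h>
#include <stdio.h>

/* Bandwidths are fractions of a CPU scaled by 2^20, capacities by 1024,
 * mirroring the kernel's units. */
#define BW_UNIT   (1ULL << 20)
#define CAP_SCALE 1024ULL

static unsigned long long cap_scale(unsigned long long bw, unsigned long long cap)
{
	return bw * cap / CAP_SCALE;
}

/* Admission test in the style of __dl_overflow(): would replacing a task
 * of bandwidth old_bw with one of new_bw exceed the allowed fraction? */
static bool dl_overflow(unsigned long long allowed_bw,   /* e.g. 95% of a CPU */
			unsigned long long cap,          /* CPU capacity      */
			unsigned long long total_bw,
			unsigned long long old_bw,
			unsigned long long new_bw)
{
	return cap_scale(allowed_bw, cap) < total_bw - old_bw + new_bw;
}

int main(void)
{
	unsigned long long allowed = BW_UNIT * 95 / 100;   /* 95% limit */
	unsigned long long total   = BW_UNIT / 2;          /* 50% already admitted */

	/* Admitting another 30% on a full-capacity CPU: fits under 95%. */
	printf("30%% more: %s\n",
	       dl_overflow(allowed, CAP_SCALE, total, 0, BW_UNIT * 30 / 100) ? "reject" : "admit");

	/* Admitting another 60%: 50% + 60% > 95%, so it is refused. */
	printf("60%% more: %s\n",
	       dl_overflow(allowed, CAP_SCALE, total, 0, BW_UNIT * 60 / 100) ? "reject" : "admit");
	return 0;
}

Compiled and run, this admits the 30% task and rejects the 60% one, the same kind of yes/no answer __dl_overflow() feeds back to the admission-control paths.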
@@ -1662,12 +1647,14 @@ enum numa_topology_type {
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
extern void sched_init_numa(void);
extern void sched_init_numa(int offline_node);
extern void sched_update_numa(int cpu, bool online);
extern void sched_domains_numa_masks_set(unsigned int cpu);
extern void sched_domains_numa_masks_clear(unsigned int cpu);
extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
#else
static inline void sched_init_numa(void) { }
static inline void sched_init_numa(int offline_node) { }
static inline void sched_update_numa(int cpu, bool online) { }
static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
@@ -1854,7 +1841,6 @@ static inline void flush_smp_call_function_from_idle(void) { }
#endif

#include "stats.h"
#include "autogroup.h"

#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)

@@ -1950,7 +1936,6 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
*/
#ifdef CONFIG_SCHED_DEBUG
# include <linux/static_key.h>
# define const_debug __read_mostly
#else
# define const_debug const
@@ -2331,7 +2316,6 @@ extern void resched_cpu(int cpu);
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);

extern struct dl_bandwidth def_dl_bandwidth;
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
@@ -2747,32 +2731,6 @@ extern void nohz_run_idle_balance(int cpu);
static inline void nohz_run_idle_balance(int cpu) { }
#endif

#ifdef CONFIG_SMP
static inline
void __dl_update(struct dl_bw *dl_b, s64 bw)
{
struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
int i;

RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
"sched RCU must be held");
for_each_cpu_and(i, rd->span, cpu_active_mask) {
struct rq *rq = cpu_rq(i);

rq->dl.extra_bw += bw;
}
}
#else
static inline
void __dl_update(struct dl_bw *dl_b, s64 bw)
{
struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);

dl->extra_bw += bw;
}
#endif


#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
u64 total;
@@ -2841,88 +2799,6 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */

#ifdef CONFIG_UCLAMP_TASK
unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);

/**
* uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
* @rq: The rq to clamp against. Must not be NULL.
* @util: The util value to clamp.
* @p: The task to clamp against. Can be NULL if you want to clamp
* against @rq only.
*
* Clamps the passed @util to the max(@rq, @p) effective uclamp values.
*
* If sched_uclamp_used static key is disabled, then just return the util
* without any clamping since uclamp aggregation at the rq level in the fast
* path is disabled, rendering this operation a NOP.
*
* Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
* will return the correct effective uclamp value of the task even if the
* static key is disabled.
*/
static __always_inline
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
struct task_struct *p)
{
unsigned long min_util = 0;
unsigned long max_util = 0;

if (!static_branch_likely(&sched_uclamp_used))
return util;

if (p) {
min_util = uclamp_eff_value(p, UCLAMP_MIN);
max_util = uclamp_eff_value(p, UCLAMP_MAX);

/*
* Ignore last runnable task's max clamp, as this task will
* reset it. Similarly, no need to read the rq's min clamp.
*/
if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
goto out;
}

min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value));
max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value));
out:
/*
* Since CPU's {min,max}_util clamps are MAX aggregated considering
* RUNNABLE tasks with _different_ clamps, we can end up with an
* inversion. Fix it now when the clamps are applied.
*/
if (unlikely(min_util >= max_util))
return min_util;

return clamp(util, min_util, max_util);
}

/*
* When uclamp is compiled in, the aggregation at rq level is 'turned off'
* by default in the fast path and only gets turned on once userspace performs
* an operation that requires it.
*
* Returns true if userspace opted-in to use uclamp and aggregation at rq level
* hence is active.
*/
static inline bool uclamp_is_used(void)
{
return static_branch_likely(&sched_uclamp_used);
}
#else /* CONFIG_UCLAMP_TASK */
static inline
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
struct task_struct *p)
{
return util;
}

static inline bool uclamp_is_used(void)
{
return false;
}
#endif /* CONFIG_UCLAMP_TASK */

#ifdef arch_scale_freq_capacity
# ifndef arch_scale_freq_invariant
# define arch_scale_freq_invariant() true
@@ -3020,6 +2896,105 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
}
#endif

#ifdef CONFIG_UCLAMP_TASK
unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);

/**
* uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
* @rq: The rq to clamp against. Must not be NULL.
* @util: The util value to clamp.
* @p: The task to clamp against. Can be NULL if you want to clamp
* against @rq only.
*
* Clamps the passed @util to the max(@rq, @p) effective uclamp values.
*
* If sched_uclamp_used static key is disabled, then just return the util
* without any clamping since uclamp aggregation at the rq level in the fast
* path is disabled, rendering this operation a NOP.
*
* Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
* will return the correct effective uclamp value of the task even if the
* static key is disabled.
*/
static __always_inline
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
struct task_struct *p)
{
unsigned long min_util = 0;
unsigned long max_util = 0;

if (!static_branch_likely(&sched_uclamp_used))
return util;

if (p) {
min_util = uclamp_eff_value(p, UCLAMP_MIN);
max_util = uclamp_eff_value(p, UCLAMP_MAX);

/*
* Ignore last runnable task's max clamp, as this task will
* reset it. Similarly, no need to read the rq's min clamp.
*/
if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
goto out;
}

min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value));
max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value));
out:
/*
* Since CPU's {min,max}_util clamps are MAX aggregated considering
* RUNNABLE tasks with _different_ clamps, we can end up with an
* inversion. Fix it now when the clamps are applied.
*/
if (unlikely(min_util >= max_util))
return min_util;

return clamp(util, min_util, max_util);
}

/* Is the rq being capped/throttled by uclamp_max? */
static inline bool uclamp_rq_is_capped(struct rq *rq)
{
unsigned long rq_util;
unsigned long max_util;

if (!static_branch_likely(&sched_uclamp_used))
return false;

rq_util = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq);
max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);

return max_util != SCHED_CAPACITY_SCALE && rq_util >= max_util;
}

/*
* When uclamp is compiled in, the aggregation at rq level is 'turned off'
* by default in the fast path and only gets turned on once userspace performs
* an operation that requires it.
*
* Returns true if userspace opted-in to use uclamp and aggregation at rq level
* hence is active.
*/
static inline bool uclamp_is_used(void)
{
return static_branch_likely(&sched_uclamp_used);
}
#else /* CONFIG_UCLAMP_TASK */
static inline
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
struct task_struct *p)
{
return util;
}

static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; }

static inline bool uclamp_is_used(void)
{
return false;
}
#endif /* CONFIG_UCLAMP_TASK */

#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
static inline unsigned long cpu_util_irq(struct rq *rq)
{
@@ -3118,3 +3093,4 @@ extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif

#endif /* _KERNEL_SCHED_SCHED_H */
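The uclamp block that now sits after cpu_util_rt() clamps a utilization value into the [min, max] window aggregated from the rq and the task, and the new uclamp_rq_is_capped() reports when a below-capacity max clamp is actually holding the rq back. A stand-alone sketch of both calculations with plain unsigned longs (illustrative only; the rq/task aggregation and static key are not modeled):

#include <stdbool.h>
#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024UL

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/* Same shape as uclamp_rq_util_with(): clamp util into [min, max]; if
 * aggregation produced min >= max, the min wins (the "inversion" case). */
static unsigned long uclamp_util(unsigned long util,
				 unsigned long min_clamp, unsigned long max_clamp)
{
	if (min_clamp >= max_clamp)
		return min_clamp;
	return clamp_ul(util, min_clamp, max_clamp);
}

/* Same idea as uclamp_rq_is_capped(): the rq is considered capped when a
 * max clamp below full capacity is actually limiting utilization. */
static bool rq_is_capped(unsigned long rq_util, unsigned long max_clamp)
{
	return max_clamp != SCHED_CAPACITY_SCALE && rq_util >= max_clamp;
}

int main(void)
{
	printf("util 300 in [512, 800] -> %lu\n", uclamp_util(300, 512, 800)); /* boosted to 512 */
	printf("util 900 in [512, 800] -> %lu\n", uclamp_util(900, 512, 800)); /* capped to 800  */
	printf("rq util 900, max 800  -> capped=%d\n", rq_is_capped(900, 800));
	printf("rq util 900, max 1024 -> capped=%d\n", rq_is_capped(900, SCHED_CAPACITY_SCALE));
	return 0;
}

Running it shows util being boosted up to the min clamp, capped at the max clamp, and the capped-rq predicate firing only when the max clamp sits below full capacity.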
@@ -2,7 +2,6 @@
/*
* /proc/schedstat implementation
*/
#include "sched.h"

void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
struct sched_statistics *stats)