Scheduler changes for v6.4:

 - Allow unprivileged PSI poll()ing

 - Fix performance regression introduced by mm_cid

 - Improve livepatch stalls by adding livepatch task switching to
   cond_resched(); this resolves livepatching busy-loop stalls with
   certain CPU-bound kthreads

 - Improve sched_move_task() performance on autogroup configs

 - On core-scheduling CPUs, avoid selecting throttled tasks to run

 - Misc cleanups, fixes and improvements

Signed-off-by: Ingo Molnar <mingo@kernel.org>

Merge tag 'sched-core-2023-04-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

* tag 'sched-core-2023-04-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/clock: Fix local_clock() before sched_clock_init()
  sched/rt: Fix bad task migration for rt tasks
  sched: Fix performance regression introduced by mm_cid
  sched/core: Make sched_dynamic_mutex static
  sched/psi: Allow unprivileged polling of N*2s period
  sched/psi: Extract update_triggers side effect
  sched/psi: Rename existing poll members in preparation
  sched/psi: Rearrange polling code in preparation
  sched/fair: Fix inaccurate tally of ttwu_move_affine
  vhost: Fix livepatch timeouts in vhost_worker()
  livepatch,sched: Add livepatch task switching to cond_resched()
  livepatch: Skip task_call_func() for current task
  livepatch: Convert stack entries array to percpu
  sched: Interleave cfs bandwidth timers for improved single thread performance at low utilization
  sched/core: Reduce cost of sched_move_task when config autogroup
  sched/core: Avoid selecting the task that is throttled to run when core-sched enable
  sched/topology: Make sched_energy_mutex,update static
commit 586b222d74
@@ -105,6 +105,10 @@ prevent overly frequent polling. Max limit is chosen as a high enough number
 after which monitors are most likely not needed and psi averages can be used
 instead.
 
+Unprivileged users can also create monitors, with the only limitation that the
+window size must be a multiple of 2s, in order to prevent excessive resource
+usage.
+
 When activated, psi monitor stays active for at least the duration of one
 tracking window to avoid repeated activations/deactivations when system is
 bouncing in and out of the stall state.
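As a rough illustration of the unprivileged interface (not part of the patch itself), a monitor is created by writing a trigger string to one of the /proc/pressure files and then poll()ing the file descriptor. The 4s window below satisfies the multiple-of-2s rule; with this series such unprivileged triggers are serviced by the existing 2s averaging work rather than the RT polling kthread.

/* Minimal userspace sketch: unprivileged PSI memory-pressure monitor. */
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Notify when "some" memory stall exceeds 150ms within any 4s window. */
	const char *trig = "some 150000 4000000";
	struct pollfd fds;
	int fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);

	if (fd < 0 || write(fd, trig, strlen(trig) + 1) < 0) {
		perror("psi trigger");
		return 1;
	}
	fds.fd = fd;
	fds.events = POLLPRI;
	while (poll(&fds, 1, -1) > 0) {
		if (fds.revents & POLLERR) {
			fprintf(stderr, "trigger destroyed\n");
			break;
		}
		if (fds.revents & POLLPRI)
			printf("memory pressure event\n");
	}
	close(fd);
	return 0;
}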
@@ -361,8 +361,7 @@ static int vhost_worker(void *data)
 			kcov_remote_start_common(worker->kcov_handle);
 			work->fn(work);
 			kcov_remote_stop();
-			if (need_resched())
-				schedule();
+			cond_resched();
 		}
 	}
 
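The vhost change above swaps an open-coded need_resched()/schedule() pair for cond_resched(), which with this series also feeds into livepatch task switching. A hedged, generic sketch of the resulting kthread pattern (example_worker is hypothetical, not the vhost code):

/* Illustrative only -- a generic CPU-bound worker loop. */
#include <linux/kthread.h>
#include <linux/sched.h>

static int example_worker(void *data)
{
	while (!kthread_should_stop()) {
		/* ... process one unit of work ... */

		/*
		 * Voluntary preemption point: may call schedule(), and with
		 * this series it also gives the livepatch core a chance to
		 * transition this task (see klp_sched_try_switch()), so a
		 * busy loop like this no longer stalls a livepatch.
		 */
		cond_resched();
	}
	return 0;
}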
@@ -13,6 +13,7 @@
 #include <linux/ftrace.h>
 #include <linux/completion.h>
 #include <linux/list.h>
+#include <linux/livepatch_sched.h>
 
 #if IS_ENABLED(CONFIG_LIVEPATCH)
include/linux/livepatch_sched.h (new file, 29 lines)
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_LIVEPATCH_SCHED_H_
+#define _LINUX_LIVEPATCH_SCHED_H_
+
+#include <linux/jump_label.h>
+#include <linux/static_call_types.h>
+
+#ifdef CONFIG_LIVEPATCH
+
+void __klp_sched_try_switch(void);
+
+#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+
+DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
+
+static __always_inline void klp_sched_try_switch(void)
+{
+	if (static_branch_unlikely(&klp_sched_try_switch_key))
+		__klp_sched_try_switch();
+}
+
+#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+
+#else /* !CONFIG_LIVEPATCH */
+static inline void klp_sched_try_switch(void) {}
+static inline void __klp_sched_try_switch(void) {}
+#endif /* CONFIG_LIVEPATCH */
+
+#endif /* _LINUX_LIVEPATCH_SCHED_H_ */
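When the static_call-based fast path is not available, the header above falls back to a static key, so the extra check in cond_resched() costs a patched-out NOP until a livepatch transition is in progress. A minimal, self-contained sketch of that generic jump-label pattern (example_key and example_fast_path are illustrative, not part of the patch):

/* Illustrative only: the jump-label pattern used by klp_sched_try_switch(). */
#include <linux/jump_label.h>
#include <linux/printk.h>

DEFINE_STATIC_KEY_FALSE(example_key);

static __always_inline void example_fast_path(void)
{
	/* Compiles to a NOP until the key is enabled at runtime. */
	if (static_branch_unlikely(&example_key))
		pr_info("slow path taken\n");
}

/*
 * Rarely toggled, e.g. when a transition starts or finishes:
 *	static_branch_enable(&example_key);
 *	static_branch_disable(&example_key);
 */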
@ -573,6 +573,13 @@ struct vm_area_struct {
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
|
||||
} __randomize_layout;
|
||||
|
||||
#ifdef CONFIG_SCHED_MM_CID
|
||||
struct mm_cid {
|
||||
u64 time;
|
||||
int cid;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct kioctx_table;
|
||||
struct mm_struct {
|
||||
struct {
|
||||
@ -623,15 +630,19 @@ struct mm_struct {
|
||||
atomic_t mm_count;
|
||||
#ifdef CONFIG_SCHED_MM_CID
|
||||
/**
|
||||
* @cid_lock: Protect cid bitmap updates vs lookups.
|
||||
* @pcpu_cid: Per-cpu current cid.
|
||||
*
|
||||
* Prevent situations where updates to the cid bitmap happen
|
||||
* concurrently with lookups. Those can lead to situations
|
||||
* where a lookup cannot find a free bit simply because it was
|
||||
* unlucky enough to load, non-atomically, bitmap words as they
|
||||
* were being concurrently updated by the updaters.
|
||||
* Keep track of the currently allocated mm_cid for each cpu.
|
||||
* The per-cpu mm_cid values are serialized by their respective
|
||||
* runqueue locks.
|
||||
*/
|
||||
raw_spinlock_t cid_lock;
|
||||
struct mm_cid __percpu *pcpu_cid;
|
||||
/*
|
||||
* @mm_cid_next_scan: Next mm_cid scan (in jiffies).
|
||||
*
|
||||
* When the next mm_cid scan is due (in jiffies).
|
||||
*/
|
||||
unsigned long mm_cid_next_scan;
|
||||
#endif
|
||||
#ifdef CONFIG_MMU
|
||||
atomic_long_t pgtables_bytes; /* size of all page tables */
|
||||
@@ -899,6 +910,37 @@ static inline void vma_iter_init(struct vma_iterator *vmi,
 }
 
 #ifdef CONFIG_SCHED_MM_CID
+
+enum mm_cid_state {
+	MM_CID_UNSET = -1U,		/* Unset state has lazy_put flag set. */
+	MM_CID_LAZY_PUT = (1U << 31),
+};
+
+static inline bool mm_cid_is_unset(int cid)
+{
+	return cid == MM_CID_UNSET;
+}
+
+static inline bool mm_cid_is_lazy_put(int cid)
+{
+	return !mm_cid_is_unset(cid) && (cid & MM_CID_LAZY_PUT);
+}
+
+static inline bool mm_cid_is_valid(int cid)
+{
+	return !(cid & MM_CID_LAZY_PUT);
+}
+
+static inline int mm_cid_set_lazy_put(int cid)
+{
+	return cid | MM_CID_LAZY_PUT;
+}
+
+static inline int mm_cid_clear_lazy_put(int cid)
+{
+	return cid & ~MM_CID_LAZY_PUT;
+}
+
 /* Accessor for struct mm_struct's cidmask. */
 static inline cpumask_t *mm_cidmask(struct mm_struct *mm)
 {
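The helpers above encode two pieces of state in a single int: bit 31 is the lazy-put flag and the all-ones value means unset (which therefore also carries the lazy-put bit). A standalone userspace illustration of the encoding, using hypothetical local copies of the constants:

/* Standalone illustration of the cid bit encoding (local constants). */
#include <assert.h>

#define EX_MM_CID_UNSET		(-1U)		/* all bits set, lazy_put included */
#define EX_MM_CID_LAZY_PUT	(1U << 31)

int main(void)
{
	unsigned int cid = 3;				/* a valid, allocated cid */
	unsigned int lazy = cid | EX_MM_CID_LAZY_PUT;	/* mm_cid_set_lazy_put() */

	assert(!(cid & EX_MM_CID_LAZY_PUT));		/* mm_cid_is_valid(cid) */
	assert(lazy & EX_MM_CID_LAZY_PUT);		/* mm_cid_is_lazy_put(lazy) */
	assert((lazy & ~EX_MM_CID_LAZY_PUT) == cid);	/* mm_cid_clear_lazy_put(lazy) */
	assert(EX_MM_CID_UNSET & EX_MM_CID_LAZY_PUT);	/* UNSET implies the lazy flag */
	return 0;
}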
@ -912,16 +954,40 @@ static inline cpumask_t *mm_cidmask(struct mm_struct *mm)
|
||||
|
||||
static inline void mm_init_cid(struct mm_struct *mm)
|
||||
{
|
||||
raw_spin_lock_init(&mm->cid_lock);
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i);
|
||||
|
||||
pcpu_cid->cid = MM_CID_UNSET;
|
||||
pcpu_cid->time = 0;
|
||||
}
|
||||
cpumask_clear(mm_cidmask(mm));
|
||||
}
|
||||
|
||||
static inline int mm_alloc_cid(struct mm_struct *mm)
|
||||
{
|
||||
mm->pcpu_cid = alloc_percpu(struct mm_cid);
|
||||
if (!mm->pcpu_cid)
|
||||
return -ENOMEM;
|
||||
mm_init_cid(mm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void mm_destroy_cid(struct mm_struct *mm)
|
||||
{
|
||||
free_percpu(mm->pcpu_cid);
|
||||
mm->pcpu_cid = NULL;
|
||||
}
|
||||
|
||||
static inline unsigned int mm_cid_size(void)
|
||||
{
|
||||
return cpumask_size();
|
||||
}
|
||||
#else /* CONFIG_SCHED_MM_CID */
|
||||
static inline void mm_init_cid(struct mm_struct *mm) { }
|
||||
static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; }
|
||||
static inline void mm_destroy_cid(struct mm_struct *mm) { }
|
||||
static inline unsigned int mm_cid_size(void)
|
||||
{
|
||||
return 0;
|
||||
|
@@ -24,7 +24,7 @@ void psi_memstall_leave(unsigned long *flags);
 
 int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
 struct psi_trigger *psi_trigger_create(struct psi_group *group,
-			char *buf, enum psi_res res);
+			char *buf, enum psi_res res, struct file *file);
 void psi_trigger_destroy(struct psi_trigger *t);
 
 __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
@ -151,6 +151,9 @@ struct psi_trigger {
|
||||
|
||||
/* Deferred event(s) from previous ratelimit window */
|
||||
bool pending_event;
|
||||
|
||||
/* Trigger type - PSI_AVGS for unprivileged, PSI_POLL for RT */
|
||||
enum psi_aggregators aggregator;
|
||||
};
|
||||
|
||||
struct psi_group {
|
||||
@ -171,30 +174,34 @@ struct psi_group {
|
||||
/* Aggregator work control */
|
||||
struct delayed_work avgs_work;
|
||||
|
||||
/* Unprivileged triggers against N*PSI_FREQ windows */
|
||||
struct list_head avg_triggers;
|
||||
u32 avg_nr_triggers[NR_PSI_STATES - 1];
|
||||
|
||||
/* Total stall times and sampled pressure averages */
|
||||
u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
|
||||
unsigned long avg[NR_PSI_STATES - 1][3];
|
||||
|
||||
/* Monitor work control */
|
||||
struct task_struct __rcu *poll_task;
|
||||
struct timer_list poll_timer;
|
||||
wait_queue_head_t poll_wait;
|
||||
atomic_t poll_wakeup;
|
||||
atomic_t poll_scheduled;
|
||||
/* Monitor RT polling work control */
|
||||
struct task_struct __rcu *rtpoll_task;
|
||||
struct timer_list rtpoll_timer;
|
||||
wait_queue_head_t rtpoll_wait;
|
||||
atomic_t rtpoll_wakeup;
|
||||
atomic_t rtpoll_scheduled;
|
||||
|
||||
/* Protects data used by the monitor */
|
||||
struct mutex trigger_lock;
|
||||
struct mutex rtpoll_trigger_lock;
|
||||
|
||||
/* Configured polling triggers */
|
||||
struct list_head triggers;
|
||||
u32 nr_triggers[NR_PSI_STATES - 1];
|
||||
u32 poll_states;
|
||||
u64 poll_min_period;
|
||||
/* Configured RT polling triggers */
|
||||
struct list_head rtpoll_triggers;
|
||||
u32 rtpoll_nr_triggers[NR_PSI_STATES - 1];
|
||||
u32 rtpoll_states;
|
||||
u64 rtpoll_min_period;
|
||||
|
||||
/* Total stall times at the start of monitor activation */
|
||||
u64 polling_total[NR_PSI_STATES - 1];
|
||||
u64 polling_next_update;
|
||||
u64 polling_until;
|
||||
/* Total stall times at the start of RT polling monitor activation */
|
||||
u64 rtpoll_total[NR_PSI_STATES - 1];
|
||||
u64 rtpoll_next_update;
|
||||
u64 rtpoll_until;
|
||||
};
|
||||
|
||||
#else /* CONFIG_PSI */
|
||||
|
@@ -36,6 +36,7 @@
 #include <linux/seqlock.h>
 #include <linux/kcsan.h>
 #include <linux/rv.h>
+#include <linux/livepatch_sched.h>
 #include <asm/kmap_size.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
@@ -1313,7 +1314,10 @@ struct task_struct {
 
 #ifdef CONFIG_SCHED_MM_CID
 	int				mm_cid;		/* Current cid in mm */
+	int				last_mm_cid;	/* Most recent cid in mm */
+	int				migrate_from_cpu;
 	int				mm_cid_active;	/* Whether cid bitmap is active */
+	struct callback_head		cid_work;
 #endif
 
 	struct tlbflush_unmap_batch	tlb_ubc;
@ -2067,6 +2071,9 @@ extern int __cond_resched(void);
|
||||
|
||||
#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||
|
||||
void sched_dynamic_klp_enable(void);
|
||||
void sched_dynamic_klp_disable(void);
|
||||
|
||||
DECLARE_STATIC_CALL(cond_resched, __cond_resched);
|
||||
|
||||
static __always_inline int _cond_resched(void)
|
||||
@ -2075,6 +2082,7 @@ static __always_inline int _cond_resched(void)
|
||||
}
|
||||
|
||||
#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||
|
||||
extern int dynamic_cond_resched(void);
|
||||
|
||||
static __always_inline int _cond_resched(void)
|
||||
@ -2082,20 +2090,25 @@ static __always_inline int _cond_resched(void)
|
||||
return dynamic_cond_resched();
|
||||
}
|
||||
|
||||
#else
|
||||
#else /* !CONFIG_PREEMPTION */
|
||||
|
||||
static inline int _cond_resched(void)
|
||||
{
|
||||
klp_sched_try_switch();
|
||||
return __cond_resched();
|
||||
}
|
||||
|
||||
#endif /* CONFIG_PREEMPT_DYNAMIC */
|
||||
#endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
|
||||
|
||||
#else
|
||||
#else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */
|
||||
|
||||
static inline int _cond_resched(void) { return 0; }
|
||||
static inline int _cond_resched(void)
|
||||
{
|
||||
klp_sched_try_switch();
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
|
||||
#endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */
|
||||
|
||||
#define cond_resched() ({ \
|
||||
__might_resched(__FILE__, __LINE__, 0); \
|
||||
|
@@ -37,6 +37,11 @@ static inline void mmgrab(struct mm_struct *mm)
 	atomic_inc(&mm->mm_count);
 }
 
+static inline void smp_mb__after_mmgrab(void)
+{
+	smp_mb__after_atomic();
+}
+
 extern void __mmdrop(struct mm_struct *mm);
 
 static inline void mmdrop(struct mm_struct *mm)
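The new smp_mb__after_mmgrab() helper lets a caller turn the atomic_inc() in mmgrab() into a full barrier relative to later loads. A rough usage sketch under that assumption (example_inspect_mm is hypothetical, not a function from this series):

/* Hypothetical caller: pin an mm and order the grab before later loads. */
static void example_inspect_mm(struct mm_struct *mm)
{
	mmgrab(mm);			/* atomic_inc(&mm->mm_count) */
	smp_mb__after_mmgrab();		/* full barrier after the increment */

	/*
	 * Loads issued after this point (e.g. of per-CPU state tied to the
	 * mm) cannot be reordered before the reference-count increment.
	 */

	mmdrop(mm);			/* release the pin */
}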
@@ -3771,7 +3771,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
 	}
 
 	psi = cgroup_psi(cgrp);
-	new = psi_trigger_create(psi, buf, res);
+	new = psi_trigger_create(psi, buf, res, of->file);
 	if (IS_ERR(new)) {
 		cgroup_put(cgrp);
 		return PTR_ERR(new);
@ -924,6 +924,7 @@ void __mmdrop(struct mm_struct *mm)
|
||||
check_mm(mm);
|
||||
put_user_ns(mm->user_ns);
|
||||
mm_pasid_drop(mm);
|
||||
mm_destroy_cid(mm);
|
||||
|
||||
for (i = 0; i < NR_MM_COUNTERS; i++)
|
||||
percpu_counter_destroy(&mm->rss_stat[i]);
|
||||
@ -1188,7 +1189,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
|
||||
|
||||
#ifdef CONFIG_SCHED_MM_CID
|
||||
tsk->mm_cid = -1;
|
||||
tsk->last_mm_cid = -1;
|
||||
tsk->mm_cid_active = 0;
|
||||
tsk->migrate_from_cpu = -1;
|
||||
#endif
|
||||
return tsk;
|
||||
|
||||
@ -1296,18 +1299,22 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
||||
if (init_new_context(p, mm))
|
||||
goto fail_nocontext;
|
||||
|
||||
if (mm_alloc_cid(mm))
|
||||
goto fail_cid;
|
||||
|
||||
for (i = 0; i < NR_MM_COUNTERS; i++)
|
||||
if (percpu_counter_init(&mm->rss_stat[i], 0, GFP_KERNEL_ACCOUNT))
|
||||
goto fail_pcpu;
|
||||
|
||||
mm->user_ns = get_user_ns(user_ns);
|
||||
lru_gen_init_mm(mm);
|
||||
mm_init_cid(mm);
|
||||
return mm;
|
||||
|
||||
fail_pcpu:
|
||||
while (i > 0)
|
||||
percpu_counter_destroy(&mm->rss_stat[--i]);
|
||||
mm_destroy_cid(mm);
|
||||
fail_cid:
|
||||
destroy_context(mm);
|
||||
fail_nocontext:
|
||||
mm_free_pgd(mm);
|
||||
|
@ -33,6 +33,7 @@
|
||||
*
|
||||
* - klp_ftrace_handler()
|
||||
* - klp_update_patch_state()
|
||||
* - __klp_sched_try_switch()
|
||||
*/
|
||||
DEFINE_MUTEX(klp_mutex);
|
||||
|
||||
|
@ -9,11 +9,14 @@
|
||||
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/stacktrace.h>
|
||||
#include <linux/static_call.h>
|
||||
#include "core.h"
|
||||
#include "patch.h"
|
||||
#include "transition.h"
|
||||
|
||||
#define MAX_STACK_ENTRIES 100
|
||||
DEFINE_PER_CPU(unsigned long[MAX_STACK_ENTRIES], klp_stack_entries);
|
||||
|
||||
#define STACK_ERR_BUF_SIZE 128
|
||||
|
||||
#define SIGNALS_TIMEOUT 15
|
||||
@ -24,6 +27,25 @@ static int klp_target_state = KLP_UNDEFINED;
|
||||
|
||||
static unsigned int klp_signals_cnt;
|
||||
|
||||
/*
|
||||
* When a livepatch is in progress, enable klp stack checking in
|
||||
* cond_resched(). This helps CPU-bound kthreads get patched.
|
||||
*/
|
||||
#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||
|
||||
#define klp_cond_resched_enable() sched_dynamic_klp_enable()
|
||||
#define klp_cond_resched_disable() sched_dynamic_klp_disable()
|
||||
|
||||
#else /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
|
||||
EXPORT_SYMBOL(klp_sched_try_switch_key);
|
||||
|
||||
#define klp_cond_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
|
||||
#define klp_cond_resched_disable() static_branch_disable(&klp_sched_try_switch_key)
|
||||
|
||||
#endif /* CONFIG_PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
|
||||
|
||||
/*
|
||||
* This work can be performed periodically to finish patching or unpatching any
|
||||
* "straggler" tasks which failed to transition in the first attempt.
|
||||
@ -172,8 +194,8 @@ void klp_update_patch_state(struct task_struct *task)
|
||||
* barrier (smp_rmb) for two cases:
|
||||
*
|
||||
* 1) Enforce the order of the TIF_PATCH_PENDING read and the
|
||||
* klp_target_state read. The corresponding write barrier is in
|
||||
* klp_init_transition().
|
||||
* klp_target_state read. The corresponding write barriers are in
|
||||
* klp_init_transition() and klp_reverse_transition().
|
||||
*
|
||||
* 2) Enforce the order of the TIF_PATCH_PENDING read and a future read
|
||||
* of func->transition, if klp_ftrace_handler() is called later on
|
||||
@ -240,12 +262,15 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries,
|
||||
*/
|
||||
static int klp_check_stack(struct task_struct *task, const char **oldname)
|
||||
{
|
||||
static unsigned long entries[MAX_STACK_ENTRIES];
|
||||
unsigned long *entries = this_cpu_ptr(klp_stack_entries);
|
||||
struct klp_object *obj;
|
||||
struct klp_func *func;
|
||||
int ret, nr_entries;
|
||||
|
||||
ret = stack_trace_save_tsk_reliable(task, entries, ARRAY_SIZE(entries));
|
||||
/* Protect 'klp_stack_entries' */
|
||||
lockdep_assert_preemption_disabled();
|
||||
|
||||
ret = stack_trace_save_tsk_reliable(task, entries, MAX_STACK_ENTRIES);
|
||||
if (ret < 0)
|
||||
return -EINVAL;
|
||||
nr_entries = ret;
|
||||
@ -307,7 +332,11 @@ static bool klp_try_switch_task(struct task_struct *task)
|
||||
* functions. If all goes well, switch the task to the target patch
|
||||
* state.
|
||||
*/
|
||||
ret = task_call_func(task, klp_check_and_switch_task, &old_name);
|
||||
if (task == current)
|
||||
ret = klp_check_and_switch_task(current, &old_name);
|
||||
else
|
||||
ret = task_call_func(task, klp_check_and_switch_task, &old_name);
|
||||
|
||||
switch (ret) {
|
||||
case 0: /* success */
|
||||
break;
|
||||
@ -334,6 +363,44 @@ static bool klp_try_switch_task(struct task_struct *task)
|
||||
return !ret;
|
||||
}
|
||||
|
||||
void __klp_sched_try_switch(void)
|
||||
{
|
||||
if (likely(!klp_patch_pending(current)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* This function is called from cond_resched() which is called in many
|
||||
* places throughout the kernel. Using the klp_mutex here might
|
||||
* deadlock.
|
||||
*
|
||||
* Instead, disable preemption to prevent racing with other callers of
|
||||
* klp_try_switch_task(). Thanks to task_call_func() they won't be
|
||||
* able to switch this task while it's running.
|
||||
*/
|
||||
preempt_disable();
|
||||
|
||||
/*
|
||||
* Make sure current didn't get patched between the above check and
|
||||
* preempt_disable().
|
||||
*/
|
||||
if (unlikely(!klp_patch_pending(current)))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Enforce the order of the TIF_PATCH_PENDING read above and the
|
||||
* klp_target_state read in klp_try_switch_task(). The corresponding
|
||||
* write barriers are in klp_init_transition() and
|
||||
* klp_reverse_transition().
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
klp_try_switch_task(current);
|
||||
|
||||
out:
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(__klp_sched_try_switch);
|
||||
|
||||
/*
|
||||
* Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set.
|
||||
* Kthreads with TIF_PATCH_PENDING set are woken up.
|
||||
@ -440,7 +507,8 @@ void klp_try_complete_transition(void)
|
||||
return;
|
||||
}
|
||||
|
||||
/* we're done, now cleanup the data structures */
|
||||
/* Done! Now cleanup the data structures. */
|
||||
klp_cond_resched_disable();
|
||||
patch = klp_transition_patch;
|
||||
klp_complete_transition();
|
||||
|
||||
@ -492,6 +560,8 @@ void klp_start_transition(void)
|
||||
set_tsk_thread_flag(task, TIF_PATCH_PENDING);
|
||||
}
|
||||
|
||||
klp_cond_resched_enable();
|
||||
|
||||
klp_signals_cnt = 0;
|
||||
}
|
||||
|
||||
@ -547,8 +617,9 @@ void klp_init_transition(struct klp_patch *patch, int state)
|
||||
* see a func in transition with a task->patch_state of KLP_UNDEFINED.
|
||||
*
|
||||
* Also enforce the order of the klp_target_state write and future
|
||||
* TIF_PATCH_PENDING writes to ensure klp_update_patch_state() doesn't
|
||||
* set a task->patch_state to KLP_UNDEFINED.
|
||||
* TIF_PATCH_PENDING writes to ensure klp_update_patch_state() and
|
||||
* __klp_sched_try_switch() don't set a task->patch_state to
|
||||
* KLP_UNDEFINED.
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
@ -584,14 +655,10 @@ void klp_reverse_transition(void)
|
||||
klp_target_state == KLP_PATCHED ? "patching to unpatching" :
|
||||
"unpatching to patching");
|
||||
|
||||
klp_transition_patch->enabled = !klp_transition_patch->enabled;
|
||||
|
||||
klp_target_state = !klp_target_state;
|
||||
|
||||
/*
|
||||
* Clear all TIF_PATCH_PENDING flags to prevent races caused by
|
||||
* klp_update_patch_state() running in parallel with
|
||||
* klp_start_transition().
|
||||
* klp_update_patch_state() or __klp_sched_try_switch() running in
|
||||
* parallel with the reverse transition.
|
||||
*/
|
||||
read_lock(&tasklist_lock);
|
||||
for_each_process_thread(g, task)
|
||||
@ -601,9 +668,28 @@ void klp_reverse_transition(void)
|
||||
for_each_possible_cpu(cpu)
|
||||
clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING);
|
||||
|
||||
/* Let any remaining calls to klp_update_patch_state() complete */
|
||||
/*
|
||||
* Make sure all existing invocations of klp_update_patch_state() and
|
||||
* __klp_sched_try_switch() see the cleared TIF_PATCH_PENDING before
|
||||
* starting the reverse transition.
|
||||
*/
|
||||
klp_synchronize_transition();
|
||||
|
||||
/*
|
||||
* All patching has stopped, now re-initialize the global variables to
|
||||
* prepare for the reverse transition.
|
||||
*/
|
||||
klp_transition_patch->enabled = !klp_transition_patch->enabled;
|
||||
klp_target_state = !klp_target_state;
|
||||
|
||||
/*
|
||||
* Enforce the order of the klp_target_state write and the
|
||||
* TIF_PATCH_PENDING writes in klp_start_transition() to ensure
|
||||
* klp_update_patch_state() and __klp_sched_try_switch() don't set
|
||||
* task->patch_state to the wrong value.
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
klp_start_transition();
|
||||
}
|
||||
|
||||
@ -617,9 +703,9 @@ void klp_copy_process(struct task_struct *child)
|
||||
* the task flag up to date with the parent here.
|
||||
*
|
||||
* The operation is serialized against all klp_*_transition()
|
||||
* operations by the tasklist_lock. The only exception is
|
||||
* klp_update_patch_state(current), but we cannot race with
|
||||
* that because we are current.
|
||||
* operations by the tasklist_lock. The only exceptions are
|
||||
* klp_update_patch_state(current) and __klp_sched_try_switch(), but we
|
||||
* cannot race with them because we are current.
|
||||
*/
|
||||
if (test_tsk_thread_flag(current, TIF_PATCH_PENDING))
|
||||
set_tsk_thread_flag(child, TIF_PATCH_PENDING);
|
||||
|
@@ -300,6 +300,9 @@ noinstr u64 local_clock(void)
 	if (static_branch_likely(&__sched_clock_stable))
 		return sched_clock() + __sched_clock_offset;
 
+	if (!static_branch_likely(&sched_clock_running))
+		return sched_clock();
+
 	preempt_disable_notrace();
 	clock = sched_clock_local(this_scd());
 	preempt_enable_notrace();
@ -261,36 +261,51 @@ void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
|
||||
resched_curr(rq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find left-most (aka, highest priority) task matching @cookie.
|
||||
*/
|
||||
static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
|
||||
static int sched_task_is_throttled(struct task_struct *p, int cpu)
|
||||
{
|
||||
struct rb_node *node;
|
||||
if (p->sched_class->task_is_throttled)
|
||||
return p->sched_class->task_is_throttled(p, cpu);
|
||||
|
||||
node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp);
|
||||
/*
|
||||
* The idle task always matches any cookie!
|
||||
*/
|
||||
if (!node)
|
||||
return idle_sched_class.pick_task(rq);
|
||||
|
||||
return __node_2_sc(node);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct task_struct *sched_core_next(struct task_struct *p, unsigned long cookie)
|
||||
{
|
||||
struct rb_node *node = &p->core_node;
|
||||
int cpu = task_cpu(p);
|
||||
|
||||
node = rb_next(node);
|
||||
do {
|
||||
node = rb_next(node);
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
p = __node_2_sc(node);
|
||||
if (p->core_cookie != cookie)
|
||||
return NULL;
|
||||
|
||||
} while (sched_task_is_throttled(p, cpu));
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find left-most (aka, highest priority) and unthrottled task matching @cookie.
|
||||
* If no suitable task is found, NULL will be returned.
|
||||
*/
|
||||
static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
|
||||
{
|
||||
struct task_struct *p;
|
||||
struct rb_node *node;
|
||||
|
||||
node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp);
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
p = container_of(node, struct task_struct, core_node);
|
||||
if (p->core_cookie != cookie)
|
||||
return NULL;
|
||||
p = __node_2_sc(node);
|
||||
if (!sched_task_is_throttled(p, rq->cpu))
|
||||
return p;
|
||||
|
||||
return p;
|
||||
return sched_core_next(p, cookie);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2087,6 +2102,8 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
if (task_on_rq_migrating(p))
|
||||
flags |= ENQUEUE_MIGRATED;
|
||||
if (flags & ENQUEUE_MIGRATED)
|
||||
sched_mm_cid_migrate_to(rq, p);
|
||||
|
||||
enqueue_task(rq, p, flags);
|
||||
|
||||
@ -3196,6 +3213,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
|
||||
p->sched_class->migrate_task_rq(p, new_cpu);
|
||||
p->se.nr_migrations++;
|
||||
rseq_migrate(p);
|
||||
sched_mm_cid_migrate_from(p);
|
||||
perf_event_task_migrate(p);
|
||||
}
|
||||
|
||||
@ -4469,6 +4487,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
p->wake_entry.u_flags = CSD_TYPE_TTWU;
|
||||
p->migration_pending = NULL;
|
||||
#endif
|
||||
init_sched_mm_cid(p);
|
||||
}
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
|
||||
@ -5115,7 +5134,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
|
||||
sched_info_switch(rq, prev, next);
|
||||
perf_event_task_sched_out(prev, next);
|
||||
rseq_preempt(prev);
|
||||
switch_mm_cid(prev, next);
|
||||
fire_sched_out_preempt_notifiers(prev, next);
|
||||
kmap_local_sched_out();
|
||||
prepare_task(next);
|
||||
@ -5272,6 +5290,9 @@ context_switch(struct rq *rq, struct task_struct *prev,
|
||||
*
|
||||
* kernel -> user switch + mmdrop_lazy_tlb() active
|
||||
* user -> user switch
|
||||
*
|
||||
* switch_mm_cid() needs to be updated if the barriers provided
|
||||
* by context_switch() are modified.
|
||||
*/
|
||||
if (!next->mm) { // to kernel
|
||||
enter_lazy_tlb(prev->active_mm, next);
|
||||
@ -5301,6 +5322,9 @@ context_switch(struct rq *rq, struct task_struct *prev,
|
||||
}
|
||||
}
|
||||
|
||||
/* switch_mm_cid() requires the memory barriers above. */
|
||||
switch_mm_cid(rq, prev, next);
|
||||
|
||||
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
|
||||
|
||||
prepare_lock_switch(rq, next, rf);
|
||||
@ -5589,6 +5613,7 @@ void scheduler_tick(void)
|
||||
resched_latency = cpu_resched_latency(rq);
|
||||
calc_global_load_tick(rq);
|
||||
sched_core_tick(rq);
|
||||
task_tick_mm_cid(rq, curr);
|
||||
|
||||
rq_unlock(rq, &rf);
|
||||
|
||||
@ -6241,7 +6266,7 @@ static bool try_steal_cookie(int this, int that)
|
||||
goto unlock;
|
||||
|
||||
p = sched_core_find(src, cookie);
|
||||
if (p == src->idle)
|
||||
if (!p)
|
||||
goto unlock;
|
||||
|
||||
do {
|
||||
@ -6253,6 +6278,13 @@ static bool try_steal_cookie(int this, int that)
|
||||
|
||||
if (p->core_occupation > dst->idle->core_occupation)
|
||||
goto next;
|
||||
/*
|
||||
* sched_core_find() and sched_core_next() will ensure that task @p
|
||||
* is not throttled now, we also need to check whether the runqueue
|
||||
* of the destination CPU is being throttled.
|
||||
*/
|
||||
if (sched_task_is_throttled(p, this))
|
||||
goto next;
|
||||
|
||||
deactivate_task(src, p, 0);
|
||||
set_task_cpu(p, this);
|
||||
@ -8508,6 +8540,7 @@ EXPORT_STATIC_CALL_TRAMP(might_resched);
|
||||
static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
|
||||
int __sched dynamic_cond_resched(void)
|
||||
{
|
||||
klp_sched_try_switch();
|
||||
if (!static_branch_unlikely(&sk_dynamic_cond_resched))
|
||||
return 0;
|
||||
return __cond_resched();
|
||||
@ -8656,13 +8689,17 @@ int sched_dynamic_mode(const char *str)
|
||||
#error "Unsupported PREEMPT_DYNAMIC mechanism"
|
||||
#endif
|
||||
|
||||
void sched_dynamic_update(int mode)
|
||||
static DEFINE_MUTEX(sched_dynamic_mutex);
|
||||
static bool klp_override;
|
||||
|
||||
static void __sched_dynamic_update(int mode)
|
||||
{
|
||||
/*
|
||||
* Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
|
||||
* the ZERO state, which is invalid.
|
||||
*/
|
||||
preempt_dynamic_enable(cond_resched);
|
||||
if (!klp_override)
|
||||
preempt_dynamic_enable(cond_resched);
|
||||
preempt_dynamic_enable(might_resched);
|
||||
preempt_dynamic_enable(preempt_schedule);
|
||||
preempt_dynamic_enable(preempt_schedule_notrace);
|
||||
@ -8670,36 +8707,79 @@ void sched_dynamic_update(int mode)
|
||||
|
||||
switch (mode) {
|
||||
case preempt_dynamic_none:
|
||||
preempt_dynamic_enable(cond_resched);
|
||||
if (!klp_override)
|
||||
preempt_dynamic_enable(cond_resched);
|
||||
preempt_dynamic_disable(might_resched);
|
||||
preempt_dynamic_disable(preempt_schedule);
|
||||
preempt_dynamic_disable(preempt_schedule_notrace);
|
||||
preempt_dynamic_disable(irqentry_exit_cond_resched);
|
||||
pr_info("Dynamic Preempt: none\n");
|
||||
if (mode != preempt_dynamic_mode)
|
||||
pr_info("Dynamic Preempt: none\n");
|
||||
break;
|
||||
|
||||
case preempt_dynamic_voluntary:
|
||||
preempt_dynamic_enable(cond_resched);
|
||||
if (!klp_override)
|
||||
preempt_dynamic_enable(cond_resched);
|
||||
preempt_dynamic_enable(might_resched);
|
||||
preempt_dynamic_disable(preempt_schedule);
|
||||
preempt_dynamic_disable(preempt_schedule_notrace);
|
||||
preempt_dynamic_disable(irqentry_exit_cond_resched);
|
||||
pr_info("Dynamic Preempt: voluntary\n");
|
||||
if (mode != preempt_dynamic_mode)
|
||||
pr_info("Dynamic Preempt: voluntary\n");
|
||||
break;
|
||||
|
||||
case preempt_dynamic_full:
|
||||
preempt_dynamic_disable(cond_resched);
|
||||
if (!klp_override)
|
||||
preempt_dynamic_disable(cond_resched);
|
||||
preempt_dynamic_disable(might_resched);
|
||||
preempt_dynamic_enable(preempt_schedule);
|
||||
preempt_dynamic_enable(preempt_schedule_notrace);
|
||||
preempt_dynamic_enable(irqentry_exit_cond_resched);
|
||||
pr_info("Dynamic Preempt: full\n");
|
||||
if (mode != preempt_dynamic_mode)
|
||||
pr_info("Dynamic Preempt: full\n");
|
||||
break;
|
||||
}
|
||||
|
||||
preempt_dynamic_mode = mode;
|
||||
}
|
||||
|
||||
void sched_dynamic_update(int mode)
|
||||
{
|
||||
mutex_lock(&sched_dynamic_mutex);
|
||||
__sched_dynamic_update(mode);
|
||||
mutex_unlock(&sched_dynamic_mutex);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_PREEMPT_DYNAMIC_CALL
|
||||
|
||||
static int klp_cond_resched(void)
|
||||
{
|
||||
__klp_sched_try_switch();
|
||||
return __cond_resched();
|
||||
}
|
||||
|
||||
void sched_dynamic_klp_enable(void)
|
||||
{
|
||||
mutex_lock(&sched_dynamic_mutex);
|
||||
|
||||
klp_override = true;
|
||||
static_call_update(cond_resched, klp_cond_resched);
|
||||
|
||||
mutex_unlock(&sched_dynamic_mutex);
|
||||
}
|
||||
|
||||
void sched_dynamic_klp_disable(void)
|
||||
{
|
||||
mutex_lock(&sched_dynamic_mutex);
|
||||
|
||||
klp_override = false;
|
||||
__sched_dynamic_update(preempt_dynamic_mode);
|
||||
|
||||
mutex_unlock(&sched_dynamic_mutex);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
|
||||
|
||||
static int __init setup_preempt_mode(char *str)
|
||||
{
|
||||
int mode = sched_dynamic_mode(str);
|
||||
@ -10334,7 +10414,7 @@ void sched_release_group(struct task_group *tg)
|
||||
spin_unlock_irqrestore(&task_group_lock, flags);
|
||||
}
|
||||
|
||||
static void sched_change_group(struct task_struct *tsk)
|
||||
static struct task_group *sched_get_task_group(struct task_struct *tsk)
|
||||
{
|
||||
struct task_group *tg;
|
||||
|
||||
@ -10346,7 +10426,13 @@ static void sched_change_group(struct task_struct *tsk)
|
||||
tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
|
||||
struct task_group, css);
|
||||
tg = autogroup_task_group(tsk, tg);
|
||||
tsk->sched_task_group = tg;
|
||||
|
||||
return tg;
|
||||
}
|
||||
|
||||
static void sched_change_group(struct task_struct *tsk, struct task_group *group)
|
||||
{
|
||||
tsk->sched_task_group = group;
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
if (tsk->sched_class->task_change_group)
|
||||
@ -10367,10 +10453,19 @@ void sched_move_task(struct task_struct *tsk)
|
||||
{
|
||||
int queued, running, queue_flags =
|
||||
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
|
||||
struct task_group *group;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
rq = task_rq_lock(tsk, &rf);
|
||||
/*
|
||||
* Esp. with SCHED_AUTOGROUP enabled it is possible to get superfluous
|
||||
* group changes.
|
||||
*/
|
||||
group = sched_get_task_group(tsk);
|
||||
if (group == tsk->sched_task_group)
|
||||
goto unlock;
|
||||
|
||||
update_rq_clock(rq);
|
||||
|
||||
running = task_current(rq, tsk);
|
||||
@ -10381,7 +10476,7 @@ void sched_move_task(struct task_struct *tsk)
|
||||
if (running)
|
||||
put_prev_task(rq, tsk);
|
||||
|
||||
sched_change_group(tsk);
|
||||
sched_change_group(tsk, group);
|
||||
|
||||
if (queued)
|
||||
enqueue_task(rq, tsk, queue_flags);
|
||||
@ -10395,6 +10490,7 @@ void sched_move_task(struct task_struct *tsk)
|
||||
resched_curr(rq);
|
||||
}
|
||||
|
||||
unlock:
|
||||
task_rq_unlock(rq, tsk, &rf);
|
||||
}
|
||||
|
||||
@ -11385,45 +11481,524 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SCHED_MM_CID
|
||||
void sched_mm_cid_exit_signals(struct task_struct *t)
|
||||
|
||||
/**
|
||||
* @cid_lock: Guarantee forward-progress of cid allocation.
|
||||
*
|
||||
* Concurrency ID allocation within a bitmap is mostly lock-free. The cid_lock
|
||||
* is only used when contention is detected by the lock-free allocation so
|
||||
* forward progress can be guaranteed.
|
||||
*/
|
||||
DEFINE_RAW_SPINLOCK(cid_lock);
|
||||
|
||||
/**
|
||||
* @use_cid_lock: Select cid allocation behavior: lock-free vs spinlock.
|
||||
*
|
||||
* When @use_cid_lock is 0, the cid allocation is lock-free. When contention is
|
||||
* detected, it is set to 1 to ensure that all newly coming allocations are
|
||||
* serialized by @cid_lock until the allocation which detected contention
|
||||
* completes and sets @use_cid_lock back to 0. This guarantees forward progress
|
||||
* of a cid allocation.
|
||||
*/
|
||||
int use_cid_lock;
|
||||
|
||||
/*
|
||||
* mm_cid remote-clear implements a lock-free algorithm to clear per-mm/cpu cid
|
||||
* concurrently with respect to the execution of the source runqueue context
|
||||
* switch.
|
||||
*
|
||||
* There is one basic properties we want to guarantee here:
|
||||
*
|
||||
* (1) Remote-clear should _never_ mark a per-cpu cid UNSET when it is actively
|
||||
* used by a task. That would lead to concurrent allocation of the cid and
|
||||
* userspace corruption.
|
||||
*
|
||||
* Provide this guarantee by introducing a Dekker memory ordering to guarantee
|
||||
* that a pair of loads observe at least one of a pair of stores, which can be
|
||||
* shown as:
|
||||
*
|
||||
* X = Y = 0
|
||||
*
|
||||
* w[X]=1 w[Y]=1
|
||||
* MB MB
|
||||
* r[Y]=y r[X]=x
|
||||
*
|
||||
* Which guarantees that x==0 && y==0 is impossible. But rather than using
|
||||
* values 0 and 1, this algorithm cares about specific state transitions of the
|
||||
* runqueue current task (as updated by the scheduler context switch), and the
|
||||
* per-mm/cpu cid value.
|
||||
*
|
||||
* Let's introduce task (Y) which has task->mm == mm and task (N) which has
|
||||
* task->mm != mm for the rest of the discussion. There are two scheduler state
|
||||
* transitions on context switch we care about:
|
||||
*
|
||||
* (TSA) Store to rq->curr with transition from (N) to (Y)
|
||||
*
|
||||
* (TSB) Store to rq->curr with transition from (Y) to (N)
|
||||
*
|
||||
* On the remote-clear side, there is one transition we care about:
|
||||
*
|
||||
* (TMA) cmpxchg to *pcpu_cid to set the LAZY flag
|
||||
*
|
||||
* There is also a transition to UNSET state which can be performed from all
|
||||
* sides (scheduler, remote-clear). It is always performed with a cmpxchg which
|
||||
* guarantees that only a single thread will succeed:
|
||||
*
|
||||
* (TMB) cmpxchg to *pcpu_cid to mark UNSET
|
||||
*
|
||||
* Just to be clear, what we do _not_ want to happen is a transition to UNSET
|
||||
* when a thread is actively using the cid (property (1)).
|
||||
*
|
||||
* Let's looks at the relevant combinations of TSA/TSB, and TMA transitions.
|
||||
*
|
||||
* Scenario A) (TSA)+(TMA) (from next task perspective)
|
||||
*
|
||||
* CPU0 CPU1
|
||||
*
|
||||
* Context switch CS-1 Remote-clear
|
||||
* - store to rq->curr: (N)->(Y) (TSA) - cmpxchg to *pcpu_id to LAZY (TMA)
|
||||
* (implied barrier after cmpxchg)
|
||||
* - switch_mm_cid()
|
||||
* - memory barrier (see switch_mm_cid()
|
||||
* comment explaining how this barrier
|
||||
* is combined with other scheduler
|
||||
* barriers)
|
||||
* - mm_cid_get (next)
|
||||
* - READ_ONCE(*pcpu_cid) - rcu_dereference(src_rq->curr)
|
||||
*
|
||||
* This Dekker ensures that either task (Y) is observed by the
|
||||
* rcu_dereference() or the LAZY flag is observed by READ_ONCE(), or both are
|
||||
* observed.
|
||||
*
|
||||
* If task (Y) store is observed by rcu_dereference(), it means that there is
|
||||
* still an active task on the cpu. Remote-clear will therefore not transition
|
||||
* to UNSET, which fulfills property (1).
|
||||
*
|
||||
* If task (Y) is not observed, but the lazy flag is observed by READ_ONCE(),
|
||||
* it will move its state to UNSET, which clears the percpu cid perhaps
|
||||
* uselessly (which is not an issue for correctness). Because task (Y) is not
|
||||
* observed, CPU1 can move ahead to set the state to UNSET. Because moving
|
||||
* state to UNSET is done with a cmpxchg expecting that the old state has the
|
||||
* LAZY flag set, only one thread will successfully UNSET.
|
||||
*
|
||||
* If both states (LAZY flag and task (Y)) are observed, the thread on CPU0
|
||||
* will observe the LAZY flag and transition to UNSET (perhaps uselessly), and
|
||||
* CPU1 will observe task (Y) and do nothing more, which is fine.
|
||||
*
|
||||
* What we are effectively preventing with this Dekker is a scenario where
|
||||
* neither LAZY flag nor store (Y) are observed, which would fail property (1)
|
||||
* because this would UNSET a cid which is actively used.
|
||||
*/
|
||||
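The store-buffering argument in the comment above can be reproduced outside the kernel. A self-contained C11 sketch of the same pattern, with relaxed accesses separated by full fences (standing in for the scheduler and cmpxchg barriers), shows why the outcome x == 0 && y == 0 cannot be observed:

/* Standalone C11 illustration of the Dekker / store-buffering pattern. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int X, Y;
static int x, y;

static void *t0(void *arg)
{
	atomic_store_explicit(&X, 1, memory_order_relaxed);	/* w[X]=1 */
	atomic_thread_fence(memory_order_seq_cst);		/* MB */
	y = atomic_load_explicit(&Y, memory_order_relaxed);	/* r[Y]=y */
	return NULL;
}

static void *t1(void *arg)
{
	atomic_store_explicit(&Y, 1, memory_order_relaxed);	/* w[Y]=1 */
	atomic_thread_fence(memory_order_seq_cst);		/* MB */
	x = atomic_load_explicit(&X, memory_order_relaxed);	/* r[X]=x */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, t0, NULL);
	pthread_create(&b, NULL, t1, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* At least one thread must observe the other's store. */
	printf("x=%d y=%d (x==0 && y==0 cannot happen)\n", x, y);
	return 0;
}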
|
||||
void sched_mm_cid_migrate_from(struct task_struct *t)
|
||||
{
|
||||
t->migrate_from_cpu = task_cpu(t);
|
||||
}
|
||||
|
||||
static
|
||||
int __sched_mm_cid_migrate_from_fetch_cid(struct rq *src_rq,
|
||||
struct task_struct *t,
|
||||
struct mm_cid *src_pcpu_cid)
|
||||
{
|
||||
struct mm_struct *mm = t->mm;
|
||||
unsigned long flags;
|
||||
struct task_struct *src_task;
|
||||
int src_cid, last_mm_cid;
|
||||
|
||||
if (!mm)
|
||||
return -1;
|
||||
|
||||
last_mm_cid = t->last_mm_cid;
|
||||
/*
|
||||
* If the migrated task has no last cid, or if the current
|
||||
* task on src rq uses the cid, it means the source cid does not need
|
||||
* to be moved to the destination cpu.
|
||||
*/
|
||||
if (last_mm_cid == -1)
|
||||
return -1;
|
||||
src_cid = READ_ONCE(src_pcpu_cid->cid);
|
||||
if (!mm_cid_is_valid(src_cid) || last_mm_cid != src_cid)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* If we observe an active task using the mm on this rq, it means we
|
||||
* are not the last task to be migrated from this cpu for this mm, so
|
||||
* there is no need to move src_cid to the destination cpu.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
src_task = rcu_dereference(src_rq->curr);
|
||||
if (READ_ONCE(src_task->mm_cid_active) && src_task->mm == mm) {
|
||||
rcu_read_unlock();
|
||||
t->last_mm_cid = -1;
|
||||
return -1;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return src_cid;
|
||||
}
|
||||
|
||||
static
|
||||
int __sched_mm_cid_migrate_from_try_steal_cid(struct rq *src_rq,
|
||||
struct task_struct *t,
|
||||
struct mm_cid *src_pcpu_cid,
|
||||
int src_cid)
|
||||
{
|
||||
struct task_struct *src_task;
|
||||
struct mm_struct *mm = t->mm;
|
||||
int lazy_cid;
|
||||
|
||||
if (src_cid == -1)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Attempt to clear the source cpu cid to move it to the destination
|
||||
* cpu.
|
||||
*/
|
||||
lazy_cid = mm_cid_set_lazy_put(src_cid);
|
||||
if (!try_cmpxchg(&src_pcpu_cid->cid, &src_cid, lazy_cid))
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* The implicit barrier after cmpxchg per-mm/cpu cid before loading
|
||||
* rq->curr->mm matches the scheduler barrier in context_switch()
|
||||
* between store to rq->curr and load of prev and next task's
|
||||
* per-mm/cpu cid.
|
||||
*
|
||||
* The implicit barrier after cmpxchg per-mm/cpu cid before loading
|
||||
* rq->curr->mm_cid_active matches the barrier in
|
||||
* sched_mm_cid_exit_signals(), sched_mm_cid_before_execve(), and
|
||||
* sched_mm_cid_after_execve() between store to t->mm_cid_active and
|
||||
* load of per-mm/cpu cid.
|
||||
*/
|
||||
|
||||
/*
|
||||
* If we observe an active task using the mm on this rq after setting
|
||||
* the lazy-put flag, this task will be responsible for transitioning
|
||||
* from lazy-put flag set to MM_CID_UNSET.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
src_task = rcu_dereference(src_rq->curr);
|
||||
if (READ_ONCE(src_task->mm_cid_active) && src_task->mm == mm) {
|
||||
rcu_read_unlock();
|
||||
/*
|
||||
* We observed an active task for this mm, there is therefore
|
||||
* no point in moving this cid to the destination cpu.
|
||||
*/
|
||||
t->last_mm_cid = -1;
|
||||
return -1;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* The src_cid is unused, so it can be unset.
|
||||
*/
|
||||
if (!try_cmpxchg(&src_pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
|
||||
return -1;
|
||||
return src_cid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Migration to dst cpu. Called with dst_rq lock held.
|
||||
* Interrupts are disabled, which keeps the window of cid ownership without the
|
||||
* source rq lock held small.
|
||||
*/
|
||||
void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
|
||||
{
|
||||
struct mm_cid *src_pcpu_cid, *dst_pcpu_cid;
|
||||
struct mm_struct *mm = t->mm;
|
||||
int src_cid, dst_cid, src_cpu;
|
||||
struct rq *src_rq;
|
||||
|
||||
lockdep_assert_rq_held(dst_rq);
|
||||
|
||||
if (!mm)
|
||||
return;
|
||||
src_cpu = t->migrate_from_cpu;
|
||||
if (src_cpu == -1) {
|
||||
t->last_mm_cid = -1;
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* Move the src cid if the dst cid is unset. This keeps id
|
||||
* allocation closest to 0 in cases where few threads migrate around
|
||||
* many cpus.
|
||||
*
|
||||
* If destination cid is already set, we may have to just clear
|
||||
* the src cid to ensure compactness in frequent migrations
|
||||
* scenarios.
|
||||
*
|
||||
* It is not useful to clear the src cid when the number of threads is
|
||||
* greater or equal to the number of allowed cpus, because user-space
|
||||
* can expect that the number of allowed cids can reach the number of
|
||||
* allowed cpus.
|
||||
*/
|
||||
dst_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(dst_rq));
|
||||
dst_cid = READ_ONCE(dst_pcpu_cid->cid);
|
||||
if (!mm_cid_is_unset(dst_cid) &&
|
||||
atomic_read(&mm->mm_users) >= t->nr_cpus_allowed)
|
||||
return;
|
||||
src_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, src_cpu);
|
||||
src_rq = cpu_rq(src_cpu);
|
||||
src_cid = __sched_mm_cid_migrate_from_fetch_cid(src_rq, t, src_pcpu_cid);
|
||||
if (src_cid == -1)
|
||||
return;
|
||||
src_cid = __sched_mm_cid_migrate_from_try_steal_cid(src_rq, t, src_pcpu_cid,
|
||||
src_cid);
|
||||
if (src_cid == -1)
|
||||
return;
|
||||
if (!mm_cid_is_unset(dst_cid)) {
|
||||
__mm_cid_put(mm, src_cid);
|
||||
return;
|
||||
}
|
||||
/* Move src_cid to dst cpu. */
|
||||
mm_cid_snapshot_time(dst_rq, mm);
|
||||
WRITE_ONCE(dst_pcpu_cid->cid, src_cid);
|
||||
}
|
||||
|
||||
static void sched_mm_cid_remote_clear(struct mm_struct *mm, struct mm_cid *pcpu_cid,
|
||||
int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
struct task_struct *t;
|
||||
unsigned long flags;
|
||||
int cid, lazy_cid;
|
||||
|
||||
cid = READ_ONCE(pcpu_cid->cid);
|
||||
if (!mm_cid_is_valid(cid))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Clear the cpu cid if it is set to keep cid allocation compact. If
|
||||
* there happens to be other tasks left on the source cpu using this
|
||||
* mm, the next task using this mm will reallocate its cid on context
|
||||
* switch.
|
||||
*/
|
||||
lazy_cid = mm_cid_set_lazy_put(cid);
|
||||
if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The implicit barrier after cmpxchg per-mm/cpu cid before loading
|
||||
* rq->curr->mm matches the scheduler barrier in context_switch()
|
||||
* between store to rq->curr and load of prev and next task's
|
||||
* per-mm/cpu cid.
|
||||
*
|
||||
* The implicit barrier after cmpxchg per-mm/cpu cid before loading
|
||||
* rq->curr->mm_cid_active matches the barrier in
|
||||
* sched_mm_cid_exit_signals(), sched_mm_cid_before_execve(), and
|
||||
* sched_mm_cid_after_execve() between store to t->mm_cid_active and
|
||||
* load of per-mm/cpu cid.
|
||||
*/
|
||||
|
||||
/*
|
||||
* If we observe an active task using the mm on this rq after setting
|
||||
* the lazy-put flag, that task will be responsible for transitioning
|
||||
* from lazy-put flag set to MM_CID_UNSET.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
t = rcu_dereference(rq->curr);
|
||||
if (READ_ONCE(t->mm_cid_active) && t->mm == mm) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* The cid is unused, so it can be unset.
|
||||
* Disable interrupts to keep the window of cid ownership without rq
|
||||
* lock small.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
mm_cid_put(mm, t->mm_cid);
|
||||
t->mm_cid = -1;
|
||||
t->mm_cid_active = 0;
|
||||
if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
|
||||
__mm_cid_put(mm, cid);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static void sched_mm_cid_remote_clear_old(struct mm_struct *mm, int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
struct mm_cid *pcpu_cid;
|
||||
struct task_struct *curr;
|
||||
u64 rq_clock;
|
||||
|
||||
/*
|
||||
* rq->clock load is racy on 32-bit but one spurious clear once in a
|
||||
* while is irrelevant.
|
||||
*/
|
||||
rq_clock = READ_ONCE(rq->clock);
|
||||
pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu);
|
||||
|
||||
/*
|
||||
* In order to take care of infrequently scheduled tasks, bump the time
|
||||
* snapshot associated with this cid if an active task using the mm is
|
||||
* observed on this rq.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
curr = rcu_dereference(rq->curr);
|
||||
if (READ_ONCE(curr->mm_cid_active) && curr->mm == mm) {
|
||||
WRITE_ONCE(pcpu_cid->time, rq_clock);
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (rq_clock < pcpu_cid->time + SCHED_MM_CID_PERIOD_NS)
|
||||
return;
|
||||
sched_mm_cid_remote_clear(mm, pcpu_cid, cpu);
|
||||
}
|
||||
|
||||
static void sched_mm_cid_remote_clear_weight(struct mm_struct *mm, int cpu,
|
||||
int weight)
|
||||
{
|
||||
struct mm_cid *pcpu_cid;
|
||||
int cid;
|
||||
|
||||
pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu);
|
||||
cid = READ_ONCE(pcpu_cid->cid);
|
||||
if (!mm_cid_is_valid(cid) || cid < weight)
|
||||
return;
|
||||
sched_mm_cid_remote_clear(mm, pcpu_cid, cpu);
|
||||
}
|
||||
|
||||
static void task_mm_cid_work(struct callback_head *work)
|
||||
{
|
||||
unsigned long now = jiffies, old_scan, next_scan;
|
||||
struct task_struct *t = current;
|
||||
struct cpumask *cidmask;
|
||||
struct mm_struct *mm;
|
||||
int weight, cpu;
|
||||
|
||||
SCHED_WARN_ON(t != container_of(work, struct task_struct, cid_work));
|
||||
|
||||
work->next = work; /* Prevent double-add */
|
||||
if (t->flags & PF_EXITING)
|
||||
return;
|
||||
mm = t->mm;
|
||||
if (!mm)
|
||||
return;
|
||||
old_scan = READ_ONCE(mm->mm_cid_next_scan);
|
||||
next_scan = now + msecs_to_jiffies(MM_CID_SCAN_DELAY);
|
||||
if (!old_scan) {
|
||||
unsigned long res;
|
||||
|
||||
res = cmpxchg(&mm->mm_cid_next_scan, old_scan, next_scan);
|
||||
if (res != old_scan)
|
||||
old_scan = res;
|
||||
else
|
||||
old_scan = next_scan;
|
||||
}
|
||||
if (time_before(now, old_scan))
|
||||
return;
|
||||
if (!try_cmpxchg(&mm->mm_cid_next_scan, &old_scan, next_scan))
|
||||
return;
|
||||
cidmask = mm_cidmask(mm);
|
||||
/* Clear cids that were not recently used. */
|
||||
for_each_possible_cpu(cpu)
|
||||
sched_mm_cid_remote_clear_old(mm, cpu);
|
||||
weight = cpumask_weight(cidmask);
|
||||
/*
|
||||
* Clear cids that are greater or equal to the cidmask weight to
|
||||
* recompact it.
|
||||
*/
|
||||
for_each_possible_cpu(cpu)
|
||||
sched_mm_cid_remote_clear_weight(mm, cpu, weight);
|
||||
}
|
||||
|
||||
void init_sched_mm_cid(struct task_struct *t)
|
||||
{
|
||||
struct mm_struct *mm = t->mm;
|
||||
int mm_users = 0;
|
||||
|
||||
if (mm) {
|
||||
mm_users = atomic_read(&mm->mm_users);
|
||||
if (mm_users == 1)
|
||||
mm->mm_cid_next_scan = jiffies + msecs_to_jiffies(MM_CID_SCAN_DELAY);
|
||||
}
|
||||
t->cid_work.next = &t->cid_work; /* Protect against double add */
|
||||
init_task_work(&t->cid_work, task_mm_cid_work);
|
||||
}
|
||||
|
||||
void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
|
||||
{
|
||||
struct callback_head *work = &curr->cid_work;
|
||||
unsigned long now = jiffies;
|
||||
|
||||
if (!curr->mm || (curr->flags & (PF_EXITING | PF_KTHREAD)) ||
|
||||
work->next != work)
|
||||
return;
|
||||
if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
|
||||
return;
|
||||
task_work_add(curr, work, TWA_RESUME);
|
||||
}
|
||||
|
||||
void sched_mm_cid_exit_signals(struct task_struct *t)
|
||||
{
|
||||
struct mm_struct *mm = t->mm;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
if (!mm)
|
||||
return;
|
||||
|
||||
preempt_disable();
|
||||
rq = this_rq();
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
preempt_enable_no_resched(); /* holding spinlock */
|
||||
WRITE_ONCE(t->mm_cid_active, 0);
|
||||
/*
|
||||
* Store t->mm_cid_active before loading per-mm/cpu cid.
|
||||
* Matches barrier in sched_mm_cid_remote_clear_old().
|
||||
*/
|
||||
smp_mb();
|
||||
mm_cid_put(mm);
|
||||
t->last_mm_cid = t->mm_cid = -1;
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
}
|
||||
|
||||
void sched_mm_cid_before_execve(struct task_struct *t)
|
||||
{
|
||||
struct mm_struct *mm = t->mm;
|
||||
unsigned long flags;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
if (!mm)
|
||||
return;
|
||||
local_irq_save(flags);
|
||||
mm_cid_put(mm, t->mm_cid);
|
||||
t->mm_cid = -1;
|
||||
t->mm_cid_active = 0;
|
||||
local_irq_restore(flags);
|
||||
|
||||
preempt_disable();
|
||||
rq = this_rq();
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
preempt_enable_no_resched(); /* holding spinlock */
|
||||
WRITE_ONCE(t->mm_cid_active, 0);
|
||||
/*
|
||||
* Store t->mm_cid_active before loading per-mm/cpu cid.
|
||||
* Matches barrier in sched_mm_cid_remote_clear_old().
|
||||
*/
|
||||
smp_mb();
|
||||
mm_cid_put(mm);
|
||||
t->last_mm_cid = t->mm_cid = -1;
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
}
|
||||
|
||||
void sched_mm_cid_after_execve(struct task_struct *t)
|
||||
{
|
||||
struct mm_struct *mm = t->mm;
|
||||
unsigned long flags;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
if (!mm)
|
||||
return;
|
||||
local_irq_save(flags);
|
||||
t->mm_cid = mm_cid_get(mm);
|
||||
t->mm_cid_active = 1;
|
||||
local_irq_restore(flags);
|
||||
|
||||
preempt_disable();
|
||||
rq = this_rq();
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
preempt_enable_no_resched(); /* holding spinlock */
|
||||
WRITE_ONCE(t->mm_cid_active, 1);
|
||||
/*
|
||||
* Store t->mm_cid_active before loading per-mm/cpu cid.
|
||||
* Matches barrier in sched_mm_cid_remote_clear_old().
|
||||
*/
|
||||
smp_mb();
|
||||
t->last_mm_cid = t->mm_cid = mm_cid_get(rq, mm);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
rseq_set_notify_resume(t);
|
||||
}
|
||||
|
||||
|
@ -2246,6 +2246,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
|
||||
!cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
|
||||
task_on_cpu(rq, task) ||
|
||||
!dl_task(task) ||
|
||||
is_migration_disabled(task) ||
|
||||
!task_on_rq_queued(task))) {
|
||||
double_unlock_balance(rq, later_rq);
|
||||
later_rq = NULL;
|
||||
@ -2704,6 +2705,13 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SCHED_CORE
|
||||
static int task_is_throttled_dl(struct task_struct *p, int cpu)
|
||||
{
|
||||
return p->dl.dl_throttled;
|
||||
}
|
||||
#endif
|
||||
|
||||
DEFINE_SCHED_CLASS(dl) = {
|
||||
|
||||
.enqueue_task = enqueue_task_dl,
|
||||
@ -2736,6 +2744,9 @@ DEFINE_SCHED_CLASS(dl) = {
|
||||
.switched_to = switched_to_dl,
|
||||
|
||||
.update_curr = update_curr_dl,
|
||||
#ifdef CONFIG_SCHED_CORE
|
||||
.task_is_throttled = task_is_throttled_dl,
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
|
||||
|
@@ -6016,6 +6016,10 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
 	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 	cfs_b->period_timer.function = sched_cfs_period_timer;
+
+	/* Add a random offset so that timers interleave */
+	hrtimer_set_expires(&cfs_b->period_timer,
+			    get_random_u32_below(cfs_b->period));
 	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	cfs_b->slack_timer.function = sched_cfs_slack_timer;
 	cfs_b->slack_started = false;
@@ -6671,7 +6675,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
 
 	schedstat_inc(p->stats.nr_wakeups_affine_attempts);
-	if (target == nr_cpumask_bits)
+	if (target != this_cpu)
 		return prev_cpu;
 
 	schedstat_inc(sd->ttwu_move_affine);
@ -12033,6 +12037,18 @@ bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,
|
||||
|
||||
return delta > 0;
|
||||
}
|
||||
|
||||
static int task_is_throttled_fair(struct task_struct *p, int cpu)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
cfs_rq = task_group(p)->cfs_rq[cpu];
|
||||
#else
|
||||
cfs_rq = &cpu_rq(cpu)->cfs;
|
||||
#endif
|
||||
return throttled_hierarchy(cfs_rq);
|
||||
}
|
||||
#else
|
||||
static inline void task_tick_core(struct rq *rq, struct task_struct *curr) {}
|
||||
#endif
|
||||
@ -12659,6 +12675,10 @@ DEFINE_SCHED_CLASS(fair) = {
|
||||
.task_change_group = task_change_group_fair,
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_CORE
|
||||
.task_is_throttled = task_is_throttled_fair,
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
.uclamp_enabled = 1,
|
||||
#endif
|
||||
|
@@ -186,17 +186,22 @@ static void group_init(struct psi_group *group)
		seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
	group->avg_last_update = sched_clock();
	group->avg_next_update = group->avg_last_update + psi_period;
	INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
	mutex_init(&group->avgs_lock);
	/* Init trigger-related members */
	atomic_set(&group->poll_scheduled, 0);
	mutex_init(&group->trigger_lock);
	INIT_LIST_HEAD(&group->triggers);
	group->poll_min_period = U32_MAX;
	group->polling_next_update = ULLONG_MAX;
	init_waitqueue_head(&group->poll_wait);
	timer_setup(&group->poll_timer, poll_timer_fn, 0);
	rcu_assign_pointer(group->poll_task, NULL);

	/* Init avg trigger-related members */
	INIT_LIST_HEAD(&group->avg_triggers);
	memset(group->avg_nr_triggers, 0, sizeof(group->avg_nr_triggers));
	INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);

	/* Init rtpoll trigger-related members */
	atomic_set(&group->rtpoll_scheduled, 0);
	mutex_init(&group->rtpoll_trigger_lock);
	INIT_LIST_HEAD(&group->rtpoll_triggers);
	group->rtpoll_min_period = U32_MAX;
	group->rtpoll_next_update = ULLONG_MAX;
	init_waitqueue_head(&group->rtpoll_wait);
	timer_setup(&group->rtpoll_timer, poll_timer_fn, 0);
	rcu_assign_pointer(group->rtpoll_task, NULL);
}

void __init psi_init(void)
@@ -384,6 +389,121 @@ static void collect_percpu_times(struct psi_group *group,
	*pchanged_states = changed_states;
}

/* Trigger tracking window manipulations */
static void window_reset(struct psi_window *win, u64 now, u64 value,
			 u64 prev_growth)
{
	win->start_time = now;
	win->start_value = value;
	win->prev_growth = prev_growth;
}

/*
 * PSI growth tracking window update and growth calculation routine.
 *
 * This approximates a sliding tracking window by interpolating
 * partially elapsed windows using historical growth data from the
 * previous intervals. This minimizes memory requirements (by not storing
 * all the intermediate values in the previous window) and simplifies
 * the calculations. It works well because PSI signal changes only in
 * positive direction and over relatively small window sizes the growth
 * is close to linear.
 */
static u64 window_update(struct psi_window *win, u64 now, u64 value)
{
	u64 elapsed;
	u64 growth;

	elapsed = now - win->start_time;
	growth = value - win->start_value;
	/*
	 * After each tracking window passes win->start_value and
	 * win->start_time get reset and win->prev_growth stores
	 * the average per-window growth of the previous window.
	 * win->prev_growth is then used to interpolate additional
	 * growth from the previous window assuming it was linear.
	 */
	if (elapsed > win->size)
		window_reset(win, now, value, growth);
	else {
		u32 remaining;

		remaining = win->size - elapsed;
		growth += div64_u64(win->prev_growth * remaining, win->size);
	}

	return growth;
}
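For clarity, the growth that window_update() above reports for a partially elapsed window works out to

	growth = (value - win->start_value)
	         + win->prev_growth * (win->size - elapsed) / win->size

As a purely illustrative example: with a 1s window, 300ms of stall growth in the previous window, 250ms elapsed in the current window and 80ms of new stall, the estimate is 80ms + 300ms * 750/1000 = 305ms.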
static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total,
			   enum psi_aggregators aggregator)
{
	struct psi_trigger *t;
	u64 *total = group->total[aggregator];
	struct list_head *triggers;
	u64 *aggregator_total;
	*update_total = false;

	if (aggregator == PSI_AVGS) {
		triggers = &group->avg_triggers;
		aggregator_total = group->avg_total;
	} else {
		triggers = &group->rtpoll_triggers;
		aggregator_total = group->rtpoll_total;
	}

	/*
	 * On subsequent updates, calculate growth deltas and let
	 * watchers know when their specified thresholds are exceeded.
	 */
	list_for_each_entry(t, triggers, node) {
		u64 growth;
		bool new_stall;

		new_stall = aggregator_total[t->state] != total[t->state];

		/* Check for stall activity or a previous threshold breach */
		if (!new_stall && !t->pending_event)
			continue;
		/*
		 * Check for new stall activity, as well as deferred
		 * events that occurred in the last window after the
		 * trigger had already fired (we want to ratelimit
		 * events without dropping any).
		 */
		if (new_stall) {
			/*
			 * Multiple triggers might be looking at the same state,
			 * remember to update group->polling_total[] once we've
			 * been through all of them. Also remember to extend the
			 * polling time if we see new stall activity.
			 */
			*update_total = true;

			/* Calculate growth since last update */
			growth = window_update(&t->win, now, total[t->state]);
			if (!t->pending_event) {
				if (growth < t->threshold)
					continue;

				t->pending_event = true;
			}
		}
		/* Limit event signaling to once per window */
		if (now < t->last_event_time + t->win.size)
			continue;

		/* Generate an event */
		if (cmpxchg(&t->event, 0, 1) == 0)
			wake_up_interruptible(&t->event_wait);
		t->last_event_time = now;
		/* Reset threshold breach flag once event got generated */
		t->pending_event = false;
	}

	return now + group->rtpoll_min_period;
}

static u64 update_averages(struct psi_group *group, u64 now)
{
	unsigned long missed_periods = 0;
@@ -442,6 +562,7 @@ static void psi_avgs_work(struct work_struct *work)
	struct delayed_work *dwork;
	struct psi_group *group;
	u32 changed_states;
	bool update_total;
	u64 now;

	dwork = to_delayed_work(work);
@@ -459,8 +580,10 @@ static void psi_avgs_work(struct work_struct *work)
	 * Once restarted, we'll catch up the running averages in one
	 * go - see calc_avgs() and missed_periods.
	 */
	if (now >= group->avg_next_update)
	if (now >= group->avg_next_update) {
		update_triggers(group, now, &update_total, PSI_AVGS);
		group->avg_next_update = update_averages(group, now);
	}

	if (changed_states & PSI_STATE_RESCHEDULE) {
		schedule_delayed_work(dwork, nsecs_to_jiffies(
@@ -470,165 +593,58 @@ static void psi_avgs_work(struct work_struct *work)
	mutex_unlock(&group->avgs_lock);
}

/* Trigger tracking window manipulations */
static void window_reset(struct psi_window *win, u64 now, u64 value,
			 u64 prev_growth)
{
	win->start_time = now;
	win->start_value = value;
	win->prev_growth = prev_growth;
}

/*
 * PSI growth tracking window update and growth calculation routine.
 *
 * This approximates a sliding tracking window by interpolating
 * partially elapsed windows using historical growth data from the
 * previous intervals. This minimizes memory requirements (by not storing
 * all the intermediate values in the previous window) and simplifies
 * the calculations. It works well because PSI signal changes only in
 * positive direction and over relatively small window sizes the growth
 * is close to linear.
 */
static u64 window_update(struct psi_window *win, u64 now, u64 value)
{
	u64 elapsed;
	u64 growth;

	elapsed = now - win->start_time;
	growth = value - win->start_value;
	/*
	 * After each tracking window passes win->start_value and
	 * win->start_time get reset and win->prev_growth stores
	 * the average per-window growth of the previous window.
	 * win->prev_growth is then used to interpolate additional
	 * growth from the previous window assuming it was linear.
	 */
	if (elapsed > win->size)
		window_reset(win, now, value, growth);
	else {
		u32 remaining;

		remaining = win->size - elapsed;
		growth += div64_u64(win->prev_growth * remaining, win->size);
	}

	return growth;
}

static void init_triggers(struct psi_group *group, u64 now)
static void init_rtpoll_triggers(struct psi_group *group, u64 now)
{
	struct psi_trigger *t;

	list_for_each_entry(t, &group->triggers, node)
	list_for_each_entry(t, &group->rtpoll_triggers, node)
		window_reset(&t->win, now,
			     group->total[PSI_POLL][t->state], 0);
	memcpy(group->polling_total, group->total[PSI_POLL],
	       sizeof(group->polling_total));
	group->polling_next_update = now + group->poll_min_period;
}

static u64 update_triggers(struct psi_group *group, u64 now)
{
	struct psi_trigger *t;
	bool update_total = false;
	u64 *total = group->total[PSI_POLL];

	/*
	 * On subsequent updates, calculate growth deltas and let
	 * watchers know when their specified thresholds are exceeded.
	 */
	list_for_each_entry(t, &group->triggers, node) {
		u64 growth;
		bool new_stall;

		new_stall = group->polling_total[t->state] != total[t->state];

		/* Check for stall activity or a previous threshold breach */
		if (!new_stall && !t->pending_event)
			continue;
		/*
		 * Check for new stall activity, as well as deferred
		 * events that occurred in the last window after the
		 * trigger had already fired (we want to ratelimit
		 * events without dropping any).
		 */
		if (new_stall) {
			/*
			 * Multiple triggers might be looking at the same state,
			 * remember to update group->polling_total[] once we've
			 * been through all of them. Also remember to extend the
			 * polling time if we see new stall activity.
			 */
			update_total = true;

			/* Calculate growth since last update */
			growth = window_update(&t->win, now, total[t->state]);
			if (!t->pending_event) {
				if (growth < t->threshold)
					continue;

				t->pending_event = true;
			}
		}
		/* Limit event signaling to once per window */
		if (now < t->last_event_time + t->win.size)
			continue;

		/* Generate an event */
		if (cmpxchg(&t->event, 0, 1) == 0)
			wake_up_interruptible(&t->event_wait);
		t->last_event_time = now;
		/* Reset threshold breach flag once event got generated */
		t->pending_event = false;
	}

	if (update_total)
		memcpy(group->polling_total, total,
		       sizeof(group->polling_total));

	return now + group->poll_min_period;
	memcpy(group->rtpoll_total, group->total[PSI_POLL],
	       sizeof(group->rtpoll_total));
	group->rtpoll_next_update = now + group->rtpoll_min_period;
}

/* Schedule polling if it's not already scheduled or forced. */
static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay,
static void psi_schedule_rtpoll_work(struct psi_group *group, unsigned long delay,
				   bool force)
{
	struct task_struct *task;

	/*
	 * atomic_xchg should be called even when !force to provide a
	 * full memory barrier (see the comment inside psi_poll_work).
	 * full memory barrier (see the comment inside psi_rtpoll_work).
	 */
	if (atomic_xchg(&group->poll_scheduled, 1) && !force)
	if (atomic_xchg(&group->rtpoll_scheduled, 1) && !force)
		return;

	rcu_read_lock();

	task = rcu_dereference(group->poll_task);
	task = rcu_dereference(group->rtpoll_task);
	/*
	 * kworker might be NULL in case psi_trigger_destroy races with
	 * psi_task_change (hotpath) which can't use locks
	 */
	if (likely(task))
		mod_timer(&group->poll_timer, jiffies + delay);
		mod_timer(&group->rtpoll_timer, jiffies + delay);
	else
		atomic_set(&group->poll_scheduled, 0);
		atomic_set(&group->rtpoll_scheduled, 0);

	rcu_read_unlock();
}

static void psi_poll_work(struct psi_group *group)
static void psi_rtpoll_work(struct psi_group *group)
{
	bool force_reschedule = false;
	u32 changed_states;
	bool update_total;
	u64 now;

	mutex_lock(&group->trigger_lock);
	mutex_lock(&group->rtpoll_trigger_lock);

	now = sched_clock();

	if (now > group->polling_until) {
	if (now > group->rtpoll_until) {
		/*
		 * We are either about to start or might stop polling if no
		 * state change was recorded. Resetting poll_scheduled leaves
@@ -638,7 +654,7 @@ static void psi_poll_work(struct psi_group *group)
		 * should be negligible and polling_next_update still keeps
		 * updates correctly on schedule.
		 */
		atomic_set(&group->poll_scheduled, 0);
		atomic_set(&group->rtpoll_scheduled, 0);
		/*
		 * A task change can race with the poll worker that is supposed to
		 * report on it. To avoid missing events, ensure ordering between
@@ -667,60 +683,64 @@ static void psi_poll_work(struct psi_group *group)

	collect_percpu_times(group, PSI_POLL, &changed_states);

	if (changed_states & group->poll_states) {
	if (changed_states & group->rtpoll_states) {
		/* Initialize trigger windows when entering polling mode */
		if (now > group->polling_until)
			init_triggers(group, now);
		if (now > group->rtpoll_until)
			init_rtpoll_triggers(group, now);

		/*
		 * Keep the monitor active for at least the duration of the
		 * minimum tracking window as long as monitor states are
		 * changing.
		 */
		group->polling_until = now +
			group->poll_min_period * UPDATES_PER_WINDOW;
		group->rtpoll_until = now +
			group->rtpoll_min_period * UPDATES_PER_WINDOW;
	}

	if (now > group->polling_until) {
		group->polling_next_update = ULLONG_MAX;
	if (now > group->rtpoll_until) {
		group->rtpoll_next_update = ULLONG_MAX;
		goto out;
	}

	if (now >= group->polling_next_update)
		group->polling_next_update = update_triggers(group, now);
	if (now >= group->rtpoll_next_update) {
		group->rtpoll_next_update = update_triggers(group, now, &update_total, PSI_POLL);
		if (update_total)
			memcpy(group->rtpoll_total, group->total[PSI_POLL],
			       sizeof(group->rtpoll_total));
	}

	psi_schedule_poll_work(group,
		nsecs_to_jiffies(group->polling_next_update - now) + 1,
	psi_schedule_rtpoll_work(group,
		nsecs_to_jiffies(group->rtpoll_next_update - now) + 1,
		force_reschedule);

out:
	mutex_unlock(&group->trigger_lock);
	mutex_unlock(&group->rtpoll_trigger_lock);
}

static int psi_poll_worker(void *data)
static int psi_rtpoll_worker(void *data)
{
	struct psi_group *group = (struct psi_group *)data;

	sched_set_fifo_low(current);

	while (true) {
		wait_event_interruptible(group->poll_wait,
			atomic_cmpxchg(&group->poll_wakeup, 1, 0) ||
		wait_event_interruptible(group->rtpoll_wait,
			atomic_cmpxchg(&group->rtpoll_wakeup, 1, 0) ||
			kthread_should_stop());
		if (kthread_should_stop())
			break;

		psi_poll_work(group);
		psi_rtpoll_work(group);
	}
	return 0;
}

static void poll_timer_fn(struct timer_list *t)
{
	struct psi_group *group = from_timer(group, t, poll_timer);
	struct psi_group *group = from_timer(group, t, rtpoll_timer);

	atomic_set(&group->poll_wakeup, 1);
	wake_up_interruptible(&group->poll_wait);
	atomic_set(&group->rtpoll_wakeup, 1);
	wake_up_interruptible(&group->rtpoll_wait);
}

static void record_times(struct psi_group_cpu *groupc, u64 now)
@@ -851,8 +871,8 @@ static void psi_group_change(struct psi_group *group, int cpu,

	write_seqcount_end(&groupc->seq);

	if (state_mask & group->poll_states)
		psi_schedule_poll_work(group, 1, false);
	if (state_mask & group->rtpoll_states)
		psi_schedule_rtpoll_work(group, 1, false);

	if (wake_clock && !delayed_work_pending(&group->avgs_work))
		schedule_delayed_work(&group->avgs_work, PSI_FREQ);
@@ -1005,8 +1025,8 @@ void psi_account_irqtime(struct task_struct *task, u32 delta)

		write_seqcount_end(&groupc->seq);

		if (group->poll_states & (1 << PSI_IRQ_FULL))
			psi_schedule_poll_work(group, 1, false);
		if (group->rtpoll_states & (1 << PSI_IRQ_FULL))
			psi_schedule_rtpoll_work(group, 1, false);
	} while ((group = group->parent));
}
#endif
@@ -1101,7 +1121,7 @@ void psi_cgroup_free(struct cgroup *cgroup)
	cancel_delayed_work_sync(&cgroup->psi->avgs_work);
	free_percpu(cgroup->psi->pcpu);
	/* All triggers must be removed by now */
	WARN_ONCE(cgroup->psi->poll_states, "psi: trigger leak\n");
	WARN_ONCE(cgroup->psi->rtpoll_states, "psi: trigger leak\n");
	kfree(cgroup->psi);
}

@@ -1253,16 +1273,23 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
}

struct psi_trigger *psi_trigger_create(struct psi_group *group,
			char *buf, enum psi_res res)
			char *buf, enum psi_res res, struct file *file)
{
	struct psi_trigger *t;
	enum psi_states state;
	u32 threshold_us;
	bool privileged;
	u32 window_us;

	if (static_branch_likely(&psi_disabled))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * Checking the privilege here on file->f_cred implies that a privileged user
	 * could open the file and delegate the write to an unprivileged one.
	 */
	privileged = cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);

	if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
		state = PSI_IO_SOME + res * 2;
	else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
@@ -1282,6 +1309,13 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
	    window_us > WINDOW_MAX_US)
		return ERR_PTR(-EINVAL);

	/*
	 * Unprivileged users can only use 2s windows so that averages aggregation
	 * work is used, and no RT threads need to be spawned.
	 */
	if (!privileged && window_us % 2000000)
		return ERR_PTR(-EINVAL);

	/* Check threshold */
	if (threshold_us == 0 || threshold_us > window_us)
		return ERR_PTR(-EINVAL);
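For context, a minimal userspace sketch of registering such a trigger through the documented /proc/pressure interface (the 150ms threshold and 2s window below are arbitrary example values). With the change above, an unprivileged caller can use this window because 2000000us is a multiple of 2s, so the trigger is served by the averages worker and no RT kthread has to be spawned:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char trig[] = "some 150000 2000000";	/* 150ms stall per 2s window */
	struct pollfd fds;
	int fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);

	if (fd < 0 || write(fd, trig, strlen(trig) + 1) < 0) {
		perror("psi trigger setup");
		return 1;
	}

	fds.fd = fd;
	fds.events = POLLPRI;

	for (;;) {
		if (poll(&fds, 1, -1) < 0) {
			perror("poll");
			return 1;
		}
		if (fds.revents & POLLERR) {
			fprintf(stderr, "trigger went away\n");
			return 1;
		}
		if (fds.revents & POLLPRI)
			printf("memory pressure event\n");
	}
	return 0;
}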
@@ -1301,31 +1335,40 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
	t->last_event_time = 0;
	init_waitqueue_head(&t->event_wait);
	t->pending_event = false;
	t->aggregator = privileged ? PSI_POLL : PSI_AVGS;

	mutex_lock(&group->trigger_lock);
	if (privileged) {
		mutex_lock(&group->rtpoll_trigger_lock);

	if (!rcu_access_pointer(group->poll_task)) {
		struct task_struct *task;
		if (!rcu_access_pointer(group->rtpoll_task)) {
			struct task_struct *task;

		task = kthread_create(psi_poll_worker, group, "psimon");
		if (IS_ERR(task)) {
			kfree(t);
			mutex_unlock(&group->trigger_lock);
			return ERR_CAST(task);
			task = kthread_create(psi_rtpoll_worker, group, "psimon");
			if (IS_ERR(task)) {
				kfree(t);
				mutex_unlock(&group->rtpoll_trigger_lock);
				return ERR_CAST(task);
			}
			atomic_set(&group->rtpoll_wakeup, 0);
			wake_up_process(task);
			rcu_assign_pointer(group->rtpoll_task, task);
		}
		atomic_set(&group->poll_wakeup, 0);
		wake_up_process(task);
		rcu_assign_pointer(group->poll_task, task);

		list_add(&t->node, &group->rtpoll_triggers);
		group->rtpoll_min_period = min(group->rtpoll_min_period,
			div_u64(t->win.size, UPDATES_PER_WINDOW));
		group->rtpoll_nr_triggers[t->state]++;
		group->rtpoll_states |= (1 << t->state);

		mutex_unlock(&group->rtpoll_trigger_lock);
	} else {
		mutex_lock(&group->avgs_lock);

		list_add(&t->node, &group->avg_triggers);
		group->avg_nr_triggers[t->state]++;

		mutex_unlock(&group->avgs_lock);
	}

	list_add(&t->node, &group->triggers);
	group->poll_min_period = min(group->poll_min_period,
		div_u64(t->win.size, UPDATES_PER_WINDOW));
	group->nr_triggers[t->state]++;
	group->poll_states |= (1 << t->state);

	mutex_unlock(&group->trigger_lock);

	return t;
}

@@ -1349,51 +1392,59 @@ void psi_trigger_destroy(struct psi_trigger *t)
	 */
	wake_up_pollfree(&t->event_wait);

	mutex_lock(&group->trigger_lock);

	if (!list_empty(&t->node)) {
		struct psi_trigger *tmp;
		u64 period = ULLONG_MAX;

		list_del(&t->node);
		group->nr_triggers[t->state]--;
		if (!group->nr_triggers[t->state])
			group->poll_states &= ~(1 << t->state);
		/* reset min update period for the remaining triggers */
		list_for_each_entry(tmp, &group->triggers, node)
			period = min(period, div_u64(tmp->win.size,
					UPDATES_PER_WINDOW));
		group->poll_min_period = period;
		/* Destroy poll_task when the last trigger is destroyed */
		if (group->poll_states == 0) {
			group->polling_until = 0;
			task_to_destroy = rcu_dereference_protected(
					group->poll_task,
					lockdep_is_held(&group->trigger_lock));
			rcu_assign_pointer(group->poll_task, NULL);
			del_timer(&group->poll_timer);
	if (t->aggregator == PSI_AVGS) {
		mutex_lock(&group->avgs_lock);
		if (!list_empty(&t->node)) {
			list_del(&t->node);
			group->avg_nr_triggers[t->state]--;
		}
		mutex_unlock(&group->avgs_lock);
	} else {
		mutex_lock(&group->rtpoll_trigger_lock);
		if (!list_empty(&t->node)) {
			struct psi_trigger *tmp;
			u64 period = ULLONG_MAX;

			list_del(&t->node);
			group->rtpoll_nr_triggers[t->state]--;
			if (!group->rtpoll_nr_triggers[t->state])
				group->rtpoll_states &= ~(1 << t->state);
			/* reset min update period for the remaining triggers */
			list_for_each_entry(tmp, &group->rtpoll_triggers, node)
				period = min(period, div_u64(tmp->win.size,
						UPDATES_PER_WINDOW));
			group->rtpoll_min_period = period;
			/* Destroy rtpoll_task when the last trigger is destroyed */
			if (group->rtpoll_states == 0) {
				group->rtpoll_until = 0;
				task_to_destroy = rcu_dereference_protected(
						group->rtpoll_task,
						lockdep_is_held(&group->rtpoll_trigger_lock));
				rcu_assign_pointer(group->rtpoll_task, NULL);
				del_timer(&group->rtpoll_timer);
			}
		}
		mutex_unlock(&group->rtpoll_trigger_lock);
	}

	mutex_unlock(&group->trigger_lock);

	/*
	 * Wait for psi_schedule_poll_work RCU to complete its read-side
	 * Wait for psi_schedule_rtpoll_work RCU to complete its read-side
	 * critical section before destroying the trigger and optionally the
	 * poll_task.
	 * rtpoll_task.
	 */
	synchronize_rcu();
	/*
	 * Stop kthread 'psimon' after releasing trigger_lock to prevent a
	 * deadlock while waiting for psi_poll_work to acquire trigger_lock
	 * Stop kthread 'psimon' after releasing rtpoll_trigger_lock to prevent
	 * a deadlock while waiting for psi_rtpoll_work to acquire
	 * rtpoll_trigger_lock
	 */
	if (task_to_destroy) {
		/*
		 * After the RCU grace period has expired, the worker
		 * can no longer be found through group->poll_task.
		 * can no longer be found through group->rtpoll_task.
		 */
		kthread_stop(task_to_destroy);
		atomic_set(&group->poll_scheduled, 0);
		atomic_set(&group->rtpoll_scheduled, 0);
	}
	kfree(t);
}
@@ -1435,27 +1486,19 @@ static int psi_cpu_show(struct seq_file *m, void *v)
	return psi_show(m, &psi_system, PSI_CPU);
}

static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *))
{
	if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
		return -EPERM;

	return single_open(file, psi_show, NULL);
}

static int psi_io_open(struct inode *inode, struct file *file)
{
	return psi_open(file, psi_io_show);
	return single_open(file, psi_io_show, NULL);
}

static int psi_memory_open(struct inode *inode, struct file *file)
{
	return psi_open(file, psi_memory_show);
	return single_open(file, psi_memory_show, NULL);
}

static int psi_cpu_open(struct inode *inode, struct file *file)
{
	return psi_open(file, psi_cpu_show);
	return single_open(file, psi_cpu_show, NULL);
}

static ssize_t psi_write(struct file *file, const char __user *user_buf,
@@ -1489,7 +1532,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
		return -EBUSY;
	}

	new = psi_trigger_create(&psi_system, buf, res);
	new = psi_trigger_create(&psi_system, buf, res, file);
	if (IS_ERR(new)) {
		mutex_unlock(&seq->lock);
		return PTR_ERR(new);
@@ -1569,7 +1612,7 @@ static int psi_irq_show(struct seq_file *m, void *v)

static int psi_irq_open(struct inode *inode, struct file *file)
{
	return psi_open(file, psi_irq_show);
	return single_open(file, psi_irq_show, NULL);
}

static ssize_t psi_irq_write(struct file *file, const char __user *user_buf,

@@ -2000,11 +2000,15 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
			 * the mean time, task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 * It is possible the task was scheduled, set
			 * "migrate_disabled" and then got preempted, so we must
			 * check the task migration disable flag here too.
			 */
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
				     task_on_cpu(rq, task) ||
				     !rt_task(task) ||
				     is_migration_disabled(task) ||
				     !task_on_rq_queued(task))) {

				double_unlock_balance(rq, lowest_rq);
@@ -2677,6 +2681,21 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
	return 0;
}

#ifdef CONFIG_SCHED_CORE
static int task_is_throttled_rt(struct task_struct *p, int cpu)
{
	struct rt_rq *rt_rq;

#ifdef CONFIG_RT_GROUP_SCHED
	rt_rq = task_group(p)->rt_rq[cpu];
#else
	rt_rq = &cpu_rq(cpu)->rt;
#endif

	return rt_rq_throttled(rt_rq);
}
#endif

DEFINE_SCHED_CLASS(rt) = {

	.enqueue_task = enqueue_task_rt,
@@ -2710,6 +2729,10 @@ DEFINE_SCHED_CLASS(rt) = {

	.update_curr = update_curr_rt,

#ifdef CONFIG_SCHED_CORE
	.task_is_throttled = task_is_throttled_rt,
#endif

#ifdef CONFIG_UCLAMP_TASK
	.uclamp_enabled = 1,
#endif

@@ -2224,6 +2224,10 @@ struct sched_class {
#ifdef CONFIG_FAIR_GROUP_SCHED
	void (*task_change_group)(struct task_struct *p);
#endif

#ifdef CONFIG_SCHED_CORE
	int (*task_is_throttled)(struct task_struct *p, int cpu);
#endif
};

static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
@@ -3249,61 +3253,238 @@ static inline void update_current_exec_runtime(struct task_struct *curr,
}

#ifdef CONFIG_SCHED_MM_CID
static inline int __mm_cid_get(struct mm_struct *mm)

#define SCHED_MM_CID_PERIOD_NS (100ULL * 1000000) /* 100ms */
#define MM_CID_SCAN_DELAY 100 /* 100ms */

extern raw_spinlock_t cid_lock;
extern int use_cid_lock;

extern void sched_mm_cid_migrate_from(struct task_struct *t);
extern void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t);
extern void task_tick_mm_cid(struct rq *rq, struct task_struct *curr);
extern void init_sched_mm_cid(struct task_struct *t);

static inline void __mm_cid_put(struct mm_struct *mm, int cid)
{
	if (cid < 0)
		return;
	cpumask_clear_cpu(cid, mm_cidmask(mm));
}

/*
 * The per-mm/cpu cid can have the MM_CID_LAZY_PUT flag set or transition to
 * the MM_CID_UNSET state without holding the rq lock, but the rq lock needs to
 * be held to transition to other states.
 *
 * State transitions synchronized with cmpxchg or try_cmpxchg need to be
 * consistent across cpus, which prevents use of this_cpu_cmpxchg.
 */
static inline void mm_cid_put_lazy(struct task_struct *t)
{
	struct mm_struct *mm = t->mm;
	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
	int cid;

	lockdep_assert_irqs_disabled();
	cid = __this_cpu_read(pcpu_cid->cid);
	if (!mm_cid_is_lazy_put(cid) ||
	    !try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
		return;
	__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
}

static inline int mm_cid_pcpu_unset(struct mm_struct *mm)
{
	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
	int cid, res;

	lockdep_assert_irqs_disabled();
	cid = __this_cpu_read(pcpu_cid->cid);
	for (;;) {
		if (mm_cid_is_unset(cid))
			return MM_CID_UNSET;
		/*
		 * Attempt transition from valid or lazy-put to unset.
		 */
		res = cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, cid, MM_CID_UNSET);
		if (res == cid)
			break;
		cid = res;
	}
	return cid;
}

static inline void mm_cid_put(struct mm_struct *mm)
{
	int cid;

	lockdep_assert_irqs_disabled();
	cid = mm_cid_pcpu_unset(mm);
	if (cid == MM_CID_UNSET)
		return;
	__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
}

static inline int __mm_cid_try_get(struct mm_struct *mm)
{
	struct cpumask *cpumask;
	int cid;

	cpumask = mm_cidmask(mm);
	cid = cpumask_first_zero(cpumask);
	if (cid >= nr_cpu_ids)
	/*
	 * Retry finding first zero bit if the mask is temporarily
	 * filled. This only happens during concurrent remote-clear
	 * which owns a cid without holding a rq lock.
	 */
	for (;;) {
		cid = cpumask_first_zero(cpumask);
		if (cid < nr_cpu_ids)
			break;
		cpu_relax();
	}
	if (cpumask_test_and_set_cpu(cid, cpumask))
		return -1;
	__cpumask_set_cpu(cid, cpumask);
	return cid;
}

static inline void mm_cid_put(struct mm_struct *mm, int cid)
/*
 * Save a snapshot of the current runqueue time of this cpu
 * with the per-cpu cid value, allowing to estimate how recently it was used.
 */
static inline void mm_cid_snapshot_time(struct rq *rq, struct mm_struct *mm)
{
	lockdep_assert_irqs_disabled();
	if (cid < 0)
		return;
	raw_spin_lock(&mm->cid_lock);
	__cpumask_clear_cpu(cid, mm_cidmask(mm));
	raw_spin_unlock(&mm->cid_lock);
	struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(rq));

	lockdep_assert_rq_held(rq);
	WRITE_ONCE(pcpu_cid->time, rq->clock);
}

static inline int mm_cid_get(struct mm_struct *mm)
static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
{
	int ret;
	int cid;

	lockdep_assert_irqs_disabled();
	raw_spin_lock(&mm->cid_lock);
	ret = __mm_cid_get(mm);
	raw_spin_unlock(&mm->cid_lock);
	return ret;
	/*
	 * All allocations (even those using the cid_lock) are lock-free. If
	 * use_cid_lock is set, hold the cid_lock to perform cid allocation to
	 * guarantee forward progress.
	 */
	if (!READ_ONCE(use_cid_lock)) {
		cid = __mm_cid_try_get(mm);
		if (cid >= 0)
			goto end;
		raw_spin_lock(&cid_lock);
	} else {
		raw_spin_lock(&cid_lock);
		cid = __mm_cid_try_get(mm);
		if (cid >= 0)
			goto unlock;
	}

	/*
	 * cid concurrently allocated. Retry while forcing following
	 * allocations to use the cid_lock to ensure forward progress.
	 */
	WRITE_ONCE(use_cid_lock, 1);
	/*
	 * Set use_cid_lock before allocation. Only care about program order
	 * because this is only required for forward progress.
	 */
	barrier();
	/*
	 * Retry until it succeeds. It is guaranteed to eventually succeed once
	 * all newcoming allocations observe the use_cid_lock flag set.
	 */
	do {
		cid = __mm_cid_try_get(mm);
		cpu_relax();
	} while (cid < 0);
	/*
	 * Allocate before clearing use_cid_lock. Only care about
	 * program order because this is for forward progress.
	 */
	barrier();
	WRITE_ONCE(use_cid_lock, 0);
unlock:
	raw_spin_unlock(&cid_lock);
end:
	mm_cid_snapshot_time(rq, mm);
	return cid;
}

static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next)
static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
{
	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
	struct cpumask *cpumask;
	int cid;

	lockdep_assert_rq_held(rq);
	cpumask = mm_cidmask(mm);
	cid = __this_cpu_read(pcpu_cid->cid);
	if (mm_cid_is_valid(cid)) {
		mm_cid_snapshot_time(rq, mm);
		return cid;
	}
	if (mm_cid_is_lazy_put(cid)) {
		if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
			__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
	}
	cid = __mm_cid_get(rq, mm);
	__this_cpu_write(pcpu_cid->cid, cid);
	return cid;
}

static inline void switch_mm_cid(struct rq *rq,
				 struct task_struct *prev,
				 struct task_struct *next)
{
	/*
	 * Provide a memory barrier between rq->curr store and load of
	 * {prev,next}->mm->pcpu_cid[cpu] on rq->curr->mm transition.
	 *
	 * Should be adapted if context_switch() is modified.
	 */
	if (!next->mm) {				// to kernel
		/*
		 * user -> kernel transition does not guarantee a barrier, but
		 * we can use the fact that it performs an atomic operation in
		 * mmgrab().
		 */
		if (prev->mm)				// from user
			smp_mb__after_mmgrab();
		/*
		 * kernel -> kernel transition does not change rq->curr->mm
		 * state. It stays NULL.
		 */
	} else {					// to user
		/*
		 * kernel -> user transition does not provide a barrier
		 * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
		 * Provide it here.
		 */
		if (!prev->mm)				// from kernel
			smp_mb();
		/*
		 * user -> user transition guarantees a memory barrier through
		 * switch_mm() when current->mm changes. If current->mm is
		 * unchanged, no barrier is needed.
		 */
	}
	if (prev->mm_cid_active) {
		if (next->mm_cid_active && next->mm == prev->mm) {
			/*
			 * Context switch between threads in same mm, hand over
			 * the mm_cid from prev to next.
			 */
			next->mm_cid = prev->mm_cid;
			prev->mm_cid = -1;
			return;
		}
		mm_cid_put(prev->mm, prev->mm_cid);
		mm_cid_snapshot_time(rq, prev->mm);
		mm_cid_put_lazy(prev);
		prev->mm_cid = -1;
	}
	if (next->mm_cid_active)
		next->mm_cid = mm_cid_get(next->mm);
		next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);
}

#else
static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next) { }
static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { }
static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
static inline void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) { }
static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
static inline void init_sched_mm_cid(struct task_struct *t) { }
#endif

#endif /* _KERNEL_SCHED_SCHED_H */

@@ -209,8 +209,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
DEFINE_STATIC_KEY_FALSE(sched_energy_present);
static unsigned int sysctl_sched_energy_aware = 1;
DEFINE_MUTEX(sched_energy_mutex);
bool sched_energy_update;
static DEFINE_MUTEX(sched_energy_mutex);
static bool sched_energy_update;

void rebuild_sched_domains_energy(void)
{