mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-23 12:14:10 +08:00
sched/core: Disable page allocation in task_tick_mm_cid()
With KASAN and PREEMPT_RT enabled, calling task_work_add() in task_tick_mm_cid() may cause the following splat. [ 63.696416] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 [ 63.696416] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 610, name: modprobe [ 63.696416] preempt_count: 10001, expected: 0 [ 63.696416] RCU nest depth: 1, expected: 1 This problem is caused by the following call trace. sched_tick() [ acquire rq->__lock ] -> task_tick_mm_cid() -> task_work_add() -> __kasan_record_aux_stack() -> kasan_save_stack() -> stack_depot_save_flags() -> alloc_pages_mpol_noprof() -> __alloc_pages_noprof() -> get_page_from_freelist() -> rmqueue() -> rmqueue_pcplist() -> __rmqueue_pcplist() -> rmqueue_bulk() -> rt_spin_lock() The rq lock is a raw_spinlock_t. We can't sleep while holding it. IOW, we can't call alloc_pages() in stack_depot_save_flags(). The task_tick_mm_cid() function with its task_work_add() call was introduced by commit 223baf9d17
("sched: Fix performance regression introduced by mm_cid") in the v6.4 kernel. Fortunately, there is a kasan_record_aux_stack_noalloc() variant that calls stack_depot_save_flags() while not allowing it to allocate new pages. To allow task_tick_mm_cid() to use task_work without page allocation, a new TWAF_NO_ALLOC flag is added to enable calling kasan_record_aux_stack_noalloc() instead of kasan_record_aux_stack() if set. The task_tick_mm_cid() function is modified to add this new flag. The possible downside is a missing stack trace in a KASAN report when task_work_add() is called with TWAF_NO_ALLOC and a new page allocation would have been required, which should be rare. Fixes: 223baf9d17
("sched: Fix performance regression introduced by mm_cid") Signed-off-by: Waiman Long <longman@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20241010014432.194742-1-longman@redhat.com
This commit is contained in:
parent
d16b7eb6f5
commit
73ab05aa46
@ -14,11 +14,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
|
|||||||
}
|
}
|
||||||
|
|
||||||
enum task_work_notify_mode {
|
enum task_work_notify_mode {
|
||||||
TWA_NONE,
|
TWA_NONE = 0,
|
||||||
TWA_RESUME,
|
TWA_RESUME,
|
||||||
TWA_SIGNAL,
|
TWA_SIGNAL,
|
||||||
TWA_SIGNAL_NO_IPI,
|
TWA_SIGNAL_NO_IPI,
|
||||||
TWA_NMI_CURRENT,
|
TWA_NMI_CURRENT,
|
||||||
|
|
||||||
|
TWA_FLAGS = 0xff00,
|
||||||
|
TWAF_NO_ALLOC = 0x0100,
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline bool task_work_pending(struct task_struct *task)
|
static inline bool task_work_pending(struct task_struct *task)
|
||||||
|
@ -10458,7 +10458,9 @@ void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
|
|||||||
return;
|
return;
|
||||||
if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
|
if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
|
||||||
return;
|
return;
|
||||||
task_work_add(curr, work, TWA_RESUME);
|
|
||||||
|
/* No page allocation under rq lock */
|
||||||
|
task_work_add(curr, work, TWA_RESUME | TWAF_NO_ALLOC);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sched_mm_cid_exit_signals(struct task_struct *t)
|
void sched_mm_cid_exit_signals(struct task_struct *t)
|
||||||
|
@ -55,15 +55,26 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
|
|||||||
enum task_work_notify_mode notify)
|
enum task_work_notify_mode notify)
|
||||||
{
|
{
|
||||||
struct callback_head *head;
|
struct callback_head *head;
|
||||||
|
int flags = notify & TWA_FLAGS;
|
||||||
|
|
||||||
|
notify &= ~TWA_FLAGS;
|
||||||
if (notify == TWA_NMI_CURRENT) {
|
if (notify == TWA_NMI_CURRENT) {
|
||||||
if (WARN_ON_ONCE(task != current))
|
if (WARN_ON_ONCE(task != current))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
if (!IS_ENABLED(CONFIG_IRQ_WORK))
|
if (!IS_ENABLED(CONFIG_IRQ_WORK))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
} else {
|
} else {
|
||||||
/* record the work call stack in order to print it in KASAN reports */
|
/*
|
||||||
kasan_record_aux_stack(work);
|
* Record the work call stack in order to print it in KASAN
|
||||||
|
* reports.
|
||||||
|
*
|
||||||
|
* Note that stack allocation can fail if TWAF_NO_ALLOC flag
|
||||||
|
* is set and new page is needed to expand the stack buffer.
|
||||||
|
*/
|
||||||
|
if (flags & TWAF_NO_ALLOC)
|
||||||
|
kasan_record_aux_stack_noalloc(work);
|
||||||
|
else
|
||||||
|
kasan_record_aux_stack(work);
|
||||||
}
|
}
|
||||||
|
|
||||||
head = READ_ONCE(task->task_works);
|
head = READ_ONCE(task->task_works);
|
||||||
|
Loading…
Reference in New Issue
Block a user