sched/wait: Provide infrastructure to deal with nested blocking

There are a few places that call blocking primitives from wait loops,
provide infrastructure to support this without the typical
task_struct::state collision.

We record the wakeup in wait_queue_t::flags which leaves
task_struct::state free to be used by others.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: tglx@linutronix.de
Cc: ilya.dryomov@inktank.com
Cc: umgwanakikbuti@gmail.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20140924082242.051202318@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Peter Zijlstra 2014-09-24 10:18:47 +02:00 committed by Ingo Molnar
parent 6f942a1f26
commit 61ada528de
2 changed files with 67 additions and 1 deletions

View File

@ -13,9 +13,12 @@ typedef struct __wait_queue wait_queue_t;
typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
/* __wait_queue::flags */
#define WQ_FLAG_EXCLUSIVE 0x01
#define WQ_FLAG_WOKEN 0x02
struct __wait_queue {
unsigned int flags;
#define WQ_FLAG_EXCLUSIVE 0x01
void *private;
wait_queue_func_t func;
struct list_head task_list;
@ -830,6 +833,8 @@ void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int sta
long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);

View File

@ -297,6 +297,67 @@ int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *
}
EXPORT_SYMBOL(autoremove_wake_function);
/*
* DEFINE_WAIT_FUNC(wait, woken_wake_func);
*
* add_wait_queue(&wq, &wait);
* for (;;) {
* if (condition)
* break;
*
* p->state = mode; condition = true;
* smp_mb(); // A smp_wmb(); // C
* if (!wait->flags & WQ_FLAG_WOKEN) wait->flags |= WQ_FLAG_WOKEN;
* schedule() try_to_wake_up();
* p->state = TASK_RUNNING; ~~~~~~~~~~~~~~~~~~
* wait->flags &= ~WQ_FLAG_WOKEN; condition = true;
* smp_mb() // B smp_wmb(); // C
* wait->flags |= WQ_FLAG_WOKEN;
* }
* remove_wait_queue(&wq, &wait);
*
*/
long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
{
set_current_state(mode); /* A */
/*
* The above implies an smp_mb(), which matches with the smp_wmb() from
* woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
* also observe all state before the wakeup.
*/
if (!(wait->flags & WQ_FLAG_WOKEN))
timeout = schedule_timeout(timeout);
__set_current_state(TASK_RUNNING);
/*
* The below implies an smp_mb(), it too pairs with the smp_wmb() from
* woken_wake_function() such that we must either observe the wait
* condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
* an event.
*/
set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
return timeout;
}
EXPORT_SYMBOL(wait_woken);
int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
/*
* Although this function is called under waitqueue lock, LOCK
* doesn't imply write barrier and the users expects write
* barrier semantics on wakeup functions. The following
* smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
* and is paired with set_mb() in wait_woken().
*/
smp_wmb(); /* C */
wait->flags |= WQ_FLAG_WOKEN;
return default_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
struct wait_bit_key *key = arg;