io_uring: fix concurrent parking

If io_sq_thread_park() of one task got rescheduled right after
set_bit(), before it gets back to mutex_lock() there can happen
park()/unpark() by another task with SQPOLL locking again and
continuing running never seeing that first set_bit(SHOULD_PARK),
so won't even try to put the mutex down for parking.

It will get parked eventually when SQPOLL drops the lock for reschedule,
but may be problematic and will get in the way of further fixes.

Account number of tasks waiting for parking with a new atomic variable
park_pending and adjust SHOULD_PARK accordingly. It doesn't entirely
replaces SHOULD_PARK bit with this atomic var because it's convenient
to have it as a bit in the state and will help to do optimisations
later.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Pavel Begunkov 2021-03-14 20:57:12 +00:00 committed by Jens Axboe
parent f6d54255f4
commit 9e138a4834

View File

@ -258,6 +258,7 @@ enum {
struct io_sq_data {
refcount_t refs;
atomic_t park_pending;
struct mutex lock;
/* ctx's that are using this sqd */
@ -7067,7 +7068,13 @@ static void io_sq_thread_unpark(struct io_sq_data *sqd)
{
WARN_ON_ONCE(sqd->thread == current);
/*
* Do the dance but not conditional clear_bit() because it'd race with
* other threads incrementing park_pending and setting the bit.
*/
clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
if (atomic_dec_return(&sqd->park_pending))
set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
mutex_unlock(&sqd->lock);
}
@ -7076,10 +7083,9 @@ static void io_sq_thread_park(struct io_sq_data *sqd)
{
WARN_ON_ONCE(sqd->thread == current);
atomic_inc(&sqd->park_pending);
set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
mutex_lock(&sqd->lock);
/* set again for consistency, in case concurrent parks are happening */
set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
if (sqd->thread)
wake_up_process(sqd->thread);
}
@ -7099,6 +7105,8 @@ static void io_sq_thread_stop(struct io_sq_data *sqd)
static void io_put_sq_data(struct io_sq_data *sqd)
{
if (refcount_dec_and_test(&sqd->refs)) {
WARN_ON_ONCE(atomic_read(&sqd->park_pending));
io_sq_thread_stop(sqd);
kfree(sqd);
}
@ -7172,6 +7180,7 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
if (!sqd)
return ERR_PTR(-ENOMEM);
atomic_set(&sqd->park_pending, 0);
refcount_set(&sqd->refs, 1);
INIT_LIST_HEAD(&sqd->ctx_list);
mutex_init(&sqd->lock);