From 93bd25bb69f46367ba8f82c578e0c05702ceb482 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 Nov 2019 23:34:31 -0700 Subject: [PATCH 1/7] io_uring: make timeout sequence == 0 mean no sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we make sequence == 0 be the same as sequence == 1, but that's not super useful if the intent is really to have a timeout that's just a pure timeout. If the user passes in sqe->off == 0, then don't apply any sequence logic to the request, let it purely be driven by the timeout specified. Reported-by: 李通洲 Reviewed-by: 李通洲 Signed-off-by: Jens Axboe --- fs/io_uring.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f9a38998f2fc..87beca4377f7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -326,6 +326,7 @@ struct io_kiocb { #define REQ_F_TIMEOUT 1024 /* timeout request */ #define REQ_F_ISREG 2048 /* regular file */ #define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */ +#define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */ u64 user_data; u32 result; u32 sequence; @@ -453,9 +454,13 @@ static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx) struct io_kiocb *req; req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list); - if (req && !__io_sequence_defer(ctx, req)) { - list_del_init(&req->list); - return req; + if (req) { + if (req->flags & REQ_F_TIMEOUT_NOSEQ) + return NULL; + if (!__io_sequence_defer(ctx, req)) { + list_del_init(&req->list); + return req; + } } return NULL; @@ -1941,18 +1946,24 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr))) return -EFAULT; + req->flags |= REQ_F_TIMEOUT; + /* * sqe->off holds how many events that need to occur for this - * timeout event to be satisfied. + * timeout event to be satisfied. If it isn't set, then this is + * a pure timeout request, sequence isn't used. */ count = READ_ONCE(sqe->off); - if (!count) - count = 1; + if (!count) { + req->flags |= REQ_F_TIMEOUT_NOSEQ; + spin_lock_irq(&ctx->completion_lock); + entry = ctx->timeout_list.prev; + goto add; + } req->sequence = ctx->cached_sq_head + count - 1; /* reuse it to store the count */ req->submit.sequence = count; - req->flags |= REQ_F_TIMEOUT; /* * Insertion sort, ensuring the first entry in the list is always @@ -1964,6 +1975,9 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) unsigned nxt_sq_head; long long tmp, tmp_nxt; + if (nxt->flags & REQ_F_TIMEOUT_NOSEQ) + continue; + /* * Since cached_sq_head + count - 1 can overflow, use type long * long to store it. @@ -1990,6 +2004,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe) nxt->sequence++; } req->sequence -= span; +add: list_add(&req->list, entry); spin_unlock_irq(&ctx->completion_lock); From e3a5d8e386c3fb973fa75f2403622a8f3640ec06 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Tue, 12 Nov 2019 07:19:58 +0000 Subject: [PATCH 2/7] block: check bi_size overflow before merge __bio_try_merge_page() may merge a page to bio without bio_full() check and cause bi_size overflow. The overflow typically ends up with sd_init_command() warning on zero segment request with call trace like this: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1986 at drivers/scsi/scsi_lib.c:1025 scsi_init_io+0x156/0x180 CPU: 2 PID: 1986 Comm: kworker/2:1H Kdump: loaded Not tainted 5.4.0-rc7 #1 Workqueue: kblockd blk_mq_run_work_fn RIP: 0010:scsi_init_io+0x156/0x180 RSP: 0018:ffffa11487663bf0 EFLAGS: 00010246 RAX: 00000000002be0a0 RBX: ffff8e6e9ff30118 RCX: 0000000000000000 RDX: 00000000ffffffe1 RSI: 0000000000000000 RDI: ffff8e6e9ff30118 RBP: ffffa11487663c18 R08: ffffa11487663d28 R09: ffff8e6e9ff30150 R10: 0000000000000001 R11: 0000000000000000 R12: ffff8e6e9ff30000 R13: 0000000000000001 R14: ffff8e74a1cf1800 R15: ffff8e6e9ff30000 FS: 0000000000000000(0000) GS:ffff8e6ea7680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff18cf0fe8 CR3: 0000000659f0a001 CR4: 00000000001606e0 Call Trace: sd_init_command+0x326/0xb40 [sd_mod] scsi_queue_rq+0x502/0xaa0 ? blk_mq_get_driver_tag+0xe7/0x120 blk_mq_dispatch_rq_list+0x256/0x5a0 ? elv_rb_del+0x24/0x30 ? deadline_remove_request+0x7b/0xc0 blk_mq_do_dispatch_sched+0xa3/0x140 blk_mq_sched_dispatch_requests+0xfb/0x170 __blk_mq_run_hw_queue+0x81/0x130 blk_mq_run_work_fn+0x1b/0x20 process_one_work+0x179/0x390 worker_thread+0x4f/0x3e0 kthread+0x105/0x140 ? max_active_store+0x80/0x80 ? kthread_bind+0x20/0x20 ret_from_fork+0x35/0x40 ---[ end trace f9036abf5af4a4d3 ]--- blk_update_request: I/O error, dev sdd, sector 2875552 op 0x1:(WRITE) flags 0x0 phys_seg 0 prio class 0 XFS (sdd1): writeback error on sector 2875552 __bio_try_merge_page() should check the overflow before actually doing merge. Fixes: 07173c3ec276c ("block: enable multipage bvecs") Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Jun'ichi Nomura Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 8f0ed6228fc5..b1170ec18464 100644 --- a/block/bio.c +++ b/block/bio.c @@ -751,7 +751,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) return false; - if (bio->bi_vcnt > 0) { + if (bio->bi_vcnt > 0 && !bio_full(bio, len)) { struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; if (page_is_mergeable(bv, page, len, off, same_page)) { From 5683e5406e94ae1bfb0d9516a18fdb281d0f8d1d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 14 Nov 2019 00:59:19 +0300 Subject: [PATCH 3/7] io_uring: Fix getting file for timeout For timeout requests io_uring tries to grab a file with specified fd, which is usually stdin/fd=0. Update io_op_needs_file() Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 87beca4377f7..57ea54d5b0fb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2298,6 +2298,7 @@ static bool io_op_needs_file(const struct io_uring_sqe *sqe) switch (op) { case IORING_OP_NOP: case IORING_OP_POLL_REMOVE: + case IORING_OP_TIMEOUT: return false; default: return true; From 5e559561a8d7e6d4adfce6aa8fbf3daa3dec1577 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Nov 2019 16:12:46 -0700 Subject: [PATCH 4/7] io_uring: ensure registered buffer import returns the IO length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A test case was reported where two linked reads with registered buffers failed the second link always. This is because we set the expected value of a request in req->result, and if we don't get this result, then we fail the dependent links. For some reason the registered buffer import returned -ERROR/0, while the normal import returns -ERROR/length. This broke linked commands with registered buffers. Fix this by making io_import_fixed() correctly return the mapped length. Cc: stable@vger.kernel.org # v5.3 Reported-by: 李通洲 Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 57ea54d5b0fb..2c819c3c855d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1230,7 +1230,7 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, } } - return 0; + return len; } static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw, From 478de3380c1c7dbb0f65f545ee0185848413f3fe Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Thu, 14 Nov 2019 10:33:11 +0100 Subject: [PATCH 5/7] block, bfq: deschedule empty bfq_queues not referred by any process Since commit 3726112ec731 ("block, bfq: re-schedule empty queues if they deserve I/O plugging"), to prevent the service guarantees of a bfq_queue from being violated, the bfq_queue may be left busy, i.e., scheduled for service, even if empty (see comments in __bfq_bfqq_expire() for details). But, if no process will send requests to the bfq_queue any longer, then there is no point in keeping the bfq_queue scheduled for service. In addition, keeping the bfq_queue scheduled for service, but with no process reference any longer, may cause the bfq_queue to be freed when descheduled from service. But this is assumed to never happen, and causes a UAF if it happens. This, in turn, caused crashes [1, 2]. This commit fixes this issue by descheduling an empty bfq_queue when it remains with not process reference. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1767539 [2] https://bugzilla.kernel.org/show_bug.cgi?id=205447 Fixes: 3726112ec731 ("block, bfq: re-schedule empty queues if they deserve I/O plugging") Reported-by: Chris Evich Reported-by: Patrick Dung Reported-by: Thorsten Schubert Tested-by: Thorsten Schubert Tested-by: Oleksandr Natalenko Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0319d6339822..0c6214497fcc 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2713,6 +2713,28 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) } } + +static +void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + /* + * To prevent bfqq's service guarantees from being violated, + * bfqq may be left busy, i.e., queued for service, even if + * empty (see comments in __bfq_bfqq_expire() for + * details). But, if no process will send requests to bfqq any + * longer, then there is no point in keeping bfqq queued for + * service. In addition, keeping bfqq queued for service, but + * with no process ref any longer, may have caused bfqq to be + * freed when dequeued from service. But this is assumed to + * never happen. + */ + if (bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) && + bfqq != bfqd->in_service_queue) + bfq_del_bfqq_busy(bfqd, bfqq, false); + + bfq_put_queue(bfqq); +} + static void bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) @@ -2783,8 +2805,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, */ new_bfqq->pid = -1; bfqq->bic = NULL; - /* release process reference to bfqq */ - bfq_put_queue(bfqq); + bfq_release_process_ref(bfqd, bfqq); } static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, @@ -4899,7 +4920,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_put_cooperator(bfqq); - bfq_put_queue(bfqq); /* release process reference */ + bfq_release_process_ref(bfqd, bfqq); } static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync) @@ -5001,8 +5022,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) bfqq = bic_to_bfqq(bic, false); if (bfqq) { - /* release process reference on this queue */ - bfq_put_queue(bfqq); + bfq_release_process_ref(bfqd, bfqq); bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic); bic_set_bfqq(bic, bfqq, false); } @@ -5963,7 +5983,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) bfq_put_cooperator(bfqq); - bfq_put_queue(bfqq); + bfq_release_process_ref(bfqq->bfqd, bfqq); return NULL; } From 8b37bc277fb459fa100808880a9d4e0641fff444 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Wed, 13 Nov 2019 15:21:31 +0800 Subject: [PATCH 6/7] iocost: check active_list of all the ancestors in iocg_activate() There is a bug that checking the same active_list over and over again in iocg_activate(). The intention of the code was checking whether all the ancestors and self have already been activated. So fix it. Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost") Acked-by: Tejun Heo Signed-off-by: Jiufei Xue Signed-off-by: Jens Axboe --- block/blk-iocost.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a7ed434eae03..e01267f99183 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1057,9 +1057,12 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now) atomic64_set(&iocg->active_period, cur_period); /* already activated or breaking leaf-only constraint? */ - for (i = iocg->level; i > 0; i--) - if (!list_empty(&iocg->active_list)) + if (!list_empty(&iocg->active_list)) + goto succeed_unlock; + for (i = iocg->level - 1; i > 0; i--) + if (!list_empty(&iocg->ancestors[i]->active_list)) goto fail_unlock; + if (iocg->child_active_sum) goto fail_unlock; @@ -1101,6 +1104,7 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now) ioc_start_period(ioc, now); } +succeed_unlock: spin_unlock_irq(&ioc->lock); return true; From dcb77e4b274b8f13ac6482dfb09160cd2fae9a40 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 13 Nov 2019 14:38:47 +0800 Subject: [PATCH 7/7] rsxx: add missed destroy_workqueue calls in remove The driver misses calling destroy_workqueue in remove like what is done when probe fails. Add the missed calls to fix it. Signed-off-by: Chuhong Yuan Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 76b73ddf8fd7..10f6368117d8 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -1000,8 +1000,10 @@ static void rsxx_pci_remove(struct pci_dev *dev) cancel_work_sync(&card->event_work); + destroy_workqueue(card->event_wq); rsxx_destroy_dev(card); rsxx_dma_destroy(card); + destroy_workqueue(card->creg_ctrl.creg_wq); spin_lock_irqsave(&card->irq_lock, flags); rsxx_disable_ier_and_isr(card, CR_INTR_ALL);