From 93bd25bb69f46367ba8f82c578e0c05702ceb482 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 11 Nov 2019 23:34:31 -0700
Subject: [PATCH 1/7] io_uring: make timeout sequence == 0 mean no sequence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we make sequence == 0 be the same as sequence == 1, but that's
not super useful if the intent is really to have a timeout that's just
a pure timeout.

If the user passes in sqe->off == 0, then don't apply any sequence logic
to the request, let it purely be driven by the timeout specified.

Reported-by: 李通洲 <carter.li@eoitek.com>
Reviewed-by: 李通洲 <carter.li@eoitek.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f9a38998f2fc..87beca4377f7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -326,6 +326,7 @@ struct io_kiocb {
 #define REQ_F_TIMEOUT		1024	/* timeout request */
 #define REQ_F_ISREG		2048	/* regular file */
 #define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
+#define REQ_F_TIMEOUT_NOSEQ	8192	/* no timeout sequence */
 	u64			user_data;
 	u32			result;
 	u32			sequence;
@@ -453,9 +454,13 @@ static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
 	struct io_kiocb *req;
 
 	req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list);
-	if (req && !__io_sequence_defer(ctx, req)) {
-		list_del_init(&req->list);
-		return req;
+	if (req) {
+		if (req->flags & REQ_F_TIMEOUT_NOSEQ)
+			return NULL;
+		if (!__io_sequence_defer(ctx, req)) {
+			list_del_init(&req->list);
+			return req;
+		}
 	}
 
 	return NULL;
@@ -1941,18 +1946,24 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr)))
 		return -EFAULT;
 
+	req->flags |= REQ_F_TIMEOUT;
+
 	/*
 	 * sqe->off holds how many events that need to occur for this
-	 * timeout event to be satisfied.
+	 * timeout event to be satisfied. If it isn't set, then this is
+	 * a pure timeout request, sequence isn't used.
 	 */
 	count = READ_ONCE(sqe->off);
-	if (!count)
-		count = 1;
+	if (!count) {
+		req->flags |= REQ_F_TIMEOUT_NOSEQ;
+		spin_lock_irq(&ctx->completion_lock);
+		entry = ctx->timeout_list.prev;
+		goto add;
+	}
 
 	req->sequence = ctx->cached_sq_head + count - 1;
 	/* reuse it to store the count */
 	req->submit.sequence = count;
-	req->flags |= REQ_F_TIMEOUT;
 
 	/*
 	 * Insertion sort, ensuring the first entry in the list is always
@@ -1964,6 +1975,9 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		unsigned nxt_sq_head;
 		long long tmp, tmp_nxt;
 
+		if (nxt->flags & REQ_F_TIMEOUT_NOSEQ)
+			continue;
+
 		/*
 		 * Since cached_sq_head + count - 1 can overflow, use type long
 		 * long to store it.
@@ -1990,6 +2004,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		nxt->sequence++;
 	}
 	req->sequence -= span;
+add:
 	list_add(&req->list, entry);
 	spin_unlock_irq(&ctx->completion_lock);
 

From e3a5d8e386c3fb973fa75f2403622a8f3640ec06 Mon Sep 17 00:00:00 2001
From: Junichi Nomura <j-nomura@ce.jp.nec.com>
Date: Tue, 12 Nov 2019 07:19:58 +0000
Subject: [PATCH 2/7] block: check bi_size overflow before merge

__bio_try_merge_page() may merge a page to bio without bio_full() check
and cause bi_size overflow.

The overflow typically ends up with sd_init_command() warning on zero
segment request with call trace like this:

    ------------[ cut here ]------------
    WARNING: CPU: 2 PID: 1986 at drivers/scsi/scsi_lib.c:1025 scsi_init_io+0x156/0x180
    CPU: 2 PID: 1986 Comm: kworker/2:1H Kdump: loaded Not tainted 5.4.0-rc7 #1
    Workqueue: kblockd blk_mq_run_work_fn
    RIP: 0010:scsi_init_io+0x156/0x180
    RSP: 0018:ffffa11487663bf0 EFLAGS: 00010246
    RAX: 00000000002be0a0 RBX: ffff8e6e9ff30118 RCX: 0000000000000000
    RDX: 00000000ffffffe1 RSI: 0000000000000000 RDI: ffff8e6e9ff30118
    RBP: ffffa11487663c18 R08: ffffa11487663d28 R09: ffff8e6e9ff30150
    R10: 0000000000000001 R11: 0000000000000000 R12: ffff8e6e9ff30000
    R13: 0000000000000001 R14: ffff8e74a1cf1800 R15: ffff8e6e9ff30000
    FS:  0000000000000000(0000) GS:ffff8e6ea7680000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 00007fff18cf0fe8 CR3: 0000000659f0a001 CR4: 00000000001606e0
    Call Trace:
     sd_init_command+0x326/0xb40 [sd_mod]
     scsi_queue_rq+0x502/0xaa0
     ? blk_mq_get_driver_tag+0xe7/0x120
     blk_mq_dispatch_rq_list+0x256/0x5a0
     ? elv_rb_del+0x24/0x30
     ? deadline_remove_request+0x7b/0xc0
     blk_mq_do_dispatch_sched+0xa3/0x140
     blk_mq_sched_dispatch_requests+0xfb/0x170
     __blk_mq_run_hw_queue+0x81/0x130
     blk_mq_run_work_fn+0x1b/0x20
     process_one_work+0x179/0x390
     worker_thread+0x4f/0x3e0
     kthread+0x105/0x140
     ? max_active_store+0x80/0x80
     ? kthread_bind+0x20/0x20
     ret_from_fork+0x35/0x40
    ---[ end trace f9036abf5af4a4d3 ]---
    blk_update_request: I/O error, dev sdd, sector 2875552 op 0x1:(WRITE) flags 0x0 phys_seg 0 prio class 0
    XFS (sdd1): writeback error on sector 2875552

__bio_try_merge_page() should check the overflow before actually doing
merge.

Fixes: 07173c3ec276c ("block: enable multipage bvecs")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/bio.c b/block/bio.c
index 8f0ed6228fc5..b1170ec18464 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -751,7 +751,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
 	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 		return false;
 
-	if (bio->bi_vcnt > 0) {
+	if (bio->bi_vcnt > 0 && !bio_full(bio, len)) {
 		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 
 		if (page_is_mergeable(bv, page, len, off, same_page)) {

From 5683e5406e94ae1bfb0d9516a18fdb281d0f8d1d Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 14 Nov 2019 00:59:19 +0300
Subject: [PATCH 3/7] io_uring: Fix getting file for timeout

For timeout requests io_uring tries to grab a file with specified fd,
which is usually stdin/fd=0.
Update io_op_needs_file()

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 87beca4377f7..57ea54d5b0fb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2298,6 +2298,7 @@ static bool io_op_needs_file(const struct io_uring_sqe *sqe)
 	switch (op) {
 	case IORING_OP_NOP:
 	case IORING_OP_POLL_REMOVE:
+	case IORING_OP_TIMEOUT:
 		return false;
 	default:
 		return true;

From 5e559561a8d7e6d4adfce6aa8fbf3daa3dec1577 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 13 Nov 2019 16:12:46 -0700
Subject: [PATCH 4/7] io_uring: ensure registered buffer import returns the IO
 length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A test case was reported where two linked reads with registered buffers
failed the second link always. This is because we set the expected value
of a request in req->result, and if we don't get this result, then we
fail the dependent links. For some reason the registered buffer import
returned -ERROR/0, while the normal import returns -ERROR/length. This
broke linked commands with registered buffers.

Fix this by making io_import_fixed() correctly return the mapped length.

Cc: stable@vger.kernel.org # v5.3
Reported-by: 李通洲 <carter.li@eoitek.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 57ea54d5b0fb..2c819c3c855d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1230,7 +1230,7 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 		}
 	}
 
-	return 0;
+	return len;
 }
 
 static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,

From 478de3380c1c7dbb0f65f545ee0185848413f3fe Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Thu, 14 Nov 2019 10:33:11 +0100
Subject: [PATCH 5/7] block, bfq: deschedule empty bfq_queues not referred by
 any process

Since commit 3726112ec731 ("block, bfq: re-schedule empty queues if
they deserve I/O plugging"), to prevent the service guarantees of a
bfq_queue from being violated, the bfq_queue may be left busy, i.e.,
scheduled for service, even if empty (see comments in
__bfq_bfqq_expire() for details). But, if no process will send
requests to the bfq_queue any longer, then there is no point in
keeping the bfq_queue scheduled for service.

In addition, keeping the bfq_queue scheduled for service, but with no
process reference any longer, may cause the bfq_queue to be freed when
descheduled from service. But this is assumed to never happen, and
causes a UAF if it happens. This, in turn, caused crashes [1, 2].

This commit fixes this issue by descheduling an empty bfq_queue when
it remains with not process reference.

[1] https://bugzilla.redhat.com/show_bug.cgi?id=1767539
[2] https://bugzilla.kernel.org/show_bug.cgi?id=205447

Fixes: 3726112ec731 ("block, bfq: re-schedule empty queues if they deserve I/O plugging")
Reported-by: Chris Evich <cevich@redhat.com>
Reported-by: Patrick Dung <patdung100@gmail.com>
Reported-by: Thorsten Schubert <tschubert@bafh.org>
Tested-by: Thorsten Schubert <tschubert@bafh.org>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0319d6339822..0c6214497fcc 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2713,6 +2713,28 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
 	}
 }
 
+
+static
+void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/*
+	 * To prevent bfqq's service guarantees from being violated,
+	 * bfqq may be left busy, i.e., queued for service, even if
+	 * empty (see comments in __bfq_bfqq_expire() for
+	 * details). But, if no process will send requests to bfqq any
+	 * longer, then there is no point in keeping bfqq queued for
+	 * service. In addition, keeping bfqq queued for service, but
+	 * with no process ref any longer, may have caused bfqq to be
+	 * freed when dequeued from service. But this is assumed to
+	 * never happen.
+	 */
+	if (bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) &&
+	    bfqq != bfqd->in_service_queue)
+		bfq_del_bfqq_busy(bfqd, bfqq, false);
+
+	bfq_put_queue(bfqq);
+}
+
 static void
 bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
 		struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
@@ -2783,8 +2805,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
 	 */
 	new_bfqq->pid = -1;
 	bfqq->bic = NULL;
-	/* release process reference to bfqq */
-	bfq_put_queue(bfqq);
+	bfq_release_process_ref(bfqd, bfqq);
 }
 
 static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
@@ -4899,7 +4920,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 
 	bfq_put_cooperator(bfqq);
 
-	bfq_put_queue(bfqq); /* release process reference */
+	bfq_release_process_ref(bfqd, bfqq);
 }
 
 static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -5001,8 +5022,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
 
 	bfqq = bic_to_bfqq(bic, false);
 	if (bfqq) {
-		/* release process reference on this queue */
-		bfq_put_queue(bfqq);
+		bfq_release_process_ref(bfqd, bfqq);
 		bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
 		bic_set_bfqq(bic, bfqq, false);
 	}
@@ -5963,7 +5983,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
 
 	bfq_put_cooperator(bfqq);
 
-	bfq_put_queue(bfqq);
+	bfq_release_process_ref(bfqq->bfqd, bfqq);
 	return NULL;
 }
 

From 8b37bc277fb459fa100808880a9d4e0641fff444 Mon Sep 17 00:00:00 2001
From: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Date: Wed, 13 Nov 2019 15:21:31 +0800
Subject: [PATCH 6/7] iocost: check active_list of all the ancestors in
 iocg_activate()

There is a bug that checking the same active_list over and over again
in iocg_activate(). The intention of the code was checking whether all
the ancestors and self have already been activated. So fix it.

Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-iocost.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a7ed434eae03..e01267f99183 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1057,9 +1057,12 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
 	atomic64_set(&iocg->active_period, cur_period);
 
 	/* already activated or breaking leaf-only constraint? */
-	for (i = iocg->level; i > 0; i--)
-		if (!list_empty(&iocg->active_list))
+	if (!list_empty(&iocg->active_list))
+		goto succeed_unlock;
+	for (i = iocg->level - 1; i > 0; i--)
+		if (!list_empty(&iocg->ancestors[i]->active_list))
 			goto fail_unlock;
+
 	if (iocg->child_active_sum)
 		goto fail_unlock;
 
@@ -1101,6 +1104,7 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
 		ioc_start_period(ioc, now);
 	}
 
+succeed_unlock:
 	spin_unlock_irq(&ioc->lock);
 	return true;
 

From dcb77e4b274b8f13ac6482dfb09160cd2fae9a40 Mon Sep 17 00:00:00 2001
From: Chuhong Yuan <hslester96@gmail.com>
Date: Wed, 13 Nov 2019 14:38:47 +0800
Subject: [PATCH 7/7] rsxx: add missed destroy_workqueue calls in remove

The driver misses calling destroy_workqueue in remove like what is done
when probe fails.
Add the missed calls to fix it.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 76b73ddf8fd7..10f6368117d8 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -1000,8 +1000,10 @@ static void rsxx_pci_remove(struct pci_dev *dev)
 
 	cancel_work_sync(&card->event_work);
 
+	destroy_workqueue(card->event_wq);
 	rsxx_destroy_dev(card);
 	rsxx_dma_destroy(card);
+	destroy_workqueue(card->creg_ctrl.creg_wq);
 
 	spin_lock_irqsave(&card->irq_lock, flags);
 	rsxx_disable_ier_and_isr(card, CR_INTR_ALL);