From 00e757b648c0935d703a9b8042312f4a76ee793b Mon Sep 17 00:00:00 2001
From: Bean Huo
Date: Tue, 8 Feb 2022 00:28:06 +0100
Subject: [PATCH 1/3] nvme: add nvme_complete_req tracepoint for batched completion

Add the NVMe request completion trace to nvme_complete_batch_req(),
because the nvme:nvme_complete_req tracepoint is otherwise missing for
batched request completions.

Signed-off-by: Bean Huo
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 961a5f8a44d2..79005ea1a33e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -368,6 +368,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq);
 
 void nvme_complete_batch_req(struct request *req)
 {
+	trace_nvme_complete_rq(req);
 	nvme_cleanup_cmd(req);
 	nvme_end_req_zoned(req);
 }

From 63573807b27e0faf8065a28b1bbe1cbfb23c0130 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg
Date: Mon, 7 Feb 2022 00:40:13 +0200
Subject: [PATCH 2/3] nvme-tcp: fix bogus request completion when failing to send AER

An AER is not backed by a real request, so when sending an NVMe command
fails we must not assume it is a normal request; instead, check whether
it is an AER and, if so, complete the AER (similar to the normal
completion path).

Cc: stable@vger.kernel.org
Signed-off-by: Sagi Grimberg
Reviewed-by: Hannes Reinecke
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/tcp.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 01e24b5703db..891a36d02e7c 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -913,7 +913,15 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
 
 static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
 {
-	nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR);
+	if (nvme_tcp_async_req(req)) {
+		union nvme_result res = {};
+
+		nvme_complete_async_event(&req->queue->ctrl->ctrl,
+				cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res);
+	} else {
+		nvme_tcp_end_request(blk_mq_rq_from_pdu(req),
+				NVME_SC_HOST_PATH_ERROR);
+	}
 }
 
 static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
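For context on the two nvme patches above: the diffs rely on helpers that are
not shown. The snippets below are paraphrased from the kernel tree around
v5.17-rc and may differ slightly from the exact upstream bodies.

The batched completion path bypasses nvme_complete_rq() entirely; the helper
in drivers/nvme/host/nvme.h looks roughly like this, which is why patch 1 has
to add the trace call inside nvme_complete_batch_req():

	static __always_inline void nvme_complete_batch(struct io_comp_batch *iob,
							void (*fn)(struct request *rq))
	{
		struct request *req;

		rq_list_for_each(&iob->req_list, req) {
			fn(req);
			nvme_complete_batch_req(req);	/* now also emits nvme:nvme_complete_rq */
		}
		blk_mq_end_request_batch(iob);
	}

The AER fix in patch 2 hinges on the fact that the async event slot is a
struct nvme_tcp_request embedded in the controller, not a PDU allocated
behind a blk-mq request, so blk_mq_rq_from_pdu() on it would fabricate a
bogus request pointer:

	/* include/linux/blk-mq.h: the driver PDU sits immediately after the
	 * struct request, so the conversion is plain pointer arithmetic and is
	 * only valid for PDUs that really are backed by a request. */
	static inline struct request *blk_mq_rq_from_pdu(void *pdu)
	{
		return pdu - sizeof(struct request);
	}

	/* drivers/nvme/host/tcp.c: the AER is recognized by identity, roughly: */
	static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
	{
		return req == &req->queue->ctrl->async_req;
	}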
From bf23747ee05320903177809648002601cd140cdd Mon Sep 17 00:00:00 2001
From: Tetsuo Handa
Date: Fri, 11 Feb 2022 16:15:54 +0900
Subject: [PATCH 3/3] loop: revert "make autoclear operation asynchronous"

The kernel test robot is reporting that the xfstest sequence

  umount ext2 on xfs
  umount xfs

started failing because commit 322c4293ecc58110 ("loop: make autoclear
operation asynchronous") removed the guarantee that fput() of the
backing file is processed before lo_release(), called from close(),
returns to user mode. And syzbot is reporting that deferring
destroy_workqueue() from __loop_clr_fd() to a WQ context did not
help [1]. Revert that commit.

Link: https://syzkaller.appspot.com/bug?extid=831661966588c802aae9 [1]
Reported-by: kernel test robot
Acked-by: Jan Kara
Reviewed-by: Christoph Hellwig
Reported-by: syzbot
Signed-off-by: Tetsuo Handa
Link: https://lore.kernel.org/r/20220211071554.3424-1-penguin-kernel@I-love.SAKURA.ne.jp
Signed-off-by: Jens Axboe
---
 drivers/block/loop.c | 65 ++++++++++++++++++++------------------------
 drivers/block/loop.h |  1 -
 2 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 01cbbfc4e9e2..150012ffb387 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1082,7 +1082,7 @@ out_putf:
 	return error;
 }
 
-static void __loop_clr_fd(struct loop_device *lo)
+static void __loop_clr_fd(struct loop_device *lo, bool release)
 {
 	struct file *filp;
 	gfp_t gfp = lo->old_gfp_mask;
@@ -1144,6 +1144,8 @@ static void __loop_clr_fd(struct loop_device *lo)
 	/* let user-space know about this change */
 	kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
 	mapping_set_gfp_mask(filp->f_mapping, gfp);
+	/* This is safe: open() is still holding a reference. */
+	module_put(THIS_MODULE);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 
 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1151,52 +1153,44 @@ static void __loop_clr_fd(struct loop_device *lo)
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
 		int err;
 
-		mutex_lock(&lo->lo_disk->open_mutex);
+		/*
+		 * open_mutex has been held already in release path, so don't
+		 * acquire it if this function is called in such case.
+		 *
+		 * If the reread partition isn't from release path, lo_refcnt
+		 * must be at least one and it can only become zero when the
+		 * current holder is released.
+		 */
+		if (!release)
+			mutex_lock(&lo->lo_disk->open_mutex);
 		err = bdev_disk_changed(lo->lo_disk, false);
-		mutex_unlock(&lo->lo_disk->open_mutex);
+		if (!release)
+			mutex_unlock(&lo->lo_disk->open_mutex);
 		if (err)
 			pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
 				__func__, lo->lo_number, err);
 		/* Device is gone, no point in returning error */
 	}
 
+	/*
+	 * lo->lo_state is set to Lo_unbound here after above partscan has
+	 * finished. There cannot be anybody else entering __loop_clr_fd() as
+	 * Lo_rundown state protects us from all the other places trying to
+	 * change the 'lo' device.
+	 */
 	lo->lo_flags = 0;
 	if (!part_shift)
 		lo->lo_disk->flags |= GENHD_FL_NO_PART;
-
-	fput(filp);
-}
-
-static void loop_rundown_completed(struct loop_device *lo)
-{
 	mutex_lock(&lo->lo_mutex);
 	lo->lo_state = Lo_unbound;
 	mutex_unlock(&lo->lo_mutex);
-	module_put(THIS_MODULE);
-}
 
-static void loop_rundown_workfn(struct work_struct *work)
-{
-	struct loop_device *lo = container_of(work, struct loop_device,
-					      rundown_work);
-	struct block_device *bdev = lo->lo_device;
-	struct gendisk *disk = lo->lo_disk;
-
-	__loop_clr_fd(lo);
-	kobject_put(&bdev->bd_device.kobj);
-	module_put(disk->fops->owner);
-	loop_rundown_completed(lo);
-}
-
-static void loop_schedule_rundown(struct loop_device *lo)
-{
-	struct block_device *bdev = lo->lo_device;
-	struct gendisk *disk = lo->lo_disk;
-
-	__module_get(disk->fops->owner);
-	kobject_get(&bdev->bd_device.kobj);
-	INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
-	queue_work(system_long_wq, &lo->rundown_work);
+	/*
+	 * Need not hold lo_mutex to fput backing file. Calling fput holding
+	 * lo_mutex triggers a circular lock dependency possibility warning as
+	 * fput can take open_mutex which is usually taken before lo_mutex.
+	 */
+	fput(filp);
 }
 
 static int loop_clr_fd(struct loop_device *lo)
@@ -1228,8 +1222,7 @@ static int loop_clr_fd(struct loop_device *lo)
 	lo->lo_state = Lo_rundown;
 	mutex_unlock(&lo->lo_mutex);
 
-	__loop_clr_fd(lo);
-	loop_rundown_completed(lo);
+	__loop_clr_fd(lo, false);
 	return 0;
 }
 
@@ -1754,7 +1747,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 		 * In autoclear mode, stop the loop thread
 		 * and remove configuration after last close.
 		 */
-		loop_schedule_rundown(lo);
+		__loop_clr_fd(lo, true);
 		return;
 	} else if (lo->lo_state == Lo_bound) {
 		/*
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 918a7a2dc025..082d4b6bfc6a 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -56,7 +56,6 @@ struct loop_device {
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
 	bool			idr_visible;
-	struct work_struct	rundown_work;
 };
 
 struct loop_cmd {
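For the loop revert, the user-visible regression is the ordering described in
the commit message: with the asynchronous rundown, release of the loop device
could return to user space while fput() of the backing file was still queued
on a workqueue, so a following umount of the filesystem holding that file
could fail. Below is a minimal sketch of the failing "umount ext2 on xfs;
umount xfs" sequence; the mount points and image path are made up for
illustration and this is not the kernel test robot's actual test case:

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* Assume /mnt/ext2 was mounted via "mount -o loop /mnt/xfs/ext2.img",
		 * so the loop device has LO_FLAGS_AUTOCLEAR set and its backing file
		 * lives on the xfs mount. */
		if (umount("/mnt/ext2"))	/* last close -> lo_release() -> __loop_clr_fd() */
			perror("umount ext2");

		/* Before the revert, fput() of /mnt/xfs/ext2.img could still be pending
		 * here and this umount could fail; the revert restores the guarantee
		 * that the backing file has already been released. */
		if (umount("/mnt/xfs"))
			perror("umount xfs");
		return 0;
	}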