From d2efa81dd8ba50b800accb3ef2b73474e5af9648 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jun 2021 08:03:42 +0200 Subject: [PATCH 01/34] mtip32xx: simplify sysfs setup Pass the driver specific attributes directly to device_add_disk instead of manually creating them after the disk registration. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210614060343.3965416-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 79 ++++++------------------------- 1 file changed, 15 insertions(+), 64 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index ff3e7b3f5ad8..d5ff593e6658 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2160,6 +2160,20 @@ static ssize_t mtip_hw_show_status(struct device *dev, static DEVICE_ATTR(status, 0444, mtip_hw_show_status, NULL); +static struct attribute *mtip_disk_attrs[] = { + &dev_attr_status.attr, + NULL, +}; + +static const struct attribute_group mtip_disk_attr_group = { + .attrs = mtip_disk_attrs, +}; + +static const struct attribute_group *mtip_disk_attr_groups[] = { + &mtip_disk_attr_group, + NULL, +}; + /* debugsfs entries */ static ssize_t show_device_status(struct device_driver *drv, char *buf) @@ -2374,47 +2388,6 @@ static const struct file_operations mtip_flags_fops = { .llseek = no_llseek, }; -/* - * Create the sysfs related attributes. - * - * @dd Pointer to the driver data structure. - * @kobj Pointer to the kobj for the block device. - * - * return value - * 0 Operation completed successfully. - * -EINVAL Invalid parameter. - */ -static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj) -{ - if (!kobj || !dd) - return -EINVAL; - - if (sysfs_create_file(kobj, &dev_attr_status.attr)) - dev_warn(&dd->pdev->dev, - "Error creating 'status' sysfs entry\n"); - return 0; -} - -/* - * Remove the sysfs related attributes. - * - * @dd Pointer to the driver data structure. - * @kobj Pointer to the kobj for the block device. - * - * return value - * 0 Operation completed successfully. - * -EINVAL Invalid parameter. - */ -static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj) -{ - if (!kobj || !dd) - return -EINVAL; - - sysfs_remove_file(kobj, &dev_attr_status.attr); - - return 0; -} - static int mtip_hw_debugfs_init(struct driver_data *dd) { if (!dfs_parent) @@ -3566,7 +3539,6 @@ static int mtip_block_initialize(struct driver_data *dd) int rv = 0, wait_for_rebuild = 0; sector_t capacity; unsigned int index = 0; - struct kobject *kobj; if (dd->disk) goto skip_create_disk; /* hw init done, before rebuild */ @@ -3672,17 +3644,7 @@ skip_create_disk: set_capacity(dd->disk, capacity); /* Enable the block device and add it to /dev */ - device_add_disk(&dd->pdev->dev, dd->disk, NULL); - - /* - * Now that the disk is active, initialize any sysfs attributes - * managed by the protocol layer. - */ - kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); - if (kobj) { - mtip_hw_sysfs_init(dd, kobj); - kobject_put(kobj); - } + device_add_disk(&dd->pdev->dev, dd->disk, mtip_disk_attr_groups); if (dd->mtip_svc_handler) { set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); @@ -3751,8 +3713,6 @@ static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) */ static int mtip_block_remove(struct driver_data *dd) { - struct kobject *kobj; - mtip_hw_debugfs_exit(dd); if (dd->mtip_svc_handler) { @@ -3761,15 +3721,6 @@ static int mtip_block_remove(struct driver_data *dd) kthread_stop(dd->mtip_svc_handler); } - /* Clean up the sysfs attributes, if created */ - if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) { - kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); - if (kobj) { - mtip_hw_sysfs_exit(dd, kobj); - kobject_put(kobj); - } - } - if (!dd->sr) { /* * Explicitly wait here for IOs to quiesce, From cc25592caa5dfbb3ae17ef616a8c8f2e910ae549 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jun 2021 08:03:43 +0200 Subject: [PATCH 02/34] mtip32xx: use blk_mq_alloc_disk and blk_cleanup_disk Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and request_queue allocation. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210614060343.3965416-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 73 ++++++++++++------------------- 1 file changed, 28 insertions(+), 45 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index d5ff593e6658..901855717cb5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3548,14 +3548,33 @@ static int mtip_block_initialize(struct driver_data *dd) goto protocol_init_error; } - dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node); - if (dd->disk == NULL) { + memset(&dd->tags, 0, sizeof(dd->tags)); + dd->tags.ops = &mtip_mq_ops; + dd->tags.nr_hw_queues = 1; + dd->tags.queue_depth = MTIP_MAX_COMMAND_SLOTS; + dd->tags.reserved_tags = 1; + dd->tags.cmd_size = sizeof(struct mtip_cmd); + dd->tags.numa_node = dd->numa_node; + dd->tags.flags = BLK_MQ_F_SHOULD_MERGE; + dd->tags.driver_data = dd; + dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS; + + rv = blk_mq_alloc_tag_set(&dd->tags); + if (rv) { dev_err(&dd->pdev->dev, - "Unable to allocate gendisk structure\n"); - rv = -EINVAL; - goto alloc_disk_error; + "Unable to allocate request queue\n"); + goto block_queue_alloc_tag_error; } + dd->disk = blk_mq_alloc_disk(&dd->tags, dd); + if (IS_ERR(dd->disk)) { + dev_err(&dd->pdev->dev, + "Unable to allocate request queue\n"); + rv = -ENOMEM; + goto block_queue_alloc_init_error; + } + dd->queue = dd->disk->queue; + rv = ida_alloc(&rssd_index_ida, GFP_KERNEL); if (rv < 0) goto ida_get_error; @@ -3577,36 +3596,6 @@ static int mtip_block_initialize(struct driver_data *dd) mtip_hw_debugfs_init(dd); - memset(&dd->tags, 0, sizeof(dd->tags)); - dd->tags.ops = &mtip_mq_ops; - dd->tags.nr_hw_queues = 1; - dd->tags.queue_depth = MTIP_MAX_COMMAND_SLOTS; - dd->tags.reserved_tags = 1; - dd->tags.cmd_size = sizeof(struct mtip_cmd); - dd->tags.numa_node = dd->numa_node; - dd->tags.flags = BLK_MQ_F_SHOULD_MERGE; - dd->tags.driver_data = dd; - dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS; - - rv = blk_mq_alloc_tag_set(&dd->tags); - if (rv) { - dev_err(&dd->pdev->dev, - "Unable to allocate request queue\n"); - goto block_queue_alloc_tag_error; - } - - /* Allocate the request queue. */ - dd->queue = blk_mq_init_queue(&dd->tags); - if (IS_ERR(dd->queue)) { - dev_err(&dd->pdev->dev, - "Unable to allocate request queue\n"); - rv = -ENOMEM; - goto block_queue_alloc_init_error; - } - - dd->disk->queue = dd->queue; - dd->queue->queuedata = dd; - skip_create_disk: /* Initialize the protocol layer. */ wait_for_rebuild = mtip_hw_get_identify(dd); @@ -3671,23 +3660,17 @@ start_service_thread: kthread_run_error: /* Delete our gendisk. This also removes the device from /dev */ del_gendisk(dd->disk); - read_capacity_error: init_hw_cmds_error: - blk_cleanup_queue(dd->queue); -block_queue_alloc_init_error: - blk_mq_free_tag_set(&dd->tags); -block_queue_alloc_tag_error: mtip_hw_debugfs_exit(dd); disk_index_error: ida_free(&rssd_index_ida, index); - ida_get_error: - put_disk(dd->disk); - -alloc_disk_error: + blk_cleanup_disk(dd->disk); +block_queue_alloc_init_error: + blk_mq_free_tag_set(&dd->tags); +block_queue_alloc_tag_error: mtip_hw_exit(dd); /* De-initialize the protocol layer. */ - protocol_init_error: return rv; } From 2f43dbf3a7423ba14d827965d37fb6a56aa90009 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jun 2021 08:02:31 +0200 Subject: [PATCH 03/34] null_blk: remove an unused variable assignment in null_add_dev Fix up the recent blk_alloc_disk conversion. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210614060231.3965278-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 3b320b005aa8..d734e9ee1546 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1849,7 +1849,6 @@ static int null_add_dev(struct nullb_device *dev) if (!null_setup_fault()) goto out_cleanup_tags; - rv = -ENOMEM; nullb->tag_set->timeout = 5 * HZ; nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb); if (IS_ERR(nullb->disk)) { From 7eb90f7e90a85b635b31bc0ac35846880c7470e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jun 2021 08:07:58 +0200 Subject: [PATCH 04/34] ubd: remove the code to register as the legacy IDE driver With the legacy IDE driver long deprecated, and modern userspace being much more flexible about dev_t assignments there is no reason to fake a registration as the legacy IDE driver in ubd. This registeration is a little problematic as it registers the same request_queue for multiple gendisks, so just remove it. Signed-off-by: Christoph Hellwig Acked-By: Anton Ivanov Link: https://lore.kernel.org/r/20210614060759.3965724-2-hch@lst.de Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 104 ++++--------------------------------- 1 file changed, 11 insertions(+), 93 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 8e0b43cf089f..f508d45f7a69 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -125,9 +125,7 @@ static const struct block_device_operations ubd_blops = { }; /* Protected by ubd_lock */ -static int fake_major = UBD_MAJOR; static struct gendisk *ubd_gendisk[MAX_DEV]; -static struct gendisk *fake_gendisk[MAX_DEV]; #ifdef CONFIG_BLK_DEV_UBD_SYNC #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ @@ -197,54 +195,19 @@ struct ubd { /* Protected by ubd_lock */ static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD }; -/* Only changed by fake_ide_setup which is a setup */ -static int fake_ide = 0; -static struct proc_dir_entry *proc_ide_root = NULL; -static struct proc_dir_entry *proc_ide = NULL; - static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd); -static void make_proc_ide(void) -{ - proc_ide_root = proc_mkdir("ide", NULL); - proc_ide = proc_mkdir("ide0", proc_ide_root); -} - -static int fake_ide_media_proc_show(struct seq_file *m, void *v) -{ - seq_puts(m, "disk\n"); - return 0; -} - -static void make_ide_entries(const char *dev_name) -{ - struct proc_dir_entry *dir, *ent; - char name[64]; - - if(proc_ide_root == NULL) make_proc_ide(); - - dir = proc_mkdir(dev_name, proc_ide); - if(!dir) return; - - ent = proc_create_single("media", S_IRUGO, dir, - fake_ide_media_proc_show); - if(!ent) return; - snprintf(name, sizeof(name), "ide0/%s", dev_name); - proc_symlink(dev_name, proc_ide_root, name); -} - static int fake_ide_setup(char *str) { - fake_ide = 1; + pr_warn("The fake_ide option has been removed\n"); return 1; } - __setup("fake_ide", fake_ide_setup); __uml_help(fake_ide_setup, "fake_ide\n" -" Create ide0 entries that map onto ubd devices.\n\n" +" Obsolete stub.\n\n" ); static int parse_unit(char **ptr) @@ -296,20 +259,8 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out) return err; } - mutex_lock(&ubd_lock); - if (fake_major != UBD_MAJOR) { - *error_out = "Can't assign a fake major twice"; - goto out1; - } - - fake_major = major; - - printk(KERN_INFO "Setting extra ubd major number to %d\n", - major); - err = 0; - out1: - mutex_unlock(&ubd_lock); - return err; + pr_warn("fake major not supported any more\n"); + return 0; } n = parse_unit(&str); @@ -917,7 +868,6 @@ static const struct attribute_group *ubd_attr_groups[] = { static int ubd_disk_register(int major, u64 size, int unit, struct gendisk **disk_out) { - struct device *parent = NULL; struct gendisk *disk; disk = alloc_disk(1 << UBD_SHIFT); @@ -928,24 +878,17 @@ static int ubd_disk_register(int major, u64 size, int unit, disk->first_minor = unit << UBD_SHIFT; disk->fops = &ubd_blops; set_capacity(disk, size / 512); - if (major == UBD_MAJOR) - sprintf(disk->disk_name, "ubd%c", 'a' + unit); - else - sprintf(disk->disk_name, "ubd_fake%d", unit); + sprintf(disk->disk_name, "ubd%c", 'a' + unit); - /* sysfs register (not for ide fake devices) */ - if (major == UBD_MAJOR) { - ubd_devs[unit].pdev.id = unit; - ubd_devs[unit].pdev.name = DRIVER_NAME; - ubd_devs[unit].pdev.dev.release = ubd_device_release; - dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]); - platform_device_register(&ubd_devs[unit].pdev); - parent = &ubd_devs[unit].pdev.dev; - } + ubd_devs[unit].pdev.id = unit; + ubd_devs[unit].pdev.name = DRIVER_NAME; + ubd_devs[unit].pdev.dev.release = ubd_device_release; + dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]); + platform_device_register(&ubd_devs[unit].pdev); disk->private_data = &ubd_devs[unit]; disk->queue = ubd_devs[unit].queue; - device_add_disk(parent, disk, ubd_attr_groups); + device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups); *disk_out = disk; return 0; @@ -1001,17 +944,6 @@ static int ubd_add(int n, char **error_out) goto out_cleanup_tags; } - if (fake_major != UBD_MAJOR) - ubd_disk_register(fake_major, ubd_dev->size, n, - &fake_gendisk[n]); - - /* - * Perhaps this should also be under the "if (fake_major)" above - * using the fake_disk->disk_name - */ - if (fake_ide) - make_ide_entries(ubd_gendisk[n]->disk_name); - err = 0; out: return err; @@ -1126,12 +1058,6 @@ static int ubd_remove(int n, char **error_out) put_disk(disk); } - if(fake_gendisk[n] != NULL){ - del_gendisk(fake_gendisk[n]); - put_disk(fake_gendisk[n]); - fake_gendisk[n] = NULL; - } - err = 0; platform_device_unregister(&ubd_dev->pdev); out: @@ -1188,14 +1114,6 @@ static int __init ubd_init(void) if (register_blkdev(UBD_MAJOR, "ubd")) return -1; - if (fake_major != UBD_MAJOR) { - char name[sizeof("ubd_nnn\0")]; - - snprintf(name, sizeof(name), "ubd_%d", fake_major); - if (register_blkdev(fake_major, "ubd")) - return -1; - } - irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, sizeof(struct io_thread_req *), GFP_KERNEL From 35efb594c3a8bbd41fca67658b03bf99441d488b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jun 2021 08:07:59 +0200 Subject: [PATCH 05/34] ubd: use blk_mq_alloc_disk and blk_cleanup_disk Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and request_queue allocation. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210614060759.3965724-3-hch@lst.de Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 44 ++++++++++++-------------------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index f508d45f7a69..0b86aa1b12f1 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -825,7 +825,6 @@ static void ubd_device_release(struct device *dev) { struct ubd *ubd_dev = dev_get_drvdata(dev); - blk_cleanup_queue(ubd_dev->queue); blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); } @@ -865,17 +864,12 @@ static const struct attribute_group *ubd_attr_groups[] = { NULL, }; -static int ubd_disk_register(int major, u64 size, int unit, - struct gendisk **disk_out) +static void ubd_disk_register(int major, u64 size, int unit, + struct gendisk *disk) { - struct gendisk *disk; - - disk = alloc_disk(1 << UBD_SHIFT); - if(disk == NULL) - return -ENOMEM; - disk->major = major; disk->first_minor = unit << UBD_SHIFT; + disk->minors = 1 << UBD_SHIFT; disk->fops = &ubd_blops; set_capacity(disk, size / 512); sprintf(disk->disk_name, "ubd%c", 'a' + unit); @@ -889,9 +883,6 @@ static int ubd_disk_register(int major, u64 size, int unit, disk->private_data = &ubd_devs[unit]; disk->queue = ubd_devs[unit].queue; device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups); - - *disk_out = disk; - return 0; } #define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE)) @@ -903,6 +894,7 @@ static const struct blk_mq_ops ubd_mq_ops = { static int ubd_add(int n, char **error_out) { struct ubd *ubd_dev = &ubd_devs[n]; + struct gendisk *disk; int err = 0; if(ubd_dev->file == NULL) @@ -927,32 +919,24 @@ static int ubd_add(int n, char **error_out) if (err) goto out; - ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); - if (IS_ERR(ubd_dev->queue)) { - err = PTR_ERR(ubd_dev->queue); + disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_cleanup_tags; } + ubd_dev->queue = disk->queue; - ubd_dev->queue->queuedata = ubd_dev; blk_queue_write_cache(ubd_dev->queue, true, false); - blk_queue_max_segments(ubd_dev->queue, MAX_SG); blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); - err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); - if(err){ - *error_out = "Failed to register device"; - goto out_cleanup_tags; - } - - err = 0; -out: - return err; + ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk); + ubd_gendisk[n] = disk; + return 0; out_cleanup_tags: blk_mq_free_tag_set(&ubd_dev->tag_set); - if (!(IS_ERR(ubd_dev->queue))) - blk_cleanup_queue(ubd_dev->queue); - goto out; +out: + return err; } static int ubd_config(char *str, char **error_out) @@ -1055,7 +1039,7 @@ static int ubd_remove(int n, char **error_out) ubd_gendisk[n] = NULL; if(disk != NULL){ del_gendisk(disk); - put_disk(disk); + blk_cleanup_disk(disk); } err = 0; From 6497ef8df568afbf5f3e38825a4590ff41611a54 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Thu, 29 Apr 2021 15:58:28 +0530 Subject: [PATCH 06/34] nbd: provide a way for userspace processes to identify device backends Problem: On reconfigure of device, there is no way to defend if the backend storage is matching with the initial backend storage. Say, if an initial connect request for backend "pool1/image1" got mapped to /dev/nbd0 and the userspace process is terminated. A next reconfigure request within NBD_ATTR_DEAD_CONN_TIMEOUT is allowed to use /dev/nbd0 for a different backend "pool1/image2" For example, an operation like below could be dangerous: $ sudo rbd-nbd map --try-netlink rbd-pool/ext4-image /dev/nbd0 $ sudo blkid /dev/nbd0 /dev/nbd0: UUID="bfc444b4-64b1-418f-8b36-6e0d170cfc04" TYPE="ext4" $ sudo pkill -9 rbd-nbd $ sudo rbd-nbd attach --try-netlink --device /dev/nbd0 rbd-pool/xfs-image /dev/nbd0 $ sudo blkid /dev/nbd0 /dev/nbd0: UUID="d29bf343-6570-4069-a9ea-2fa156ced908" TYPE="xfs" Solution: Provide a way for userspace processes to keep some metadata to identify between the device and the backend, so that when a reconfigure request is made, we can compare and avoid such dangerous operations. With this solution, as part of the initial connect request, backend path can be stored in the sysfs per device config, so that on a reconfigure request it's easy to check if the backend path matches with the initial connect backend path. Please note, ioctl interface to nbd will not have these changes, as there won't be any reconfigure. Signed-off-by: Prasanna Kumar Kalever Reviewed-by: Xiubo Li Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210429102828.31248-1-prasanna.kalever@redhat.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 60 +++++++++++++++++++++++++++++++- include/uapi/linux/nbd-netlink.h | 1 + 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 614d82e7fae4..b7d663736d35 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -79,6 +79,7 @@ struct link_dead_args { #define NBD_RT_HAS_CONFIG_REF 4 #define NBD_RT_BOUND 5 #define NBD_RT_DISCONNECT_ON_CLOSE 6 +#define NBD_RT_HAS_BACKEND_FILE 7 #define NBD_DESTROY_ON_DISCONNECT 0 #define NBD_DISCONNECT_REQUESTED 1 @@ -119,6 +120,8 @@ struct nbd_device { struct completion *destroy_complete; unsigned long flags; + + char *backend; }; #define NBD_CMD_REQUEUED 1 @@ -216,6 +219,20 @@ static const struct device_attribute pid_attr = { .show = pid_show, }; +static ssize_t backend_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + struct nbd_device *nbd = (struct nbd_device *)disk->private_data; + + return sprintf(buf, "%s\n", nbd->backend ?: ""); +} + +static const struct device_attribute backend_attr = { + .attr = { .name = "backend", .mode = 0444}, + .show = backend_show, +}; + static void nbd_dev_remove(struct nbd_device *nbd) { struct gendisk *disk = nbd->disk; @@ -1211,6 +1228,12 @@ static void nbd_config_put(struct nbd_device *nbd) &config->runtime_flags)) device_remove_file(disk_to_dev(nbd->disk), &pid_attr); nbd->task_recv = NULL; + if (test_and_clear_bit(NBD_RT_HAS_BACKEND_FILE, + &config->runtime_flags)) { + device_remove_file(disk_to_dev(nbd->disk), &backend_attr); + kfree(nbd->backend); + nbd->backend = NULL; + } nbd_clear_sock(nbd); if (config->num_connections) { int i; @@ -1270,7 +1293,7 @@ static int nbd_start_device(struct nbd_device *nbd) error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (error) { - dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); + dev_err(disk_to_dev(nbd->disk), "device_create_file failed for pid!\n"); return error; } set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags); @@ -1657,6 +1680,7 @@ static int nbd_dev_add(int index) BLK_MQ_F_BLOCKING; nbd->tag_set.driver_data = nbd; nbd->destroy_complete = NULL; + nbd->backend = NULL; err = blk_mq_alloc_tag_set(&nbd->tag_set); if (err) @@ -1743,6 +1767,7 @@ static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED}, + [NBD_ATTR_BACKEND_IDENTIFIER] = { .type = NLA_STRING}, }; static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { @@ -1945,6 +1970,23 @@ again: } } ret = nbd_start_device(nbd); + if (ret) + goto out; + if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) { + nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER], + GFP_KERNEL); + if (!nbd->backend) { + ret = -ENOMEM; + goto out; + } + } + ret = device_create_file(disk_to_dev(nbd->disk), &backend_attr); + if (ret) { + dev_err(disk_to_dev(nbd->disk), + "device_create_file failed for backend!\n"); + goto out; + } + set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags); out: mutex_unlock(&nbd->config_lock); if (!ret) { @@ -2037,6 +2079,22 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) index); return -EINVAL; } + if (nbd->backend) { + if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) { + if (nla_strcmp(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER], + nbd->backend)) { + mutex_unlock(&nbd_index_mutex); + dev_err(nbd_to_dev(nbd), + "backend image doesn't match with %s\n", + nbd->backend); + return -EINVAL; + } + } else { + mutex_unlock(&nbd_index_mutex); + dev_err(nbd_to_dev(nbd), "must specify backend\n"); + return -EINVAL; + } + } if (!refcount_inc_not_zero(&nbd->refs)) { mutex_unlock(&nbd_index_mutex); printk(KERN_ERR "nbd: device at index %d is going down\n", diff --git a/include/uapi/linux/nbd-netlink.h b/include/uapi/linux/nbd-netlink.h index c5d0ef7aa7d5..2d0b90964227 100644 --- a/include/uapi/linux/nbd-netlink.h +++ b/include/uapi/linux/nbd-netlink.h @@ -35,6 +35,7 @@ enum { NBD_ATTR_SOCKETS, NBD_ATTR_DEAD_CONN_TIMEOUT, NBD_ATTR_DEVICE_LIST, + NBD_ATTR_BACKEND_IDENTIFIER, __NBD_ATTR_MAX, }; #define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1) From 249cda3325e0ff35dd8af9b5885f3aaf4ddd165d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Jun 2021 07:39:33 +0200 Subject: [PATCH 07/34] mmc: remove an extra blk_{get,put}_queue pair The gendisk already acquires a reference to the queue when add_disk is called, which dropped on put_disk. So remove the superflous extra refcounting. Signed-off-by: Christoph Hellwig Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20210616053934.880951-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/mmc/core/block.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 88f4c215caa6..d663daef57f2 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -202,7 +202,7 @@ static void mmc_blk_put(struct mmc_blk_data *md) md->usage--; if (md->usage == 0) { int devidx = mmc_get_devidx(md->disk); - blk_put_queue(md->queue.queue); + ida_simple_remove(&mmc_blk_ida, devidx); put_disk(md->disk); kfree(md); @@ -2335,18 +2335,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, md->queue.blkdata = md; - /* - * Keep an extra reference to the queue so that we can shutdown the - * queue (i.e. call blk_cleanup_queue()) while there are still - * references to the 'md'. The corresponding blk_put_queue() is in - * mmc_blk_put(). - */ - if (!blk_get_queue(md->queue.queue)) { - mmc_cleanup_queue(&md->queue); - ret = -ENODEV; - goto err_putdisk; - } - md->disk->major = MMC_BLOCK_MAJOR; md->disk->first_minor = devidx * perdev_minors; md->disk->fops = &mmc_bdops; From 607d968a5769d8eef20ece19b84937f9c2676491 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Jun 2021 07:39:34 +0200 Subject: [PATCH 08/34] mmc: switch to blk_mq_alloc_disk Use the blk_mq_alloc_disk to allocate the request_queue and gendisk together. Signed-off-by: Christoph Hellwig Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20210616053934.880951-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/mmc/core/block.c | 14 +++----------- drivers/mmc/core/queue.c | 23 ++++++++++------------- drivers/mmc/core/queue.h | 2 +- 3 files changed, 14 insertions(+), 25 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index d663daef57f2..e7f89cbf9232 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2319,27 +2319,21 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, */ md->read_only = mmc_blk_readonly(card); - md->disk = alloc_disk(perdev_minors); - if (md->disk == NULL) { - ret = -ENOMEM; + md->disk = mmc_init_queue(&md->queue, card); + if (IS_ERR(md->disk)) { + ret = PTR_ERR(md->disk); goto err_kfree; } INIT_LIST_HEAD(&md->part); INIT_LIST_HEAD(&md->rpmbs); md->usage = 1; - - ret = mmc_init_queue(&md->queue, card); - if (ret) - goto err_putdisk; - md->queue.blkdata = md; md->disk->major = MMC_BLOCK_MAJOR; md->disk->first_minor = devidx * perdev_minors; md->disk->fops = &mmc_bdops; md->disk->private_data = md; - md->disk->queue = md->queue.queue; md->parent = parent; set_disk_ro(md->disk, md->read_only || default_ro); md->disk->flags = GENHD_FL_EXT_DEVT; @@ -2388,8 +2382,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, return md; - err_putdisk: - put_disk(md->disk); err_kfree: kfree(md); out: diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index d600e0a4a460..cc3261777637 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -424,9 +424,10 @@ static inline bool mmc_merge_capable(struct mmc_host *host) * * Initialise a MMC card request queue. */ -int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) +struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) { struct mmc_host *host = card->host; + struct gendisk *disk; int ret; mq->card = card; @@ -464,26 +465,22 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) ret = blk_mq_alloc_tag_set(&mq->tag_set); if (ret) - return ret; + return ERR_PTR(ret); + - mq->queue = blk_mq_init_queue(&mq->tag_set); - if (IS_ERR(mq->queue)) { - ret = PTR_ERR(mq->queue); - goto free_tag_set; + disk = blk_mq_alloc_disk(&mq->tag_set, mq); + if (IS_ERR(disk)) { + blk_mq_free_tag_set(&mq->tag_set); + return disk; } + mq->queue = disk->queue; if (mmc_host_is_spi(host) && host->use_spi_crc) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); - - mq->queue->queuedata = mq; blk_queue_rq_timeout(mq->queue, 60 * HZ); mmc_setup_queue(mq, card); - return 0; - -free_tag_set: - blk_mq_free_tag_set(&mq->tag_set); - return ret; + return disk; } void mmc_queue_suspend(struct mmc_queue *mq) diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 3319d8ab57d0..9ade3bcbb714 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -94,7 +94,7 @@ struct mmc_queue { struct work_struct complete_work; }; -extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *); +struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card); extern void mmc_cleanup_queue(struct mmc_queue *); extern void mmc_queue_suspend(struct mmc_queue *); extern void mmc_queue_resume(struct mmc_queue *); From 1033d103a9b795b41ca0bb90587047a65e2ed5ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Jun 2021 10:01:44 +0200 Subject: [PATCH 09/34] mmc: initialized disk->minors Fix a let hunk from the blk_mq_alloc_disk conversion. Fixes: 281ea6a5bfdc ("mmc: switch to blk_mq_alloc_disk") Reported-by: Marek Szyprowski Signed-off-by: Christoph Hellwig Tested-by: Marek Szyprowski Acked-by: Ulf Hansson Link: https://lore.kernel.org/r/20210621080144.3655131-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/mmc/core/block.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index e7f89cbf9232..9890a1532cb0 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2331,6 +2331,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, md->queue.blkdata = md; md->disk->major = MMC_BLOCK_MAJOR; + md->disk->minors = perdev_minors; md->disk->first_minor = devidx * perdev_minors; md->disk->fops = &mmc_bdops; md->disk->private_data = md; From 8b52d8be86d723085784317427d339528766d9a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:59:00 +0200 Subject: [PATCH 10/34] loop: reorder loop_exit Unregister the misc and blockdevice first to prevent further access, and only then iterate to remove the devices. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210623145908.92973-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cc0e8c39a48b..0e96f8c422b3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2550,14 +2550,11 @@ static int loop_exit_cb(int id, void *ptr, void *data) static void __exit loop_exit(void) { mutex_lock(&loop_ctl_mutex); + unregister_blkdev(LOOP_MAJOR, "loop"); + misc_deregister(&loop_misc); idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); idr_destroy(&loop_index_idr); - - unregister_blkdev(LOOP_MAJOR, "loop"); - - misc_deregister(&loop_misc); - mutex_unlock(&loop_ctl_mutex); } From bd5c39edad535d9f6ccb99633930f9f7b768593c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:59:01 +0200 Subject: [PATCH 11/34] loop: reduce loop_ctl_mutex coverage in loop_exit loop_ctl_mutex is only needed to iterate the IDR for removing the loop devices, so reduce the coverage. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210623145908.92973-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 0e96f8c422b3..4a3712e4cf12 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2549,13 +2549,14 @@ static int loop_exit_cb(int id, void *ptr, void *data) static void __exit loop_exit(void) { - mutex_lock(&loop_ctl_mutex); unregister_blkdev(LOOP_MAJOR, "loop"); misc_deregister(&loop_misc); + mutex_lock(&loop_ctl_mutex); idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); - idr_destroy(&loop_index_idr); mutex_unlock(&loop_ctl_mutex); + + idr_destroy(&loop_index_idr); } module_init(loop_init); From d6da83d072c187d6a69d5a49e2320f62920889d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:59:02 +0200 Subject: [PATCH 12/34] loop: remove the l argument to loop_add None of the callers cares about the allocated struct loop_device. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210623145908.92973-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4a3712e4cf12..d587ad210ecd 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2237,7 +2237,7 @@ static const struct blk_mq_ops loop_mq_ops = { .complete = lo_complete_rq, }; -static int loop_add(struct loop_device **l, int i) +static int loop_add(int i) { struct loop_device *lo; struct gendisk *disk; @@ -2326,7 +2326,6 @@ static int loop_add(struct loop_device **l, int i) disk->queue = lo->lo_queue; sprintf(disk->disk_name, "loop%d", i); add_disk(disk); - *l = lo; return lo->lo_number; out_cleanup_tags: @@ -2396,7 +2395,7 @@ static void loop_probe(dev_t dev) mutex_lock(&loop_ctl_mutex); if (loop_lookup(&lo, idx) < 0) - loop_add(&lo, idx); + loop_add(idx); mutex_unlock(&loop_ctl_mutex); } @@ -2418,7 +2417,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = -EEXIST; break; } - ret = loop_add(&lo, parm); + ret = loop_add(parm); break; case LOOP_CTL_REMOVE: ret = loop_lookup(&lo, parm); @@ -2446,7 +2445,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(&lo, -1); if (ret >= 0) break; - ret = loop_add(&lo, -1); + ret = loop_add(-1); } mutex_unlock(&loop_ctl_mutex); @@ -2473,7 +2472,6 @@ MODULE_ALIAS("devname:loop-control"); static int __init loop_init(void) { int i, nr; - struct loop_device *lo; int err; part_shift = 0; @@ -2527,7 +2525,7 @@ static int __init loop_init(void) /* pre-create number of devices given by config or max_loop */ mutex_lock(&loop_ctl_mutex); for (i = 0; i < nr; i++) - loop_add(&lo, i); + loop_add(i); mutex_unlock(&loop_ctl_mutex); printk(KERN_INFO "loop: module loaded\n"); From 4157fe0b3d16ceca4316674a90c681405cdd23bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:59:03 +0200 Subject: [PATCH 13/34] loop: don't call loop_lookup before adding a loop device loop_add returns the right error if the slot wasn't available. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210623145908.92973-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d587ad210ecd..5b77c1e160b5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2388,14 +2388,12 @@ out: static void loop_probe(dev_t dev) { int idx = MINOR(dev) >> part_shift; - struct loop_device *lo; if (max_loop && idx >= max_loop) return; mutex_lock(&loop_ctl_mutex); - if (loop_lookup(&lo, idx) < 0) - loop_add(idx); + loop_add(idx); mutex_unlock(&loop_ctl_mutex); } @@ -2412,11 +2410,6 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = -ENOSYS; switch (cmd) { case LOOP_CTL_ADD: - ret = loop_lookup(&lo, parm); - if (ret >= 0) { - ret = -EEXIST; - break; - } ret = loop_add(parm); break; case LOOP_CTL_REMOVE: From f9d107644aa4943b383986a8aa36125379eeaa81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:59:04 +0200 Subject: [PATCH 14/34] loop: split loop_control_ioctl Split loop_control_ioctl into a helper for each command. This keeps the code nicely separated for the upcoming locking changes. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210623145908.92973-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 97 ++++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 35 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 5b77c1e160b5..c3c6cfdcaf34 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2397,8 +2397,51 @@ static void loop_probe(dev_t dev) mutex_unlock(&loop_ctl_mutex); } -static long loop_control_ioctl(struct file *file, unsigned int cmd, - unsigned long parm) +static int loop_control_add(int idx) +{ + int ret; + + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; + ret = loop_add(idx); + mutex_unlock(&loop_ctl_mutex); + return ret; +} + +static int loop_control_remove(int idx) +{ + struct loop_device *lo; + int ret; + + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; + + ret = loop_lookup(&lo, idx); + if (ret < 0) + goto out_unlock_ctrl; + + ret = mutex_lock_killable(&lo->lo_mutex); + if (ret) + goto out_unlock_ctrl; + if (lo->lo_state != Lo_unbound || + atomic_read(&lo->lo_refcnt) > 0) { + mutex_unlock(&lo->lo_mutex); + ret = -EBUSY; + goto out_unlock_ctrl; + } + lo->lo_state = Lo_deleting; + mutex_unlock(&lo->lo_mutex); + + idr_remove(&loop_index_idr, lo->lo_number); + loop_remove(lo); +out_unlock_ctrl: + mutex_unlock(&loop_ctl_mutex); + return ret; +} + +static int loop_control_get_free(int idx) { struct loop_device *lo; int ret; @@ -2406,45 +2449,29 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = mutex_lock_killable(&loop_ctl_mutex); if (ret) return ret; - - ret = -ENOSYS; - switch (cmd) { - case LOOP_CTL_ADD: - ret = loop_add(parm); - break; - case LOOP_CTL_REMOVE: - ret = loop_lookup(&lo, parm); - if (ret < 0) - break; - ret = mutex_lock_killable(&lo->lo_mutex); - if (ret) - break; - if (lo->lo_state != Lo_unbound) { - ret = -EBUSY; - mutex_unlock(&lo->lo_mutex); - break; - } - if (atomic_read(&lo->lo_refcnt) > 0) { - ret = -EBUSY; - mutex_unlock(&lo->lo_mutex); - break; - } - lo->lo_state = Lo_deleting; - mutex_unlock(&lo->lo_mutex); - idr_remove(&loop_index_idr, lo->lo_number); - loop_remove(lo); - break; - case LOOP_CTL_GET_FREE: - ret = loop_lookup(&lo, -1); - if (ret >= 0) - break; + ret = loop_lookup(&lo, -1); + if (ret < 0) ret = loop_add(-1); - } mutex_unlock(&loop_ctl_mutex); return ret; } +static long loop_control_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + switch (cmd) { + case LOOP_CTL_ADD: + return loop_control_add(parm); + case LOOP_CTL_REMOVE: + return loop_control_remove(parm); + case LOOP_CTL_GET_FREE: + return loop_control_get_free(parm); + default: + return -ENOSYS; + } +} + static const struct file_operations loop_ctl_fops = { .open = nonseekable_open, .unlocked_ioctl = loop_control_ioctl, From 18d1f200b3807c383d80cc00d6bbdee288e63b1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:59:05 +0200 Subject: [PATCH 15/34] loop: move loop_ctl_mutex locking into loop_add Move acquiring and releasing loop_ctl_mutex from the callers into loop_add. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210623145908.92973-7-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c3c6cfdcaf34..c18e2930ca11 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2247,9 +2247,12 @@ static int loop_add(int i) lo = kzalloc(sizeof(*lo), GFP_KERNEL); if (!lo) goto out; - lo->lo_state = Lo_unbound; + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + goto out_free_dev; + /* allocate id, if @id >= 0, we're requesting that specific id */ if (i >= 0) { err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); @@ -2259,7 +2262,7 @@ static int loop_add(int i) err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); } if (err < 0) - goto out_free_dev; + goto out_unlock; i = err; err = -ENOMEM; @@ -2326,12 +2329,15 @@ static int loop_add(int i) disk->queue = lo->lo_queue; sprintf(disk->disk_name, "loop%d", i); add_disk(disk); - return lo->lo_number; + mutex_unlock(&loop_ctl_mutex); + return i; out_cleanup_tags: blk_mq_free_tag_set(&lo->tag_set); out_free_idr: idr_remove(&loop_index_idr, i); +out_unlock: + mutex_unlock(&loop_ctl_mutex); out_free_dev: kfree(lo); out: @@ -2391,22 +2397,7 @@ static void loop_probe(dev_t dev) if (max_loop && idx >= max_loop) return; - - mutex_lock(&loop_ctl_mutex); loop_add(idx); - mutex_unlock(&loop_ctl_mutex); -} - -static int loop_control_add(int idx) -{ - int ret; - - ret = mutex_lock_killable(&loop_ctl_mutex); - if (ret) - return ret; - ret = loop_add(idx); - mutex_unlock(&loop_ctl_mutex); - return ret; } static int loop_control_remove(int idx) @@ -2450,11 +2441,11 @@ static int loop_control_get_free(int idx) if (ret) return ret; ret = loop_lookup(&lo, -1); - if (ret < 0) - ret = loop_add(-1); mutex_unlock(&loop_ctl_mutex); - return ret; + if (ret >= 0) + return ret; + return loop_add(-1); } static long loop_control_ioctl(struct file *file, unsigned int cmd, @@ -2462,7 +2453,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, { switch (cmd) { case LOOP_CTL_ADD: - return loop_control_add(parm); + return loop_add(parm); case LOOP_CTL_REMOVE: return loop_control_remove(parm); case LOOP_CTL_GET_FREE: @@ -2543,10 +2534,8 @@ static int __init loop_init(void) } /* pre-create number of devices given by config or max_loop */ - mutex_lock(&loop_ctl_mutex); for (i = 0; i < nr; i++) loop_add(i); - mutex_unlock(&loop_ctl_mutex); printk(KERN_INFO "loop: module loaded\n"); return 0; From e5d66a10324f2a0c31153fd6210d99a9d00ef047 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:59:06 +0200 Subject: [PATCH 16/34] loop: don't allow deleting an unspecified loop device Passing a negative index to loop_lookup while return any unbound device. Doing that for a delete does not make much sense, so add check to explicitly reject that case. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210623145908.92973-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c18e2930ca11..b217065a6d67 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2404,6 +2404,11 @@ static int loop_control_remove(int idx) { struct loop_device *lo; int ret; + + if (idx < 0) { + pr_warn("deleting an unspecified loop device is not supported.\n"); + return -EINVAL; + } ret = mutex_lock_killable(&loop_ctl_mutex); if (ret) From b9848081465d8734441408129bd44311c7b6d644 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:59:07 +0200 Subject: [PATCH 17/34] loop: split loop_lookup loop_lookup has two callers - one wants to do the a find by index and the other wants any unbound loop device. Open code the respective functionality in each caller. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210623145908.92973-9-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 57 ++++++++++---------------------------------- 1 file changed, 12 insertions(+), 45 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b217065a6d67..ff7014608ef6 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2353,44 +2353,6 @@ static void loop_remove(struct loop_device *lo) kfree(lo); } -static int find_free_cb(int id, void *ptr, void *data) -{ - struct loop_device *lo = ptr; - struct loop_device **l = data; - - if (lo->lo_state == Lo_unbound) { - *l = lo; - return 1; - } - return 0; -} - -static int loop_lookup(struct loop_device **l, int i) -{ - struct loop_device *lo; - int ret = -ENODEV; - - if (i < 0) { - int err; - - err = idr_for_each(&loop_index_idr, &find_free_cb, &lo); - if (err == 1) { - *l = lo; - ret = lo->lo_number; - } - goto out; - } - - /* lookup and return a specific i */ - lo = idr_find(&loop_index_idr, i); - if (lo) { - *l = lo; - ret = lo->lo_number; - } -out: - return ret; -} - static void loop_probe(dev_t dev) { int idx = MINOR(dev) >> part_shift; @@ -2414,9 +2376,11 @@ static int loop_control_remove(int idx) if (ret) return ret; - ret = loop_lookup(&lo, idx); - if (ret < 0) + lo = idr_find(&loop_index_idr, idx); + if (!lo) { + ret = -ENODEV; goto out_unlock_ctrl; + } ret = mutex_lock_killable(&lo->lo_mutex); if (ret) @@ -2440,17 +2404,20 @@ out_unlock_ctrl: static int loop_control_get_free(int idx) { struct loop_device *lo; - int ret; + int id, ret; ret = mutex_lock_killable(&loop_ctl_mutex); if (ret) return ret; - ret = loop_lookup(&lo, -1); + idr_for_each_entry(&loop_index_idr, lo, id) { + if (lo->lo_state == Lo_unbound) + goto found; + } mutex_unlock(&loop_ctl_mutex); - - if (ret >= 0) - return ret; return loop_add(-1); +found: + mutex_unlock(&loop_ctl_mutex); + return id; } static long loop_control_ioctl(struct file *file, unsigned int cmd, From 8e60947d2f1ee675681a526da99fa587e63f78e3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:59:08 +0200 Subject: [PATCH 18/34] loop: rewrite loop_exit using idr_for_each_entry Use idr_for_each_entry to simplify removing all devices. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210623145908.92973-10-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ff7014608ef6..39c05cf518fb 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2518,21 +2518,17 @@ err_out: return err; } -static int loop_exit_cb(int id, void *ptr, void *data) -{ - struct loop_device *lo = ptr; - - loop_remove(lo); - return 0; -} - static void __exit loop_exit(void) { + struct loop_device *lo; + int id; + unregister_blkdev(LOOP_MAJOR, "loop"); misc_deregister(&loop_misc); mutex_lock(&loop_ctl_mutex); - idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); + idr_for_each_entry(&loop_index_idr, lo, id) + loop_remove(lo); mutex_unlock(&loop_ctl_mutex); idr_destroy(&loop_index_idr); From 5ec780a6eddacbbbc1c5d5838753c3ca43f93526 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Jun 2021 10:10:12 +0200 Subject: [PATCH 19/34] block: mark blk_mq_init_queue_data static All driver uses are gone now. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20210624081012.256464-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 +-- include/linux/blk-mq.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 2e9fd0ec63d7..2c4ac51e54eb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3109,7 +3109,7 @@ void blk_mq_release(struct request_queue *q) blk_mq_sysfs_deinit(q); } -struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, +static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, void *queuedata) { struct request_queue *q; @@ -3126,7 +3126,6 @@ struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, } return q; } -EXPORT_SYMBOL_GPL(blk_mq_init_queue_data); struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fd2de2b422ed..1d18447ebebc 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -439,8 +439,6 @@ enum { struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata); struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); -struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, - void *queuedata); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_unregister_dev(struct device *, struct request_queue *); From da6269da4cfe29f484e8fd27c1496b81b47e2499 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Jun 2021 14:39:34 +0200 Subject: [PATCH 20/34] block: remove REQ_OP_SCSI_{IN,OUT} With the legacy IDE driver gone drivers now use either REQ_OP_DRV_* or REQ_OP_SCSI_*, so unify the two concepts of passthrough requests into a single one. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- block/bsg-lib.c | 2 +- block/bsg.c | 2 +- block/scsi_ioctl.c | 6 +++--- drivers/block/pktcdvd.c | 2 +- drivers/cdrom/cdrom.c | 2 +- drivers/scsi/scsi_error.c | 2 +- drivers/scsi/scsi_lib.c | 8 ++++---- drivers/scsi/sg.c | 2 +- drivers/scsi/st.c | 2 +- drivers/target/target_core_pscsi.c | 2 +- fs/nfsd/blocklayout.c | 2 +- include/linux/blk_types.h | 3 --- include/linux/blkdev.h | 33 +++--------------------------- 14 files changed, 19 insertions(+), 51 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 514838ccab2d..3eea8d795565 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -142,8 +142,6 @@ static const char *const blk_op_name[] = { REQ_OP_NAME(ZONE_APPEND), REQ_OP_NAME(WRITE_SAME), REQ_OP_NAME(WRITE_ZEROES), - REQ_OP_NAME(SCSI_IN), - REQ_OP_NAME(SCSI_OUT), REQ_OP_NAME(DRV_IN), REQ_OP_NAME(DRV_OUT), }; diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 330fede77271..57b082bc9017 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -45,7 +45,7 @@ static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr, return PTR_ERR(job->request); if (hdr->dout_xfer_len && hdr->din_xfer_len) { - job->bidi_rq = blk_get_request(rq->q, REQ_OP_SCSI_IN, 0); + job->bidi_rq = blk_get_request(rq->q, REQ_OP_DRV_IN, 0); if (IS_ERR(job->bidi_rq)) { ret = PTR_ERR(job->bidi_rq); goto out; diff --git a/block/bsg.c b/block/bsg.c index bd10922d5cbb..323e45878362 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -152,7 +152,7 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg) return ret; rq = blk_get_request(q, hdr.dout_xfer_len ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 1b3fe99b83a6..41ca95bfe607 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -311,7 +311,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, at_head = 1; ret = -ENOMEM; - rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); + rq = blk_get_request(q, writing ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); req = scsi_req(rq); @@ -433,7 +433,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } - rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); + rq = blk_get_request(q, in_len ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto error_free_buffer; @@ -521,7 +521,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, struct request *rq; int err; - rq = blk_get_request(q, REQ_OP_SCSI_OUT, 0); + rq = blk_get_request(q, REQ_OP_DRV_OUT, 0); if (IS_ERR(rq)) return PTR_ERR(rq); rq->timeout = BLK_DEFAULT_SG_TIMEOUT; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index f69b5c69c2a6..538446b652de 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -704,7 +704,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * int ret = 0; rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 90ad34c6ef8e..feb827eefd1a 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2186,7 +2186,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); + rq = blk_get_request(q, REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) { ret = PTR_ERR(rq); break; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index d8fafe77dbbe..03a2ff547b22 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2011,7 +2011,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) struct request *req; struct scsi_request *rq; - req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, 0); + req = blk_get_request(sdev->request_queue, REQ_OP_DRV_IN, 0); if (IS_ERR(req)) return; rq = scsi_req(req); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 532304d42f00..6cc7dad923cb 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -215,7 +215,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, req = blk_get_request(sdev->request_queue, data_direction == DMA_TO_DEVICE ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); if (IS_ERR(req)) return ret; @@ -540,7 +540,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error, if (blk_queue_add_random(q)) add_disk_randomness(req->rq_disk); - if (!blk_rq_is_scsi(req)) { + if (!blk_rq_is_passthrough(req)) { WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED)); cmd->flags &= ~SCMD_INITIALIZED; } @@ -1115,7 +1115,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) bool in_flight; int budget_token = cmd->budget_token; - if (!blk_rq_is_scsi(rq) && !(flags & SCMD_INITIALIZED)) { + if (!blk_rq_is_passthrough(rq) && !(flags & SCMD_INITIALIZED)) { flags |= SCMD_INITIALIZED; scsi_initialize_rq(rq); } @@ -1556,7 +1556,7 @@ static blk_status_t scsi_prepare_cmd(struct request *req) * Special handling for passthrough commands, which don't go to the ULP * at all: */ - if (blk_rq_is_scsi(req)) + if (blk_rq_is_passthrough(req)) return scsi_setup_scsi_cmnd(sdev, req); if (sdev->handler && sdev->handler->prep_fn) { diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index def7ec3bbaf9..f84fa550dd15 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1756,7 +1756,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) * not expect an EWOULDBLOCK from this condition. */ rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) { kfree(long_cmdp); return PTR_ERR(rq); diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 3b1afe1d5b27..86c951c654a8 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -549,7 +549,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, req = blk_get_request(SRpnt->stp->device->request_queue, data_direction == DMA_TO_DEVICE ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(req)) return DRIVER_ERROR << 24; rq = scsi_req(req); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index f2a11414366d..4531cf47d24e 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -982,7 +982,7 @@ pscsi_execute_cmd(struct se_cmd *cmd) req = blk_get_request(pdv->pdv_sd->request_queue, cmd->data_direction == DMA_TO_DEVICE ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0); + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(req)) { pr_err("PSCSI: blk_get_request() failed\n"); ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 1058659a8d31..c99dee99a3c1 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -236,7 +236,7 @@ again: if (!buf) return -ENOMEM; - rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); + rq = blk_get_request(q, REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) { error = -ENOMEM; goto out_free_buf; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fd3860d18d7e..db61f7df1823 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -350,9 +350,6 @@ enum req_opf { /* reset all the zone present on the device */ REQ_OP_ZONE_RESET_ALL = 17, - /* SCSI passthrough using struct scsi_request */ - REQ_OP_SCSI_IN = 32, - REQ_OP_SCSI_OUT = 33, /* Driver private requests */ REQ_OP_DRV_IN = 34, REQ_OP_DRV_OUT = 35, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d66d0da72529..d199e51524eb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -240,42 +240,15 @@ struct request { void *end_io_data; }; -static inline bool blk_op_is_scsi(unsigned int op) -{ - return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT; -} - -static inline bool blk_op_is_private(unsigned int op) +static inline bool blk_op_is_passthrough(unsigned int op) { + op &= REQ_OP_MASK; return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; } -static inline bool blk_rq_is_scsi(struct request *rq) -{ - return blk_op_is_scsi(req_op(rq)); -} - -static inline bool blk_rq_is_private(struct request *rq) -{ - return blk_op_is_private(req_op(rq)); -} - static inline bool blk_rq_is_passthrough(struct request *rq) { - return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); -} - -static inline bool bio_is_passthrough(struct bio *bio) -{ - unsigned op = bio_op(bio); - - return blk_op_is_scsi(op) || blk_op_is_private(op); -} - -static inline bool blk_op_is_passthrough(unsigned int op) -{ - return (blk_op_is_scsi(op & REQ_OP_MASK) || - blk_op_is_private(op & REQ_OP_MASK)); + return blk_op_is_passthrough(req_op(rq)); } static inline unsigned short req_get_ioprio(struct request *req) From c01b5a814e7b28e327883838bad159194bdd68e8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Jun 2021 14:44:34 -0700 Subject: [PATCH 21/34] block: support polling through blk_execute_rq Poll for completions if the request's hctx is a polling type. Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210610214437.641245-2-kbusch@kernel.org Signed-off-by: Jens Axboe --- block/blk-exec.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/block/blk-exec.c b/block/blk-exec.c index beae70a0e5e5..38f88552aa31 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -63,6 +63,19 @@ void blk_execute_rq_nowait(struct gendisk *bd_disk, struct request *rq, } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); +static bool blk_rq_is_poll(struct request *rq) +{ + return rq->mq_hctx && rq->mq_hctx->type == HCTX_TYPE_POLL; +} + +static void blk_rq_poll_completion(struct request *rq, struct completion *wait) +{ + do { + blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), true); + cond_resched(); + } while (!completion_done(wait)); +} + /** * blk_execute_rq - insert a request into queue for execution * @bd_disk: matching gendisk @@ -83,7 +96,10 @@ void blk_execute_rq(struct gendisk *bd_disk, struct request *rq, int at_head) /* Prevent hang_check timer from firing at us during very long I/O */ hang_check = sysctl_hung_task_timeout_secs; - if (hang_check) + + if (blk_rq_is_poll(rq)) + blk_rq_poll_completion(rq, &wait); + else if (hang_check) while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2))); else wait_for_completion_io(&wait); From be42a33b9252f0b3857cadb896e430ee17cccad4 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Jun 2021 14:44:35 -0700 Subject: [PATCH 22/34] nvme: use blk_execute_rq() for passthrough commands The generic blk_execute_rq() knows how to handle polled completions. Use that instead of implementing an nvme specific handler. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210610214437.641245-3-kbusch@kernel.org Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 40 +++++++------------------------------ drivers/nvme/host/fabrics.c | 13 ++++++------ drivers/nvme/host/fabrics.h | 2 +- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/rdma.c | 3 +-- drivers/nvme/host/tcp.c | 2 +- drivers/nvme/target/loop.c | 2 +- 8 files changed, 19 insertions(+), 47 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 80c656dcbbac..e0d3f0aa25da 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -631,6 +631,8 @@ static inline void nvme_init_request(struct request *req, cmd->common.flags &= ~NVME_CMD_SGL_ALL; req->cmd_flags |= REQ_FAILFAST_DRIVER; + if (req->mq_hctx->type == HCTX_TYPE_POLL) + req->cmd_flags |= REQ_HIPRI; nvme_clear_nvme_request(req); memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); } @@ -1029,31 +1031,6 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) } EXPORT_SYMBOL_GPL(nvme_setup_cmd); -static void nvme_end_sync_rq(struct request *rq, blk_status_t error) -{ - struct completion *waiting = rq->end_io_data; - - rq->end_io_data = NULL; - complete(waiting); -} - -static void nvme_execute_rq_polled(struct request_queue *q, - struct gendisk *bd_disk, struct request *rq, int at_head) -{ - DECLARE_COMPLETION_ONSTACK(wait); - - WARN_ON_ONCE(!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)); - - rq->cmd_flags |= REQ_HIPRI; - rq->end_io_data = &wait; - blk_execute_rq_nowait(bd_disk, rq, at_head, nvme_end_sync_rq); - - while (!completion_done(&wait)) { - blk_poll(q, request_to_qc_t(rq->mq_hctx, rq), true); - cond_resched(); - } -} - /* * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code @@ -1061,7 +1038,7 @@ static void nvme_execute_rq_polled(struct request_queue *q, int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, - blk_mq_req_flags_t flags, bool poll) + blk_mq_req_flags_t flags) { struct request *req; int ret; @@ -1082,10 +1059,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } - if (poll) - nvme_execute_rq_polled(req->q, NULL, req, at_head); - else - blk_execute_rq(NULL, req, at_head); + blk_execute_rq(NULL, req, at_head); if (result) *result = nvme_req(req)->result; if (nvme_req(req)->flags & NVME_REQ_CANCELLED) @@ -1102,7 +1076,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buffer, unsigned bufflen) { return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0, - NVME_QID_ANY, 0, 0, false); + NVME_QID_ANY, 0, 0); } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); @@ -1465,7 +1439,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, c.features.dword11 = cpu_to_le32(dword11); ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, - buffer, buflen, 0, NVME_QID_ANY, 0, 0, false); + buffer, buflen, 0, NVME_QID_ANY, 0, 0); if (ret >= 0 && result) *result = le32_to_cpu(res.u32); return ret; @@ -2047,7 +2021,7 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, cmd.common.cdw11 = cpu_to_le32(len); return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, 0, - NVME_QID_ANY, 1, 0, false); + NVME_QID_ANY, 1, 0); } EXPORT_SYMBOL_GPL(nvme_sec_submit); #endif /* CONFIG_BLK_SED_OPAL */ diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 1e6a7cc056ca..a5469fd9d4c3 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -154,7 +154,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0, - NVME_QID_ANY, 0, 0, false); + NVME_QID_ANY, 0, 0); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -200,7 +200,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0, - NVME_QID_ANY, 0, 0, false); + NVME_QID_ANY, 0, 0); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -245,7 +245,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) cmd.prop_set.value = cpu_to_le64(val); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0, 0, - NVME_QID_ANY, 0, 0, false); + NVME_QID_ANY, 0, 0); if (unlikely(ret)) dev_err(ctrl->device, "Property Set error: %d, offset %#x\n", @@ -391,7 +391,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, data, sizeof(*data), 0, NVME_QID_ANY, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, false); + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); @@ -415,7 +415,6 @@ EXPORT_SYMBOL_GPL(nvmf_connect_admin_queue); * @qid: NVMe I/O queue number for the new I/O connection between * host and target (note qid == 0 is illegal as this is * the Admin queue, per NVMe standard). - * @poll: Whether or not to poll for the completion of the connect cmd. * * This function issues a fabrics-protocol connection * of a NVMe I/O queue (via NVMe Fabrics "Connect" command) @@ -427,7 +426,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_admin_queue); * > 0: NVMe error status code * < 0: Linux errno error code */ -int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll) +int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) { struct nvme_command cmd = { }; struct nvmf_connect_data *data; @@ -453,7 +452,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll) ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, data, sizeof(*data), 0, qid, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, poll); + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index c31dad69a773..a146cb903869 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -182,7 +182,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl); -int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll); +int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid); int nvmf_register_transport(struct nvmf_transport_ops *ops); void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7600863f7752..7f462af1b02a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2346,7 +2346,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) (qsize / 5)); if (ret) break; - ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false); + ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) break; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 75420ceacc10..3b12ad78ee7a 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -658,7 +658,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, - blk_mq_req_flags_t flags, bool poll); + blk_mq_req_flags_t flags); int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, u32 *result); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a9e70cefd7ed..7f6b3a991501 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -680,11 +680,10 @@ static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl) static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) { struct nvme_rdma_queue *queue = &ctrl->queues[idx]; - bool poll = nvme_rdma_poll_queue(queue); int ret; if (idx) - ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, poll); + ret = nvmf_connect_io_queue(&ctrl->ctrl, idx); else ret = nvmf_connect_admin_queue(&ctrl->ctrl); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c7bd37103cf4..12acfe05cd68 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1574,7 +1574,7 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) int ret; if (idx) - ret = nvmf_connect_io_queue(nctrl, idx, false); + ret = nvmf_connect_io_queue(nctrl, idx); else ret = nvmf_connect_admin_queue(nctrl); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index a5c4a1865026..3a17a7e26bbf 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -337,7 +337,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->ctrl.queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false); + ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) return ret; set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); From fb9b16e15cd70e21d8af7f03d700deb9509c2ce8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Jun 2021 14:44:36 -0700 Subject: [PATCH 23/34] block: return errors from blk_execute_rq() The synchronous blk_execute_rq() had not provided a way for its callers to know if its request was successful or not. Return the blk_status_t result of the request. Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210610214437.641245-4-kbusch@kernel.org Signed-off-by: Jens Axboe --- block/blk-exec.c | 7 +++++-- include/linux/blkdev.h | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/block/blk-exec.c b/block/blk-exec.c index 38f88552aa31..d6cd501c0d34 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -21,7 +21,7 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error) { struct completion *waiting = rq->end_io_data; - rq->end_io_data = NULL; + rq->end_io_data = (void *)(uintptr_t)error; /* * complete last, if this is a stack request the process (and thus @@ -85,8 +85,9 @@ static void blk_rq_poll_completion(struct request *rq, struct completion *wait) * Description: * Insert a fully prepared request at the back of the I/O scheduler queue * for execution and wait for completion. + * Return: The blk_status_t result provided to blk_mq_end_request(). */ -void blk_execute_rq(struct gendisk *bd_disk, struct request *rq, int at_head) +blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq, int at_head) { DECLARE_COMPLETION_ONSTACK(wait); unsigned long hang_check; @@ -103,5 +104,7 @@ void blk_execute_rq(struct gendisk *bd_disk, struct request *rq, int at_head) while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2))); else wait_for_completion_io(&wait); + + return (blk_status_t)(uintptr_t)rq->end_io_data; } EXPORT_SYMBOL(blk_execute_rq); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d199e51524eb..c454fb446fd0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -909,10 +909,12 @@ extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, uns extern int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); -extern void blk_execute_rq(struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct gendisk *, struct request *, int, rq_end_io_fn *); +blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq, + int at_head); + /* Helper to convert REQ_OP_XXX to its string format XXX */ extern const char *blk_op_str(unsigned int op); From ae5e6886b4f8d62a9d01fea4221a854e541a1cd0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Jun 2021 14:44:37 -0700 Subject: [PATCH 24/34] nvme: use return value from blk_execute_rq() We don't have an nvme status to report if the driver's .queue_rq() returns an error without dispatching the requested nvme command. Check the return value from blk_execute_rq() for all passthrough commands so the caller may know their command was not successful. If the command is from the target passthrough interface and fails to dispatch, synthesize the response back to the host as a internal target error. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210610214437.641245-5-kbusch@kernel.org Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 35 ++++++++++++++++++++++++++-------- drivers/nvme/host/ioctl.c | 6 +----- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/target/passthru.c | 8 ++++---- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e0d3f0aa25da..11779be42186 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -609,6 +609,7 @@ EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU); static inline void nvme_clear_nvme_request(struct request *req) { + nvme_req(req)->status = 0; nvme_req(req)->retries = 0; nvme_req(req)->flags = 0; req->rq_flags |= RQF_DONTPREP; @@ -1031,6 +1032,25 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) } EXPORT_SYMBOL_GPL(nvme_setup_cmd); +/* + * Return values: + * 0: success + * >0: nvme controller's cqe status response + * <0: kernel error in lieu of controller response + */ +static int nvme_execute_rq(struct gendisk *disk, struct request *rq, + bool at_head) +{ + blk_status_t status; + + status = blk_execute_rq(disk, rq, at_head); + if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) + return -EINTR; + if (nvme_req(rq)->status) + return nvme_req(rq)->status; + return blk_status_to_errno(status); +} + /* * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code @@ -1059,13 +1079,9 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } - blk_execute_rq(NULL, req, at_head); - if (result) + ret = nvme_execute_rq(NULL, req, at_head); + if (result && ret >= 0) *result = nvme_req(req)->result; - if (nvme_req(req)->flags & NVME_REQ_CANCELLED) - ret = -EINTR; - else - ret = nvme_req(req)->status; out: blk_mq_free_request(req); return ret; @@ -1153,18 +1169,21 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) } } -void nvme_execute_passthru_rq(struct request *rq) +int nvme_execute_passthru_rq(struct request *rq) { struct nvme_command *cmd = nvme_req(rq)->cmd; struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; struct nvme_ns *ns = rq->q->queuedata; struct gendisk *disk = ns ? ns->disk : NULL; u32 effects; + int ret; effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); - blk_execute_rq(disk, rq, 0); + ret = nvme_execute_rq(disk, rq, false); if (effects) /* nothing to be done for zero cmd effects */ nvme_passthru_end(ctrl, effects); + + return ret; } EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index d93928d1e5bd..305ddd415e45 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -93,11 +93,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, } } - nvme_execute_passthru_rq(req); - if (nvme_req(req)->flags & NVME_REQ_CANCELLED) - ret = -EINTR; - else - ret = nvme_req(req)->status; + ret = nvme_execute_passthru_rq(req); if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); if (meta && !ret && !write) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 3b12ad78ee7a..18ef8dd03a90 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -876,7 +876,7 @@ static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl) u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); -void nvme_execute_passthru_rq(struct request *rq); +int nvme_execute_passthru_rq(struct request *rq); struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); void nvme_put_ns(struct nvme_ns *ns); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index fced52de33ce..225cd1ffbe45 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -153,11 +153,10 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) { struct nvmet_req *req = container_of(w, struct nvmet_req, p.work); struct request *rq = req->p.rq; - u16 status; + int status; - nvme_execute_passthru_rq(rq); + status = nvme_execute_passthru_rq(rq); - status = nvme_req(rq)->status; if (status == NVME_SC_SUCCESS && req->cmd->common.opcode == nvme_admin_identify) { switch (req->cmd->identify.cns) { @@ -168,7 +167,8 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) nvmet_passthru_override_id_ns(req); break; } - } + } else if (status < 0) + status = NVME_SC_INTERNAL; req->cqe->result = nvme_req(rq)->result; nvmet_req_complete(req, status); From efee99e68e69d8a1966f3d426cc0cea73e32c6d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Jun 2021 11:39:37 +0200 Subject: [PATCH 25/34] ubd: remove dead code in ubd_setup_common Remove some leftovers of the fake major number parsing that cause complains from some compilers. Fixes: 2933a1b2c6f3 ("ubd: remove the code to register as the legacy IDE driver") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210628093937.1325608-1-hch@lst.de Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 0b86aa1b12f1..e3093071406d 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -243,22 +243,12 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out) if(index_out) *index_out = -1; n = *str; if(n == '='){ - char *end; - int major; - str++; if(!strcmp(str, "sync")){ global_openflags = of_sync(global_openflags); return err; } - err = -EINVAL; - major = simple_strtoul(str, &end, 0); - if((*end != '\0') || (end == str)){ - *error_out = "Didn't parse major number"; - return err; - } - pr_warn("fake major not supported any more\n"); return 0; } From b5cfbd35eccaa0b532dc0d8a31e4d59b5e314c93 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Thu, 10 Jun 2021 10:32:41 +0800 Subject: [PATCH 26/34] block: check disk exist before trying to add partition If disk have been deleted, we should return fail for ioctl BLKPG_DEL_PARTITION. Otherwise, the directory /sys/class/block may remain invalid symlinks file. The race as following: blkdev_open del_gendisk disk->flags &= ~GENHD_FL_UP; blk_drop_partitions blkpg_ioctl bdev_add_partition add_partition device_add device_add_class_symlinks ioctl may add_partition after del_gendisk() have tried to delete partitions. Then, symlinks file will be created. Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Yufen Yu Link: https://lore.kernel.org/r/20210610023241.3646241-1-yuyufen@huawei.com Signed-off-by: Jens Axboe --- block/partitions/core.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index 347c56a51d87..ed78cdfe054b 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -453,17 +453,26 @@ int bdev_add_partition(struct block_device *bdev, int partno, sector_t start, sector_t length) { struct block_device *part; + struct gendisk *disk = bdev->bd_disk; + int ret; - mutex_lock(&bdev->bd_disk->open_mutex); - if (partition_overlaps(bdev->bd_disk, start, length, -1)) { - mutex_unlock(&bdev->bd_disk->open_mutex); - return -EBUSY; + mutex_lock(&disk->open_mutex); + if (!(disk->flags & GENHD_FL_UP)) { + ret = -ENXIO; + goto out; } - part = add_partition(bdev->bd_disk, partno, start, length, + if (partition_overlaps(disk, start, length, -1)) { + ret = -EBUSY; + goto out; + } + + part = add_partition(disk, partno, start, length, ADDPART_FLAG_NONE, NULL); - mutex_unlock(&bdev->bd_disk->open_mutex); - return PTR_ERR_OR_ZERO(part); + ret = PTR_ERR_OR_ZERO(part); +out: + mutex_unlock(&disk->open_mutex); + return ret; } int bdev_del_partition(struct block_device *bdev, int partno) From 299f2b5fc08be90aebcaa471b4d0f2bb94f2fbbf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jul 2021 16:22:20 +0200 Subject: [PATCH 27/34] dasd: unexport dasd_set_target_state dasd_set_target_state is only used inside of dasd_mod.ko, so don't export it. Signed-off-by: Christoph Hellwig Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20210701142221.3408680-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index c8df75e99f4c..e34c6cc61983 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -621,7 +621,6 @@ void dasd_set_target_state(struct dasd_device *device, int target) mutex_unlock(&device->state_mutex); dasd_put_device(device); } -EXPORT_SYMBOL(dasd_set_target_state); /* * Enable devices with device numbers in [from..to]. From 2b7a8dc06d0f840345ae3c7ed6f9d55962b5f54a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Jul 2021 16:22:21 +0200 Subject: [PATCH 28/34] s390/dasd: Avoid field over-reading memcpy() In preparation for FORTIFY_SOURCE performing compile-time and run-time field array bounds checking for memcpy(), memmove(), and memset(), avoid intentionally reading across neighboring array fields. Add a wrapping structure to serve as the memcpy() source, so the compiler can do appropriate bounds checking, avoiding this future warning: In function '__fortify_memcpy', inlined from 'create_uid' at drivers/s390/block/dasd_eckd.c:749:2: ./include/linux/fortify-string.h:246:4: error: call to '__read_overflow2_field' declared with attribute error: detected read beyond size of field (2nd parameter) Signed-off-by: Kees Cook Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20210701142221.3408680-3-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 2 +- drivers/s390/block/dasd_eckd.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index a6ac505cbdd7..0de1a463c509 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -746,7 +746,7 @@ static void create_uid(struct dasd_eckd_private *private) memcpy(uid->vendor, private->ned->HDA_manufacturer, sizeof(uid->vendor) - 1); EBCASC(uid->vendor, sizeof(uid->vendor) - 1); - memcpy(uid->serial, private->ned->HDA_location, + memcpy(uid->serial, &private->ned->serial, sizeof(uid->serial) - 1); EBCASC(uid->serial, sizeof(uid->serial) - 1); uid->ssid = private->gneq->subsystemID; diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h index 73651211789f..65e4630ad2ae 100644 --- a/drivers/s390/block/dasd_eckd.h +++ b/drivers/s390/block/dasd_eckd.h @@ -332,8 +332,10 @@ struct dasd_ned { __u8 dev_type[6]; __u8 dev_model[3]; __u8 HDA_manufacturer[3]; - __u8 HDA_location[2]; - __u8 HDA_seqno[12]; + struct { + __u8 HDA_location[2]; + __u8 HDA_seqno[12]; + } serial; __u8 ID; __u8 unit_addr; } __attribute__ ((packed)); From 498dcc13fd6463de29b94e160f40ed04d5477cd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jul 2021 10:16:37 +0200 Subject: [PATCH 29/34] block: grab a device refcount in disk_uevent Sending uevents requires the struct device to be alive. To ensure that grab the device refcount instead of just an inode reference. Fixes: bc359d03c7ec ("block: add a disk_uevent helper") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210701081638.246552-2-hch@lst.de Signed-off-by: Jens Axboe --- block/genhd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 79aa40b4c39c..af4d2ab4a633 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -365,12 +365,12 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action) xa_for_each(&disk->part_tbl, idx, part) { if (bdev_is_partition(part) && !bdev_nr_sectors(part)) continue; - if (!bdgrab(part)) + if (!kobject_get_unless_zero(&part->bd_device.kobj)) continue; rcu_read_unlock(); kobject_uevent(bdev_kobj(part), action); - bdput(part); + put_device(&part->bd_device); rcu_read_lock(); } rcu_read_unlock(); From 63c38d858e0b064a942383d33ccce4ca56df8283 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jul 2021 10:16:38 +0200 Subject: [PATCH 30/34] block: remove the bdgrab in blk_drop_partitions There is no need to hold a bdev reference when removing the partition. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210701081638.246552-3-hch@lst.de Signed-off-by: Jens Axboe --- block/partitions/core.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index ed78cdfe054b..4230d4f71879 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -546,12 +546,8 @@ void blk_drop_partitions(struct gendisk *disk) lockdep_assert_held(&disk->open_mutex); - xa_for_each_start(&disk->part_tbl, idx, part, 1) { - if (!bdgrab(part)) - continue; + xa_for_each_start(&disk->part_tbl, idx, part, 1) delete_partition(part); - bdput(part); - } } static bool blk_add_partition(struct gendisk *disk, From 585af8ede7035379b712cacca80e9c2c34853d4b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 3 Jul 2021 00:27:14 +0900 Subject: [PATCH 31/34] loop: remove unused variable in loop_set_status() Commit 0384264ea8a39bd9 ("block: pass a gendisk to bdev_disk_changed") changed to pass lo->lo_disk instead of lo->lo_device. Fixes: 0384264ea8a3 ("block: pass a gendisk to bdev_disk_changed") Signed-off-by: Tetsuo Handa Link: https://lore.kernel.org/r/20210702152714.7978-1-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 39c05cf518fb..f37b9e3d833c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1434,7 +1434,6 @@ static int loop_set_status(struct loop_device *lo, const struct loop_info64 *info) { int err; - struct block_device *bdev; kuid_t uid = current_uid(); int prev_lo_flags; bool partscan = false; @@ -1503,7 +1502,6 @@ out_unfreeze: if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) && !(prev_lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - bdev = lo->lo_device; partscan = true; } out_unlock: From 0755d3be2d9bb6ea38598ccd30d6bbaa1a5c3a50 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 2 Jul 2021 10:11:21 +0200 Subject: [PATCH 32/34] nvme-tcp: can't set sk_user_data without write_lock The sk_user_data pointer is supposed to be modified only while holding the write_lock "sk_callback_lock", otherwise we could race with other threads and crash the kernel. we can't take the write_lock in nvmet_tcp_state_change() because it would cause a deadlock, but the release_work queue will set the pointer to NULL later so we can simply remove the assignment. Fixes: b5332a9f3f3d ("nvmet-tcp: fix incorrect locking in state_change sk callback") Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index d8aceef83284..07ee347ea3f3 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1497,7 +1497,6 @@ static void nvmet_tcp_state_change(struct sock *sk) case TCP_CLOSE_WAIT: case TCP_CLOSE: /* FALLTHRU */ - sk->sk_user_data = NULL; nvmet_tcp_schedule_release_queue(queue); break; default: From d80c228d44640f0b47b57a2ca4afa26ef87e16b0 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Tue, 6 Jul 2021 05:47:26 +0800 Subject: [PATCH 33/34] block: fix the problem of io_ticks becoming smaller On the IO submission path, blk_account_io_start() may interrupt the system interruption. When the interruption returns, the value of part->stamp may have been updated by other cores, so the time value collected before the interruption may be less than part-> stamp. So when this happens, we should do nothing to make io_ticks more accurate? For kernels less than 5.0, this may cause io_ticks to become smaller, which in turn may cause abnormal ioutil values. Signed-off-by: Chunguang Xu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/1625521646-1069-1-git-send-email-brookxu.cn@gmail.com Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 3eea8d795565..04477697ee4b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1241,7 +1241,7 @@ static void update_io_ticks(struct block_device *part, unsigned long now, unsigned long stamp; again: stamp = READ_ONCE(part->bd_stamp); - if (unlikely(stamp != now)) { + if (unlikely(time_after(now, stamp))) { if (likely(cmpxchg(&part->bd_stamp, stamp, now) == stamp)) __part_stat_add(part, io_ticks, end ? now - stamp : 1); } From a731763fc479a9c64456e0643d0ccf64203100c9 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 7 Jul 2021 09:56:49 +0800 Subject: [PATCH 34/34] blk-cgroup: prevent rcu_sched detected stalls warnings while iterating blkgs We run a test that create millions of cgroups and blkgs, and then trigger blkg_destroy_all(). blkg_destroy_all() will hold spin lock for a long time in such situation. Thus release the lock when a batch of blkgs are destroyed. blkcg_activate_policy() and blkcg_deactivate_policy() might have the same problem, however, as they are basically only called from module init/exit paths, let's leave them alone for now. Signed-off-by: Yu Kuai Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210707015649.1929797-1-yukuai3@huawei.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 7b06a5fa3cac..575d7a2e7203 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -56,6 +56,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ bool blkcg_debug_stats = false; static struct workqueue_struct *blkcg_punt_bio_wq; +#define BLKG_DESTROY_BATCH_SIZE 64 + static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -422,7 +424,9 @@ static void blkg_destroy(struct blkcg_gq *blkg) static void blkg_destroy_all(struct request_queue *q) { struct blkcg_gq *blkg, *n; + int count = BLKG_DESTROY_BATCH_SIZE; +restart: spin_lock_irq(&q->queue_lock); list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; @@ -430,6 +434,17 @@ static void blkg_destroy_all(struct request_queue *q) spin_lock(&blkcg->lock); blkg_destroy(blkg); spin_unlock(&blkcg->lock); + + /* + * in order to avoid holding the spin lock for too long, release + * it when a batch of blkgs are destroyed. + */ + if (!(--count)) { + count = BLKG_DESTROY_BATCH_SIZE; + spin_unlock_irq(&q->queue_lock); + cond_resched(); + goto restart; + } } q->root_blkg = NULL;