From 6cc8e7430801fa238bd7d3acae1eb406c6e02fe1 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 26 Jan 2021 09:46:30 -0500 Subject: [PATCH] loop: scale loop device by introducing per device lock Currently, loop device has only one global lock: loop_ctl_mutex. This becomes hot in scenarios where many loop devices are used. Scale it by introducing per-device lock: lo_mutex that protects modifications of all fields in struct loop_device. Keep loop_ctl_mutex to protect global data: loop_index_idr, loop_lookup, loop_add. The new lock ordering requirement is that loop_ctl_mutex must be taken before lo_mutex. Signed-off-by: Pavel Tatashin Reviewed-by: Tyler Hicks Reviewed-by: Petr Vorel Signed-off-by: Jens Axboe --- drivers/block/loop.c | 93 +++++++++++++++++++++++++------------------- drivers/block/loop.h | 1 + 2 files changed, 54 insertions(+), 40 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e5ff328f0917..578fc034db3f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -704,7 +704,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, int error; bool partscan; - error = mutex_lock_killable(&loop_ctl_mutex); + error = mutex_lock_killable(&lo->lo_mutex); if (error) return error; error = -ENXIO; @@ -743,9 +743,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); /* - * We must drop file reference outside of loop_ctl_mutex as dropping + * We must drop file reference outside of lo_mutex as dropping * the file ref can take bd_mutex which creates circular locking * dependency. */ @@ -755,7 +755,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, return 0; out_err: - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); if (file) fput(file); return error; @@ -1092,7 +1092,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, goto out_putf; } - error = mutex_lock_killable(&loop_ctl_mutex); + error = mutex_lock_killable(&lo->lo_mutex); if (error) goto out_bdev; @@ -1171,7 +1171,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); if (partscan) loop_reread_partitions(lo, bdev); if (!(mode & FMODE_EXCL)) @@ -1179,7 +1179,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, return 0; out_unlock: - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); out_bdev: if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); @@ -1200,7 +1200,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) bool partscan = false; int lo_number; - mutex_lock(&loop_ctl_mutex); + mutex_lock(&lo->lo_mutex); if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { err = -ENXIO; goto out_unlock; @@ -1253,7 +1253,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) lo_number = lo->lo_number; loop_unprepare_queue(lo); out_unlock: - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); if (partscan) { /* * bd_mutex has been held already in release path, so don't @@ -1284,18 +1284,17 @@ out_unlock: * protects us from all the other places trying to change the 'lo' * device. */ - mutex_lock(&loop_ctl_mutex); + mutex_lock(&lo->lo_mutex); lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; lo->lo_state = Lo_unbound; - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); /* - * Need not hold loop_ctl_mutex to fput backing file. - * Calling fput holding loop_ctl_mutex triggers a circular - * lock dependency possibility warning as fput can take - * bd_mutex which is usually taken before loop_ctl_mutex. + * Need not hold lo_mutex to fput backing file. Calling fput holding + * lo_mutex triggers a circular lock dependency possibility warning as + * fput can take bd_mutex which is usually taken before lo_mutex. */ if (filp) fput(filp); @@ -1306,11 +1305,11 @@ static int loop_clr_fd(struct loop_device *lo) { int err; - err = mutex_lock_killable(&loop_ctl_mutex); + err = mutex_lock_killable(&lo->lo_mutex); if (err) return err; if (lo->lo_state != Lo_bound) { - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); return -ENXIO; } /* @@ -1325,11 +1324,11 @@ static int loop_clr_fd(struct loop_device *lo) */ if (atomic_read(&lo->lo_refcnt) > 1) { lo->lo_flags |= LO_FLAGS_AUTOCLEAR; - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); return 0; } lo->lo_state = Lo_rundown; - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); return __loop_clr_fd(lo, false); } @@ -1344,7 +1343,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) bool partscan = false; bool size_changed = false; - err = mutex_lock_killable(&loop_ctl_mutex); + err = mutex_lock_killable(&lo->lo_mutex); if (err) return err; if (lo->lo_encrypt_key_size && @@ -1411,7 +1410,7 @@ out_unfreeze: partscan = true; } out_unlock: - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); if (partscan) loop_reread_partitions(lo, bdev); @@ -1425,11 +1424,11 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) struct kstat stat; int ret; - ret = mutex_lock_killable(&loop_ctl_mutex); + ret = mutex_lock_killable(&lo->lo_mutex); if (ret) return ret; if (lo->lo_state != Lo_bound) { - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); return -ENXIO; } @@ -1448,10 +1447,10 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) lo->lo_encrypt_key_size); } - /* Drop loop_ctl_mutex while we call into the filesystem. */ + /* Drop lo_mutex while we call into the filesystem. */ path = lo->lo_backing_file->f_path; path_get(&path); - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); if (!ret) { info->lo_device = huge_encode_dev(stat.dev); @@ -1637,7 +1636,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, { int err; - err = mutex_lock_killable(&loop_ctl_mutex); + err = mutex_lock_killable(&lo->lo_mutex); if (err) return err; switch (cmd) { @@ -1653,7 +1652,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, default: err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; } - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); return err; } @@ -1879,27 +1878,33 @@ static int lo_open(struct block_device *bdev, fmode_t mode) struct loop_device *lo; int err; + /* + * take loop_ctl_mutex to protect lo pointer from race with + * loop_control_ioctl(LOOP_CTL_REMOVE), however, to reduce contention + * release it prior to updating lo->lo_refcnt. + */ err = mutex_lock_killable(&loop_ctl_mutex); if (err) return err; lo = bdev->bd_disk->private_data; if (!lo) { - err = -ENXIO; - goto out; + mutex_unlock(&loop_ctl_mutex); + return -ENXIO; } - - atomic_inc(&lo->lo_refcnt); -out: + err = mutex_lock_killable(&lo->lo_mutex); mutex_unlock(&loop_ctl_mutex); - return err; + if (err) + return err; + atomic_inc(&lo->lo_refcnt); + mutex_unlock(&lo->lo_mutex); + return 0; } static void lo_release(struct gendisk *disk, fmode_t mode) { - struct loop_device *lo; + struct loop_device *lo = disk->private_data; - mutex_lock(&loop_ctl_mutex); - lo = disk->private_data; + mutex_lock(&lo->lo_mutex); if (atomic_dec_return(&lo->lo_refcnt)) goto out_unlock; @@ -1907,7 +1912,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) if (lo->lo_state != Lo_bound) goto out_unlock; lo->lo_state = Lo_rundown; - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); /* * In autoclear mode, stop the loop thread * and remove configuration after last close. @@ -1924,7 +1929,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) } out_unlock: - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); } static const struct block_device_operations lo_fops = { @@ -1963,10 +1968,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data) struct loop_device *lo = ptr; struct loop_func_table *xfer = data; - mutex_lock(&loop_ctl_mutex); + mutex_lock(&lo->lo_mutex); if (lo->lo_encryption == xfer) loop_release_xfer(lo); - mutex_unlock(&loop_ctl_mutex); + mutex_unlock(&lo->lo_mutex); return 0; } @@ -2152,6 +2157,7 @@ static int loop_add(struct loop_device **l, int i) disk->flags |= GENHD_FL_NO_PART_SCAN; disk->flags |= GENHD_FL_EXT_DEVT; atomic_set(&lo->lo_refcnt, 0); + mutex_init(&lo->lo_mutex); lo->lo_number = i; spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; @@ -2182,6 +2188,7 @@ static void loop_remove(struct loop_device *lo) blk_cleanup_queue(lo->lo_queue); blk_mq_free_tag_set(&lo->tag_set); put_disk(lo->lo_disk); + mutex_destroy(&lo->lo_mutex); kfree(lo); } @@ -2261,15 +2268,21 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(&lo, parm); if (ret < 0) break; + ret = mutex_lock_killable(&lo->lo_mutex); + if (ret) + break; if (lo->lo_state != Lo_unbound) { ret = -EBUSY; + mutex_unlock(&lo->lo_mutex); break; } if (atomic_read(&lo->lo_refcnt) > 0) { ret = -EBUSY; + mutex_unlock(&lo->lo_mutex); break; } lo->lo_disk->private_data = NULL; + mutex_unlock(&lo->lo_mutex); idr_remove(&loop_index_idr, lo->lo_number); loop_remove(lo); break; diff --git a/drivers/block/loop.h b/drivers/block/loop.h index af75a5ee4094..a3c04f310672 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -62,6 +62,7 @@ struct loop_device { struct request_queue *lo_queue; struct blk_mq_tag_set tag_set; struct gendisk *lo_disk; + struct mutex lo_mutex; }; struct loop_cmd {