From 3f77316de0ec0fd208467fbee8d9edc70e2c73b2 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Thu, 12 Aug 2010 04:13:56 +0100 Subject: [PATCH] dm: separate device deletion from dm_put This patch separates the device deletion code from dm_put() to make sure the deletion happens in the process context. By this patch, device deletion always occurs in an ioctl (process) context and dm_put() can be called in interrupt context. As a result, the request-based dm's bad dm_put() usage pointed out by Mikulas below disappears. http://marc.info/?l=dm-devel&m=126699981019735&w=2 Without this patch, I confirmed there is a case to crash the system: dm_put() => dm_table_destroy() => vfree() => BUG_ON(in_interrupt()) Some more backgrounds and details: In request-based dm, a device opener can remove a mapped_device while the last request is still completing, because bios in the last request complete first and then the device opener can close and remove the mapped_device before the last request completes: CPU0 CPU1 ================================================================= <> blk_end_request_all(clone_rq) blk_update_request(clone_rq) bio_endio(clone_bio) == end_clone_bio blk_update_request(orig_rq) bio_endio(orig_bio) <> dm_blk_close() dev_remove() dm_put(md) <> blk_finish_request(clone_rq) .... dm_end_request(clone_rq) free_rq_clone(clone_rq) blk_end_request_all(orig_rq) rq_completed(md) So request-based dm used dm_get()/dm_put() to hold md for each I/O until its request completion handling is fully done. However, the final dm_put() can call the device deletion code which must not be run in interrupt context and may cause kernel panic. To solve the problem, this patch moves the device deletion code, dm_destroy(), to predetermined places that is actually deleting the mapped_device in ioctl (process) context, and changes dm_put() just to decrement the reference count of the mapped_device. By this change, dm_put() can be used in any context and the symmetric model below is introduced: dm_create(): create a mapped_device dm_destroy(): destroy a mapped_device dm_get(): increment the reference count of a mapped_device dm_put(): decrement the reference count of a mapped_device dm_destroy() waits for all references of the mapped_device to disappear, then deletes the mapped_device. dm_destroy() uses active waiting with msleep(1), since deleting the mapped_device isn't performance-critical task. And since at this point, nobody opens the mapped_device and no new reference will be taken, the pending counts are just for racing completing activity and will eventually decrease to zero. For the unlikely case of the forced module unload, dm_destroy_immediate(), which doesn't wait and forcibly deletes the mapped_device, is also introduced and used in dm_hash_remove_all(). Otherwise, "rmmod -f" may be stuck and never return. And now, because the mapped_device is deleted at this point, subsequent accesses to the mapped_device may cause NULL pointer references. Cc: stable@kernel.org Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 15 ++++++++--- drivers/md/dm.c | 62 ++++++++++++++++++++++++++++++++----------- drivers/md/dm.h | 5 ++++ 3 files changed, 62 insertions(+), 20 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 6a6d475f8e80..feb64d65fbfb 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -274,6 +274,10 @@ retry: up_write(&_hash_lock); dm_put(md); + if (likely(keep_open_devices)) + dm_destroy(md); + else + dm_destroy_immediate(md); /* * Some mapped devices may be using other mapped @@ -644,17 +648,19 @@ static int dev_create(struct dm_ioctl *param, size_t param_size) return r; r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md); - if (r) - goto out; + if (r) { + dm_put(md); + dm_destroy(md); + return r; + } param->flags &= ~DM_INACTIVE_PRESENT_FLAG; __dev_status(md, param); -out: dm_put(md); - return r; + return 0; } /* @@ -748,6 +754,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) param->flags |= DM_UEVENT_GENERATED_FLAG; dm_put(md); + dm_destroy(md); return 0; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ba6934c3e2c5..a503b95ecbfb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -2171,6 +2172,7 @@ void dm_set_mdptr(struct mapped_device *md, void *ptr) void dm_get(struct mapped_device *md) { atomic_inc(&md->holders); + BUG_ON(test_bit(DMF_FREEING, &md->flags)); } const char *dm_device_name(struct mapped_device *md) @@ -2179,27 +2181,55 @@ const char *dm_device_name(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_device_name); -void dm_put(struct mapped_device *md) +static void __dm_destroy(struct mapped_device *md, bool wait) { struct dm_table *map; - BUG_ON(test_bit(DMF_FREEING, &md->flags)); + might_sleep(); - if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { - map = dm_get_live_table(md); - idr_replace(&_minor_idr, MINOR_ALLOCED, - MINOR(disk_devt(dm_disk(md)))); - set_bit(DMF_FREEING, &md->flags); - spin_unlock(&_minor_lock); - if (!dm_suspended_md(md)) { - dm_table_presuspend_targets(map); - dm_table_postsuspend_targets(map); - } - dm_sysfs_exit(md); - dm_table_put(map); - dm_table_destroy(__unbind(md)); - free_dev(md); + spin_lock(&_minor_lock); + map = dm_get_live_table(md); + idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); + set_bit(DMF_FREEING, &md->flags); + spin_unlock(&_minor_lock); + + if (!dm_suspended_md(md)) { + dm_table_presuspend_targets(map); + dm_table_postsuspend_targets(map); } + + /* + * Rare, but there may be I/O requests still going to complete, + * for example. Wait for all references to disappear. + * No one should increment the reference count of the mapped_device, + * after the mapped_device state becomes DMF_FREEING. + */ + if (wait) + while (atomic_read(&md->holders)) + msleep(1); + else if (atomic_read(&md->holders)) + DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)", + dm_device_name(md), atomic_read(&md->holders)); + + dm_sysfs_exit(md); + dm_table_put(map); + dm_table_destroy(__unbind(md)); + free_dev(md); +} + +void dm_destroy(struct mapped_device *md) +{ + __dm_destroy(md, true); +} + +void dm_destroy_immediate(struct mapped_device *md) +{ + __dm_destroy(md, false); +} + +void dm_put(struct mapped_device *md) +{ + atomic_dec(&md->holders); } EXPORT_SYMBOL_GPL(dm_put); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index bad1724d4869..8223671e4901 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -122,6 +122,11 @@ void dm_linear_exit(void); int dm_stripe_init(void); void dm_stripe_exit(void); +/* + * mapped_device operations + */ +void dm_destroy(struct mapped_device *md); +void dm_destroy_immediate(struct mapped_device *md); int dm_open_count(struct mapped_device *md); int dm_lock_for_deletion(struct mapped_device *md);