diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ab6a61db63ce..13956437bc81 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1216,9 +1216,24 @@ error: return -EINVAL; } +static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct crypt_config *cc = ti->private; + struct request_queue *q = bdev_get_queue(cc->dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = cc->dev->bdev; + bvm->bi_sector = cc->start + bvm->bi_sector - ti->begin; + + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + static struct target_type crypt_target = { .name = "crypt", - .version= {1, 5, 0}, + .version= {1, 6, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -1228,6 +1243,7 @@ static struct target_type crypt_target = { .preresume = crypt_preresume, .resume = crypt_resume, .message = crypt_message, + .merge = crypt_merge, }; static int __init dm_crypt_init(void) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 17753d80ad22..6449bcdf84ca 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -69,13 +69,25 @@ static void linear_dtr(struct dm_target *ti) kfree(lc); } +static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector) +{ + struct linear_c *lc = ti->private; + + return lc->start + (bi_sector - ti->begin); +} + +static void linear_map_bio(struct dm_target *ti, struct bio *bio) +{ + struct linear_c *lc = ti->private; + + bio->bi_bdev = lc->dev->bdev; + bio->bi_sector = linear_map_sector(ti, bio->bi_sector); +} + static int linear_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { - struct linear_c *lc = (struct linear_c *) ti->private; - - bio->bi_bdev = lc->dev->bdev; - bio->bi_sector = lc->start + (bio->bi_sector - ti->begin); + linear_map_bio(ti, bio); return DM_MAPIO_REMAPPED; } @@ -114,15 +126,31 @@ static int linear_ioctl(struct dm_target *ti, struct inode *inode, return blkdev_driver_ioctl(bdev->bd_inode, &fake_file, bdev->bd_disk, cmd, arg); } +static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct linear_c *lc = ti->private; + struct request_queue *q = bdev_get_queue(lc->dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = lc->dev->bdev; + bvm->bi_sector = linear_map_sector(ti, bvm->bi_sector); + + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + static struct target_type linear_target = { .name = "linear", - .version= {1, 0, 2}, + .version= {1, 0, 3}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, .map = linear_map, .status = linear_status, .ioctl = linear_ioctl, + .merge = linear_merge, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 67a6f31b7fc3..5b48478c79f5 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -831,7 +831,7 @@ static struct dm_dirty_log_type _disk_type = { .status = disk_status, }; -int __init dm_dirty_log_init(void) +static int __init dm_dirty_log_init(void) { int r; @@ -848,7 +848,7 @@ int __init dm_dirty_log_init(void) return r; } -void __exit dm_dirty_log_exit(void) +static void __exit dm_dirty_log_exit(void) { dm_dirty_log_type_unregister(&_disk_type); dm_dirty_log_type_unregister(&_core_type); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 9f7302d4878d..fea966d66f98 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -525,8 +525,10 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg, } r = read_param(_params, shift(as), &ps_argc, &ti->error); - if (r) + if (r) { + dm_put_path_selector(pst); return -EINVAL; + } r = pst->create(&pg->ps, ps_argc, as->argv); if (r) { @@ -623,8 +625,10 @@ static struct priority_group *parse_priority_group(struct arg_set *as, struct pgpath *pgpath; struct arg_set path_args; - if (as->argc < nr_params) + if (as->argc < nr_params) { + ti->error = "not enough path parameters"; goto bad; + } path_args.argc = nr_params; path_args.argv = as->argv; @@ -867,7 +871,7 @@ static int reinstate_path(struct pgpath *pgpath) if (pgpath->path.is_active) goto out; - if (!pgpath->pg->ps.type) { + if (!pgpath->pg->ps.type->reinstate_path) { DMWARN("Reinstate path not supported by path selector %s", pgpath->pg->ps.type->name); r = -EINVAL; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 1ba8a47d61b1..6e5528aecc98 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -40,6 +40,11 @@ */ #define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) +/* + * The size of the mempool used to track chunks in use. + */ +#define MIN_IOS 256 + static struct workqueue_struct *ksnapd; static void flush_queued_bios(struct work_struct *work); @@ -91,7 +96,63 @@ struct dm_snap_pending_exception { */ static struct kmem_cache *exception_cache; static struct kmem_cache *pending_cache; -static mempool_t *pending_pool; + +struct dm_snap_tracked_chunk { + struct hlist_node node; + chunk_t chunk; +}; + +static struct kmem_cache *tracked_chunk_cache; + +static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, + chunk_t chunk) +{ + struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, + GFP_NOIO); + unsigned long flags; + + c->chunk = chunk; + + spin_lock_irqsave(&s->tracked_chunk_lock, flags); + hlist_add_head(&c->node, + &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); + spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); + + return c; +} + +static void stop_tracking_chunk(struct dm_snapshot *s, + struct dm_snap_tracked_chunk *c) +{ + unsigned long flags; + + spin_lock_irqsave(&s->tracked_chunk_lock, flags); + hlist_del(&c->node); + spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); + + mempool_free(c, s->tracked_chunk_pool); +} + +static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) +{ + struct dm_snap_tracked_chunk *c; + struct hlist_node *hn; + int found = 0; + + spin_lock_irq(&s->tracked_chunk_lock); + + hlist_for_each_entry(c, hn, + &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { + if (c->chunk == chunk) { + found = 1; + break; + } + } + + spin_unlock_irq(&s->tracked_chunk_lock); + + return found; +} /* * One of these per registered origin, held in the snapshot_origins hash @@ -302,14 +363,19 @@ static void free_exception(struct dm_snap_exception *e) kmem_cache_free(exception_cache, e); } -static struct dm_snap_pending_exception *alloc_pending_exception(void) +static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) { - return mempool_alloc(pending_pool, GFP_NOIO); + struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, + GFP_NOIO); + + pe->snap = s; + + return pe; } static void free_pending_exception(struct dm_snap_pending_exception *pe) { - mempool_free(pe, pending_pool); + mempool_free(pe, pe->snap->pending_pool); } static void insert_completed_exception(struct dm_snapshot *s, @@ -482,6 +548,7 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct dm_snapshot *s; + int i; int r = -EINVAL; char persistent; char *origin_path; @@ -564,11 +631,30 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad5; } + s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); + if (!s->pending_pool) { + ti->error = "Could not allocate mempool for pending exceptions"; + goto bad6; + } + + s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, + tracked_chunk_cache); + if (!s->tracked_chunk_pool) { + ti->error = "Could not allocate tracked_chunk mempool for " + "tracking reads"; + goto bad_tracked_chunk_pool; + } + + for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) + INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); + + spin_lock_init(&s->tracked_chunk_lock); + /* Metadata must only be loaded into one table at once */ r = s->store.read_metadata(&s->store); if (r < 0) { ti->error = "Failed to read snapshot metadata"; - goto bad6; + goto bad_load_and_register; } else if (r > 0) { s->valid = 0; DMWARN("Snapshot is marked invalid."); @@ -582,7 +668,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (register_snapshot(s)) { r = -EINVAL; ti->error = "Cannot register snapshot origin"; - goto bad6; + goto bad_load_and_register; } ti->private = s; @@ -590,6 +676,12 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) return 0; + bad_load_and_register: + mempool_destroy(s->tracked_chunk_pool); + + bad_tracked_chunk_pool: + mempool_destroy(s->pending_pool); + bad6: dm_kcopyd_client_destroy(s->kcopyd_client); @@ -624,6 +716,9 @@ static void __free_exceptions(struct dm_snapshot *s) static void snapshot_dtr(struct dm_target *ti) { +#ifdef CONFIG_DM_DEBUG + int i; +#endif struct dm_snapshot *s = ti->private; flush_workqueue(ksnapd); @@ -632,8 +727,17 @@ static void snapshot_dtr(struct dm_target *ti) /* After this returns there can be no new kcopyd jobs. */ unregister_snapshot(s); +#ifdef CONFIG_DM_DEBUG + for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) + BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); +#endif + + mempool_destroy(s->tracked_chunk_pool); + __free_exceptions(s); + mempool_destroy(s->pending_pool); + dm_put_device(ti, s->origin); dm_put_device(ti, s->cow); @@ -771,6 +875,13 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) goto out; } + /* + * Check for conflicting reads. This is extremely improbable, + * so yield() is sufficient and there is no need for a wait queue. + */ + while (__chunk_is_tracked(s, pe->e.old_chunk)) + yield(); + /* * Add a proper exception, and remove the * in-flight exception from the list. @@ -873,7 +984,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) * to hold the lock while we do this. */ up_write(&s->lock); - pe = alloc_pending_exception(); + pe = alloc_pending_exception(s); down_write(&s->lock); if (!s->valid) { @@ -893,7 +1004,6 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) bio_list_init(&pe->snapshot_bios); pe->primary_pe = NULL; atomic_set(&pe->ref_count, 0); - pe->snap = s; pe->started = 0; if (s->store.prepare_exception(&s->store, &pe->e)) { @@ -974,14 +1084,10 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, start_copy(pe); goto out; } - } else - /* - * FIXME: this read path scares me because we - * always use the origin when we have a pending - * exception. However I can't think of a - * situation where this is wrong - ejt. - */ + } else { bio->bi_bdev = s->origin->bdev; + map_context->ptr = track_chunk(s, chunk); + } out_unlock: up_write(&s->lock); @@ -989,6 +1095,18 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, return r; } +static int snapshot_end_io(struct dm_target *ti, struct bio *bio, + int error, union map_info *map_context) +{ + struct dm_snapshot *s = ti->private; + struct dm_snap_tracked_chunk *c = map_context->ptr; + + if (c) + stop_tracking_chunk(s, c); + + return 0; +} + static void snapshot_resume(struct dm_target *ti) { struct dm_snapshot *s = ti->private; @@ -1266,6 +1384,7 @@ static struct target_type snapshot_target = { .ctr = snapshot_ctr, .dtr = snapshot_dtr, .map = snapshot_map, + .end_io = snapshot_end_io, .resume = snapshot_resume, .status = snapshot_status, }; @@ -1306,9 +1425,9 @@ static int __init dm_snapshot_init(void) goto bad4; } - pending_pool = mempool_create_slab_pool(128, pending_cache); - if (!pending_pool) { - DMERR("Couldn't create pending pool."); + tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); + if (!tracked_chunk_cache) { + DMERR("Couldn't create cache to track chunks in use."); r = -ENOMEM; goto bad5; } @@ -1317,13 +1436,13 @@ static int __init dm_snapshot_init(void) if (!ksnapd) { DMERR("Failed to create ksnapd workqueue."); r = -ENOMEM; - goto bad6; + goto bad_pending_pool; } return 0; - bad6: - mempool_destroy(pending_pool); + bad_pending_pool: + kmem_cache_destroy(tracked_chunk_cache); bad5: kmem_cache_destroy(pending_cache); bad4: @@ -1352,9 +1471,9 @@ static void __exit dm_snapshot_exit(void) DMERR("origin unregister failed %d", r); exit_origin_hash(); - mempool_destroy(pending_pool); kmem_cache_destroy(pending_cache); kmem_cache_destroy(exception_cache); + kmem_cache_destroy(tracked_chunk_cache); } /* Module hooks */ diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 24f9fb73b982..292c15609ae3 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -130,6 +130,10 @@ struct exception_store { void *context; }; +#define DM_TRACKED_CHUNK_HASH_SIZE 16 +#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ + (DM_TRACKED_CHUNK_HASH_SIZE - 1)) + struct dm_snapshot { struct rw_semaphore lock; struct dm_target *ti; @@ -157,6 +161,8 @@ struct dm_snapshot { /* The last percentage we notified */ int last_percent; + mempool_t *pending_pool; + struct exception_table pending; struct exception_table complete; @@ -174,6 +180,11 @@ struct dm_snapshot { /* Queue of snapshot writes for ksnapd to flush */ struct bio_list queued_bios; struct work_struct queued_bios_work; + + /* Chunks with outstanding reads */ + mempool_t *tracked_chunk_pool; + spinlock_t tracked_chunk_lock; + struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; }; /* diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 94116eaf4709..798e468103b8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -506,14 +506,13 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) rs->max_sectors = min_not_zero(rs->max_sectors, q->max_sectors); - /* FIXME: Device-Mapper on top of RAID-0 breaks because DM - * currently doesn't honor MD's merge_bvec_fn routine. - * In this case, we'll force DM to use PAGE_SIZE or - * smaller I/O, just to be safe. A better fix is in the - * works, but add this for the time being so it will at - * least operate correctly. + /* + * Check if merge fn is supported. + * If not we'll force DM to use PAGE_SIZE or + * smaller I/O, just to be safe. */ - if (q->merge_bvec_fn) + + if (q->merge_bvec_fn && !ti->type->merge) rs->max_sectors = min_not_zero(rs->max_sectors, (unsigned int) (PAGE_SIZE >> 9)); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 372369b1cc20..bca448e11878 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -37,8 +37,8 @@ static DEFINE_SPINLOCK(_minor_lock); struct dm_io { struct mapped_device *md; int error; - struct bio *bio; atomic_t io_count; + struct bio *bio; unsigned long start_time; }; @@ -829,6 +829,49 @@ static int __split_bio(struct mapped_device *md, struct bio *bio) * CRUD END *---------------------------------------------------------------*/ +static int dm_merge_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) +{ + struct mapped_device *md = q->queuedata; + struct dm_table *map = dm_get_table(md); + struct dm_target *ti; + sector_t max_sectors; + int max_size; + + if (unlikely(!map)) + return 0; + + ti = dm_table_find_target(map, bvm->bi_sector); + + /* + * Find maximum amount of I/O that won't need splitting + */ + max_sectors = min(max_io_len(md, bvm->bi_sector, ti), + (sector_t) BIO_MAX_SECTORS); + max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; + if (max_size < 0) + max_size = 0; + + /* + * merge_bvec_fn() returns number of bytes + * it can accept at this offset + * max is precomputed maximal io size + */ + if (max_size && ti->type->merge) + max_size = ti->type->merge(ti, bvm, biovec, max_size); + + /* + * Always allow an entire first page + */ + if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) + max_size = biovec->bv_len; + + dm_table_put(map); + + return max_size; +} + /* * The request function that just remaps the bio built up by * dm_merge_bvec. @@ -1032,6 +1075,7 @@ static struct mapped_device *alloc_dev(int minor) blk_queue_make_request(md->queue, dm_request); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); md->queue->unplug_fn = dm_unplug_all; + blk_queue_merge_bvec(md->queue, dm_merge_bvec); md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); if (!md->io_pool) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 8c03b634e62e..1e59a0b0a78a 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -100,12 +100,6 @@ int dm_lock_for_deletion(struct mapped_device *md); void dm_kobject_uevent(struct mapped_device *md); -/* - * Dirty log - */ -int dm_dirty_log_init(void); -void dm_dirty_log_exit(void); - int dm_kcopyd_init(void); void dm_kcopyd_exit(void); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 0d8d419d191a..a90222e3297d 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -9,11 +9,13 @@ #define _LINUX_DEVICE_MAPPER_H #include +#include struct dm_target; struct dm_table; struct dm_dev; struct mapped_device; +struct bio_vec; typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; @@ -72,6 +74,9 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); +typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size); + void dm_error(const char *message); /* @@ -107,6 +112,7 @@ struct target_type { dm_status_fn status; dm_message_fn message; dm_ioctl_fn ioctl; + dm_merge_fn merge; }; struct io_restrictions { diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index b03c41bbfa14..28c2940eb30d 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -256,9 +256,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 13 +#define DM_VERSION_MINOR 14 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2007-10-18)" +#define DM_VERSION_EXTRA "-ioctl (2008-04-23)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */