diff --git a/block/blk-core.c b/block/blk-core.c index 2e5020f37d55..de474b5dee2b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1522,7 +1522,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, * Caller must ensure !blk_queue_nomerges(q) beforehand. */ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int *request_count) + unsigned int *request_count, + struct request **same_queue_rq) { struct blk_plug *plug; struct request *rq; @@ -1542,8 +1543,16 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, list_for_each_entry_reverse(rq, plug_list, queuelist) { int el_ret; - if (rq->q == q) + if (rq->q == q) { (*request_count)++; + /* + * Only blk-mq multiple hardware queues case checks the + * rq in the same queue, there should be only one such + * rq in a queue + **/ + if (same_queue_rq) + *same_queue_rq = rq; + } if (rq->q != q || !blk_rq_merge_ok(rq, bio)) continue; @@ -1608,7 +1617,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) * any locks. */ if (!blk_queue_nomerges(q) && - blk_attempt_plug_merge(q, bio, &request_count)) + blk_attempt_plug_merge(q, bio, &request_count, NULL)) return; spin_lock_irq(q->queue_lock); @@ -1716,8 +1725,6 @@ static void handle_bad_sector(struct bio *bio) bio->bi_rw, (unsigned long long)bio_end_sector(bio), (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); - - set_bit(BIO_EOF, &bio->bi_flags); } #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -3032,21 +3039,20 @@ void blk_start_plug(struct blk_plug *plug) { struct task_struct *tsk = current; + /* + * If this is a nested plug, don't actually assign it. + */ + if (tsk->plug) + return; + INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->mq_list); INIT_LIST_HEAD(&plug->cb_list); - /* - * If this is a nested plug, don't actually assign it. It will be - * flushed on its own. + * Store ordering should not be needed here, since a potential + * preempt will imply a full memory barrier */ - if (!tsk->plug) { - /* - * Store ordering should not be needed here, since a potential - * preempt will imply a full memory barrier - */ - tsk->plug = plug; - } + tsk->plug = plug; } EXPORT_SYMBOL(blk_start_plug); @@ -3193,10 +3199,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) void blk_finish_plug(struct blk_plug *plug) { + if (plug != current->plug) + return; blk_flush_plug_list(plug, false); - if (plug == current->plug) - current->plug = NULL; + current->plug = NULL; } EXPORT_SYMBOL(blk_finish_plug); diff --git a/block/blk-mq.c b/block/blk-mq.c index ade8a2d1b0aa..c382a34fe5ac 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -89,7 +89,8 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp) return -EBUSY; ret = wait_event_interruptible(q->mq_freeze_wq, - !q->mq_freeze_depth || blk_queue_dying(q)); + !atomic_read(&q->mq_freeze_depth) || + blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; if (ret) @@ -112,13 +113,10 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref) void blk_mq_freeze_queue_start(struct request_queue *q) { - bool freeze; + int freeze_depth; - spin_lock_irq(q->queue_lock); - freeze = !q->mq_freeze_depth++; - spin_unlock_irq(q->queue_lock); - - if (freeze) { + freeze_depth = atomic_inc_return(&q->mq_freeze_depth); + if (freeze_depth == 1) { percpu_ref_kill(&q->mq_usage_counter); blk_mq_run_hw_queues(q, false); } @@ -143,13 +141,11 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); void blk_mq_unfreeze_queue(struct request_queue *q) { - bool wake; + int freeze_depth; - spin_lock_irq(q->queue_lock); - wake = !--q->mq_freeze_depth; - WARN_ON_ONCE(q->mq_freeze_depth < 0); - spin_unlock_irq(q->queue_lock); - if (wake) { + freeze_depth = atomic_dec_return(&q->mq_freeze_depth); + WARN_ON_ONCE(freeze_depth < 0); + if (!freeze_depth) { percpu_ref_reinit(&q->mq_usage_counter); wake_up_all(&q->mq_freeze_wq); } @@ -1224,6 +1220,38 @@ static struct request *blk_mq_map_request(struct request_queue *q, return rq; } +static int blk_mq_direct_issue_request(struct request *rq) +{ + int ret; + struct request_queue *q = rq->q; + struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, + rq->mq_ctx->cpu); + struct blk_mq_queue_data bd = { + .rq = rq, + .list = NULL, + .last = 1 + }; + + /* + * For OK queue, we are done. For error, kill it. Any other + * error (busy), just add it to our list as we previously + * would have done + */ + ret = q->mq_ops->queue_rq(hctx, &bd); + if (ret == BLK_MQ_RQ_QUEUE_OK) + return 0; + else { + __blk_mq_requeue_request(rq); + + if (ret == BLK_MQ_RQ_QUEUE_ERROR) { + rq->errors = -EIO; + blk_mq_end_request(rq, rq->errors); + return 0; + } + return -1; + } +} + /* * Multiple hardware queue variant. This will not use per-process plugs, * but will attempt to bypass the hctx queueing if we can go straight to @@ -1235,6 +1263,9 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); struct blk_map_ctx data; struct request *rq; + unsigned int request_count = 0; + struct blk_plug *plug; + struct request *same_queue_rq = NULL; blk_queue_bounce(q, &bio); @@ -1243,6 +1274,10 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) return; } + if (!is_flush_fua && !blk_queue_nomerges(q) && + blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) + return; + rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) return; @@ -1253,38 +1288,42 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) goto run_queue; } + plug = current->plug; /* * If the driver supports defer issued based on 'last', then * queue it up like normal since we can potentially save some * CPU this way. */ - if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) { - struct blk_mq_queue_data bd = { - .rq = rq, - .list = NULL, - .last = 1 - }; - int ret; + if (((plug && !blk_queue_nomerges(q)) || is_sync) && + !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) { + struct request *old_rq = NULL; blk_mq_bio_to_request(rq, bio); /* - * For OK queue, we are done. For error, kill it. Any other - * error (busy), just add it to our list as we previously - * would have done + * we do limited pluging. If bio can be merged, do merge. + * Otherwise the existing request in the plug list will be + * issued. So the plug list will have one request at most */ - ret = q->mq_ops->queue_rq(data.hctx, &bd); - if (ret == BLK_MQ_RQ_QUEUE_OK) - goto done; - else { - __blk_mq_requeue_request(rq); - - if (ret == BLK_MQ_RQ_QUEUE_ERROR) { - rq->errors = -EIO; - blk_mq_end_request(rq, rq->errors); - goto done; + if (plug) { + /* + * The plug list might get flushed before this. If that + * happens, same_queue_rq is invalid and plug list is empty + **/ + if (same_queue_rq && !list_empty(&plug->mq_list)) { + old_rq = same_queue_rq; + list_del_init(&old_rq->queuelist); } - } + list_add_tail(&rq->queuelist, &plug->mq_list); + } else /* is_sync */ + old_rq = rq; + blk_mq_put_ctx(data.ctx); + if (!old_rq) + return; + if (!blk_mq_direct_issue_request(old_rq)) + return; + blk_mq_insert_request(old_rq, false, true, true); + return; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -1297,7 +1336,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) run_queue: blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); } -done: blk_mq_put_ctx(data.ctx); } @@ -1309,16 +1347,11 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = rw_is_sync(bio->bi_rw); const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); - unsigned int use_plug, request_count = 0; + struct blk_plug *plug; + unsigned int request_count = 0; struct blk_map_ctx data; struct request *rq; - /* - * If we have multiple hardware queues, just go directly to - * one of those for sync IO. - */ - use_plug = !is_flush_fua && !is_sync; - blk_queue_bounce(q, &bio); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { @@ -1326,8 +1359,8 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) return; } - if (use_plug && !blk_queue_nomerges(q) && - blk_attempt_plug_merge(q, bio, &request_count)) + if (!is_flush_fua && !blk_queue_nomerges(q) && + blk_attempt_plug_merge(q, bio, &request_count, NULL)) return; rq = blk_mq_map_request(q, bio, &data); @@ -1345,21 +1378,18 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) * utilize that to temporarily store requests until the task is * either done or scheduled away. */ - if (use_plug) { - struct blk_plug *plug = current->plug; - - if (plug) { - blk_mq_bio_to_request(rq, bio); - if (list_empty(&plug->mq_list)) - trace_block_plug(q); - else if (request_count >= BLK_MAX_REQUEST_COUNT) { - blk_flush_plug_list(plug, false); - trace_block_plug(q); - } - list_add_tail(&rq->queuelist, &plug->mq_list); - blk_mq_put_ctx(data.ctx); - return; + plug = current->plug; + if (plug) { + blk_mq_bio_to_request(rq, bio); + if (list_empty(&plug->mq_list)) + trace_block_plug(q); + else if (request_count >= BLK_MAX_REQUEST_COUNT) { + blk_flush_plug_list(plug, false); + trace_block_plug(q); } + list_add_tail(&rq->queuelist, &plug->mq_list); + blk_mq_put_ctx(data.ctx); + return; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -2047,7 +2077,7 @@ void blk_mq_free_queue(struct request_queue *q) /* Basically redo blk_mq_init_queue with queue frozen */ static void blk_mq_queue_reinit(struct request_queue *q) { - WARN_ON_ONCE(!q->mq_freeze_depth); + WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); blk_mq_sysfs_unregister(q); diff --git a/block/blk.h b/block/blk.h index 4b48d55e588e..026d9594142b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -78,7 +78,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, bool bio_attempt_back_merge(struct request_queue *q, struct request *req, struct bio *bio); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int *request_count); + unsigned int *request_count, + struct request **same_queue_rq); void blk_account_io_start(struct request *req, bool new_io); void blk_account_io_completion(struct request *req, unsigned int bytes); diff --git a/block/bounce.c b/block/bounce.c index ab21ba203d5c..4bac72579c1f 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -128,9 +128,6 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) struct bio_vec *bvec, *org_vec; int i; - if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) - set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); - /* * free up bounce indirect pages used */ diff --git a/block/ioctl.c b/block/ioctl.c index 7d8befde2aca..8061eba42887 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -150,21 +150,48 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user } } -static int blkdev_reread_part(struct block_device *bdev) +/* + * This is an exported API for the block driver, and will not + * acquire bd_mutex. This API should be used in case that + * caller has held bd_mutex already. + */ +int __blkdev_reread_part(struct block_device *bdev) { struct gendisk *disk = bdev->bd_disk; - int res; if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!mutex_trylock(&bdev->bd_mutex)) - return -EBUSY; - res = rescan_partitions(disk, bdev); + + lockdep_assert_held(&bdev->bd_mutex); + + return rescan_partitions(disk, bdev); +} +EXPORT_SYMBOL(__blkdev_reread_part); + +/* + * This is an exported API for the block driver, and will + * try to acquire bd_mutex. If bd_mutex has been held already + * in current context, please call __blkdev_reread_part(). + * + * Make sure the held locks in current context aren't required + * in open()/close() handler and I/O path for avoiding ABBA deadlock: + * - bd_mutex is held before calling block driver's open/close + * handler + * - reading partition table may submit I/O to the block device + */ +int blkdev_reread_part(struct block_device *bdev) +{ + int res; + + mutex_lock(&bdev->bd_mutex); + res = __blkdev_reread_part(bdev); mutex_unlock(&bdev->bd_mutex); + return res; } +EXPORT_SYMBOL(blkdev_reread_part); static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, uint64_t len, int secure) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2ef9a4b72d06..e08a926fe12c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3269,11 +3269,8 @@ static int write_dev_supers(struct btrfs_device *device, */ static void btrfs_end_empty_barrier(struct bio *bio, int err) { - if (err) { - if (err == -EOPNOTSUPP) - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + if (err) clear_bit(BIO_UPTODATE, &bio->bi_flags); - } if (bio->bi_private) complete(bio->bi_private); bio_put(bio); @@ -3301,11 +3298,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) wait_for_completion(&device->flush_wait); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) { - printk_in_rcu("BTRFS: disabling barriers on dev %s\n", - rcu_str_deref(device->name)); - device->nobarriers = 1; - } else if (!bio_flagged(bio, BIO_UPTODATE)) { + if (!bio_flagged(bio, BIO_UPTODATE)) { ret = -EIO; btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 43af5a61ad25..1e155299abc0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2767,8 +2767,6 @@ static int __must_check submit_one_bio(int rw, struct bio *bio, else btrfsic_submit_bio(rw, bio); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; bio_put(bio); return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index c7a5602d01ee..efd85e0e8660 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2938,10 +2938,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) { struct buffer_head *bh = bio->bi_private; - if (err == -EOPNOTSUPP) { - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - } - if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) set_bit(BH_Quiet, &bh->b_state); @@ -3041,13 +3037,7 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags) if (buffer_prio(bh)) rw |= REQ_PRIO; - bio_get(bio); submit_bio(rw, bio); - - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - - bio_put(bio); return ret; } EXPORT_SYMBOL_GPL(_submit_bh); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 5765f88b3904..c5d81e8d84c3 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -359,7 +359,6 @@ void ext4_io_submit(struct ext4_io_submit *io) if (bio) { bio_get(io->io_bio); submit_bio(io->io_op, io->io_bio); - BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); bio_put(io->io_bio); } io->io_bio = NULL; diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index dc3a9efdaab8..42468e5ab3e7 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -343,11 +343,6 @@ static void nilfs_end_bio_write(struct bio *bio, int err) const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct nilfs_segment_buffer *segbuf = bio->bi_private; - if (err == -EOPNOTSUPP) { - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - /* to be detected by nilfs_segbuf_submit_bio() */ - } - if (!uptodate) atomic_inc(&segbuf->sb_err); @@ -374,15 +369,8 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, bio->bi_end_io = nilfs_end_bio_write; bio->bi_private = segbuf; - bio_get(bio); submit_bio(mode, bio); segbuf->sb_nbio++; - if (bio_flagged(bio, BIO_EOPNOTSUPP)) { - bio_put(bio); - err = -EOPNOTSUPP; - goto failed; - } - bio_put(bio); wi->bio = NULL; wi->rest_blocks -= wi->end - wi->start; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 93d2e7153816..3f4ded0b1a34 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -112,18 +112,15 @@ struct bio { * bio flags */ #define BIO_UPTODATE 0 /* ok after I/O completion */ -#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ -#define BIO_EOF 2 /* out-out-bounds error */ -#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ -#define BIO_CLONED 4 /* doesn't own data */ -#define BIO_BOUNCED 5 /* bio is a bounce bio */ -#define BIO_USER_MAPPED 6 /* contains user pages */ -#define BIO_EOPNOTSUPP 7 /* not supported */ -#define BIO_NULL_MAPPED 8 /* contains invalid user pages */ -#define BIO_QUIET 9 /* Make BIO Quiet */ -#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ -#define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */ -#define BIO_REFFED 12 /* bio has elevated ->bi_cnt */ +#define BIO_SEG_VALID 1 /* bi_phys_segments valid */ +#define BIO_CLONED 2 /* doesn't own data */ +#define BIO_BOUNCED 3 /* bio is a bounce bio */ +#define BIO_USER_MAPPED 4 /* contains user pages */ +#define BIO_NULL_MAPPED 5 /* contains invalid user pages */ +#define BIO_QUIET 6 /* Make BIO Quiet */ +#define BIO_SNAP_STABLE 7 /* bio data must be snapshotted during write */ +#define BIO_CHAIN 8 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 9 /* bio has elevated ->bi_cnt */ /* * Flags starting here get preserved by bio_reset() - this includes diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2da818a48097..bc917956a6d0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -444,7 +444,7 @@ struct request_queue { struct mutex sysfs_lock; int bypass_depth; - int mq_freeze_depth; + atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1..1ef63900243c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2279,6 +2279,9 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); extern void blkdev_put(struct block_device *bdev, fmode_t mode); +extern int __blkdev_reread_part(struct block_device *bdev); +extern int blkdev_reread_part(struct block_device *bdev); + #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); extern void bd_unlink_disk_holder(struct block_device *bdev, diff --git a/include/linux/swap.h b/include/linux/swap.h index cee108cbe2d5..38874729dc5f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -377,7 +377,6 @@ extern void end_swap_bio_write(struct bio *bio, int err); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, void (*end_write_func)(struct bio *, int)); extern int swap_set_page_dirty(struct page *page); -extern void end_swap_bio_read(struct bio *bio, int err); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 29472bff11ef..cb880a14cc39 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -7,8 +7,7 @@ obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o -obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ - block_io.o +obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c deleted file mode 100644 index 9a58bc258810..000000000000 --- a/kernel/power/block_io.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * This file provides functions for block I/O operations on swap/file. - * - * Copyright (C) 1998,2001-2005 Pavel Machek - * Copyright (C) 2006 Rafael J. Wysocki - * - * This file is released under the GPLv2. - */ - -#include -#include -#include -#include - -#include "power.h" - -/** - * submit - submit BIO request. - * @rw: READ or WRITE. - * @off physical offset of page. - * @page: page we're reading or writing. - * @bio_chain: list of pending biod (for async reading) - * - * Straight from the textbook - allocate and initialize the bio. - * If we're reading, make sure the page is marked as dirty. - * Then submit it and, if @bio_chain == NULL, wait. - */ -static int submit(int rw, struct block_device *bdev, sector_t sector, - struct page *page, struct bio **bio_chain) -{ - const int bio_rw = rw | REQ_SYNC; - struct bio *bio; - - bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); - bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; - bio->bi_end_io = end_swap_bio_read; - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", - (unsigned long long)sector); - bio_put(bio); - return -EFAULT; - } - - lock_page(page); - bio_get(bio); - - if (bio_chain == NULL) { - submit_bio(bio_rw, bio); - wait_on_page_locked(page); - if (rw == READ) - bio_set_pages_dirty(bio); - bio_put(bio); - } else { - if (rw == READ) - get_page(page); /* These pages are freed later */ - bio->bi_private = *bio_chain; - *bio_chain = bio; - submit_bio(bio_rw, bio); - } - return 0; -} - -int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) -{ - return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), - virt_to_page(addr), bio_chain); -} - -int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) -{ - return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), - virt_to_page(addr), bio_chain); -} - -int hib_wait_on_bio_chain(struct bio **bio_chain) -{ - struct bio *bio; - struct bio *next_bio; - int ret = 0; - - if (bio_chain == NULL) - return 0; - - bio = *bio_chain; - if (bio == NULL) - return 0; - while (bio) { - struct page *page; - - next_bio = bio->bi_private; - page = bio->bi_io_vec[0].bv_page; - wait_on_page_locked(page); - if (!PageUptodate(page) || PageError(page)) - ret = -EIO; - put_page(page); - bio_put(bio); - bio = next_bio; - } - *bio_chain = NULL; - return ret; -} diff --git a/kernel/power/power.h b/kernel/power/power.h index ce9b8328a689..caadb566e82b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -163,15 +163,6 @@ extern void swsusp_close(fmode_t); extern int swsusp_unmark(void); #endif -/* kernel/power/block_io.c */ -extern struct block_device *hib_resume_bdev; - -extern int hib_bio_read_page(pgoff_t page_off, void *addr, - struct bio **bio_chain); -extern int hib_bio_write_page(pgoff_t page_off, void *addr, - struct bio **bio_chain); -extern int hib_wait_on_bio_chain(struct bio **bio_chain); - struct timeval; /* kernel/power/swsusp.c */ extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 570aff817543..2f30ca91e4fa 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -212,7 +212,84 @@ int swsusp_swap_in_use(void) */ static unsigned short root_swap = 0xffff; -struct block_device *hib_resume_bdev; +static struct block_device *hib_resume_bdev; + +struct hib_bio_batch { + atomic_t count; + wait_queue_head_t wait; + int error; +}; + +static void hib_init_batch(struct hib_bio_batch *hb) +{ + atomic_set(&hb->count, 0); + init_waitqueue_head(&hb->wait); + hb->error = 0; +} + +static void hib_end_io(struct bio *bio, int error) +{ + struct hib_bio_batch *hb = bio->bi_private; + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct page *page = bio->bi_io_vec[0].bv_page; + + if (!uptodate || error) { + printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", + imajor(bio->bi_bdev->bd_inode), + iminor(bio->bi_bdev->bd_inode), + (unsigned long long)bio->bi_iter.bi_sector); + + if (!error) + error = -EIO; + } + + if (bio_data_dir(bio) == WRITE) + put_page(page); + + if (error && !hb->error) + hb->error = error; + if (atomic_dec_and_test(&hb->count)) + wake_up(&hb->wait); + + bio_put(bio); +} + +static int hib_submit_io(int rw, pgoff_t page_off, void *addr, + struct hib_bio_batch *hb) +{ + struct page *page = virt_to_page(addr); + struct bio *bio; + int error = 0; + + bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); + bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); + bio->bi_bdev = hib_resume_bdev; + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", + (unsigned long long)bio->bi_iter.bi_sector); + bio_put(bio); + return -EFAULT; + } + + if (hb) { + bio->bi_end_io = hib_end_io; + bio->bi_private = hb; + atomic_inc(&hb->count); + submit_bio(rw, bio); + } else { + error = submit_bio_wait(rw, bio); + bio_put(bio); + } + + return error; +} + +static int hib_wait_io(struct hib_bio_batch *hb) +{ + wait_event(hb->wait, atomic_read(&hb->count) == 0); + return hb->error; +} /* * Saving part @@ -222,7 +299,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) { int error; - hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); + hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); @@ -231,7 +308,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) swsusp_header->flags = flags; if (flags & SF_CRC32_MODE) swsusp_header->crc32 = handle->crc32; - error = hib_bio_write_page(swsusp_resume_block, + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { printk(KERN_ERR "PM: Swap header not found!\n"); @@ -271,10 +348,10 @@ static int swsusp_swap_check(void) * write_page - Write one page to given swap location. * @buf: Address we're writing. * @offset: Offset of the swap page we're writing to. - * @bio_chain: Link the next write BIO here + * @hb: bio completion batch */ -static int write_page(void *buf, sector_t offset, struct bio **bio_chain) +static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) { void *src; int ret; @@ -282,13 +359,13 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) if (!offset) return -ENOSPC; - if (bio_chain) { + if (hb) { src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | __GFP_NORETRY); if (src) { copy_page(src, buf); } else { - ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ + ret = hib_wait_io(hb); /* Free pages */ if (ret) return ret; src = (void *)__get_free_page(__GFP_WAIT | @@ -298,14 +375,14 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) copy_page(src, buf); } else { WARN_ON_ONCE(1); - bio_chain = NULL; /* Go synchronous */ + hb = NULL; /* Go synchronous */ src = buf; } } } else { src = buf; } - return hib_bio_write_page(offset, src, bio_chain); + return hib_submit_io(WRITE_SYNC, offset, src, hb); } static void release_swap_writer(struct swap_map_handle *handle) @@ -348,7 +425,7 @@ err_close: } static int swap_write_page(struct swap_map_handle *handle, void *buf, - struct bio **bio_chain) + struct hib_bio_batch *hb) { int error = 0; sector_t offset; @@ -356,7 +433,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, if (!handle->cur) return -EINVAL; offset = alloc_swapdev_block(root_swap); - error = write_page(buf, offset, bio_chain); + error = write_page(buf, offset, hb); if (error) return error; handle->cur->entries[handle->k++] = offset; @@ -365,15 +442,15 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, if (!offset) return -ENOSPC; handle->cur->next_swap = offset; - error = write_page(handle->cur, handle->cur_swap, bio_chain); + error = write_page(handle->cur, handle->cur_swap, hb); if (error) goto out; clear_page(handle->cur); handle->cur_swap = offset; handle->k = 0; - if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { - error = hib_wait_on_bio_chain(bio_chain); + if (hb && low_free_pages() <= handle->reqd_free_pages) { + error = hib_wait_io(hb); if (error) goto out; /* @@ -445,23 +522,24 @@ static int save_image(struct swap_map_handle *handle, int ret; int nr_pages; int err2; - struct bio *bio; + struct hib_bio_batch hb; ktime_t start; ktime_t stop; + hib_init_batch(&hb); + printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n", nr_to_write); m = nr_to_write / 10; if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); while (1) { ret = snapshot_read_next(snapshot); if (ret <= 0) break; - ret = swap_write_page(handle, data_of(*snapshot), &bio); + ret = swap_write_page(handle, data_of(*snapshot), &hb); if (ret) break; if (!(nr_pages % m)) @@ -469,7 +547,7 @@ static int save_image(struct swap_map_handle *handle, nr_pages / m * 10); nr_pages++; } - err2 = hib_wait_on_bio_chain(&bio); + err2 = hib_wait_io(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -580,7 +658,7 @@ static int save_image_lzo(struct swap_map_handle *handle, int ret = 0; int nr_pages; int err2; - struct bio *bio; + struct hib_bio_batch hb; ktime_t start; ktime_t stop; size_t off; @@ -589,6 +667,8 @@ static int save_image_lzo(struct swap_map_handle *handle, struct cmp_data *data = NULL; struct crc_data *crc = NULL; + hib_init_batch(&hb); + /* * We'll limit the number of threads for compression to limit memory * footprint. @@ -674,7 +754,6 @@ static int save_image_lzo(struct swap_map_handle *handle, if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); for (;;) { for (thr = 0; thr < nr_threads; thr++) { @@ -748,7 +827,7 @@ static int save_image_lzo(struct swap_map_handle *handle, off += PAGE_SIZE) { memcpy(page, data[thr].cmp + off, PAGE_SIZE); - ret = swap_write_page(handle, page, &bio); + ret = swap_write_page(handle, page, &hb); if (ret) goto out_finish; } @@ -759,7 +838,7 @@ static int save_image_lzo(struct swap_map_handle *handle, } out_finish: - err2 = hib_wait_on_bio_chain(&bio); + err2 = hib_wait_io(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -906,7 +985,7 @@ static int get_swap_reader(struct swap_map_handle *handle, return -ENOMEM; } - error = hib_bio_read_page(offset, tmp->map, NULL); + error = hib_submit_io(READ_SYNC, offset, tmp->map, NULL); if (error) { release_swap_reader(handle); return error; @@ -919,7 +998,7 @@ static int get_swap_reader(struct swap_map_handle *handle, } static int swap_read_page(struct swap_map_handle *handle, void *buf, - struct bio **bio_chain) + struct hib_bio_batch *hb) { sector_t offset; int error; @@ -930,7 +1009,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, offset = handle->cur->entries[handle->k]; if (!offset) return -EFAULT; - error = hib_bio_read_page(offset, buf, bio_chain); + error = hib_submit_io(READ_SYNC, offset, buf, hb); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { @@ -968,27 +1047,28 @@ static int load_image(struct swap_map_handle *handle, int ret = 0; ktime_t start; ktime_t stop; - struct bio *bio; + struct hib_bio_batch hb; int err2; unsigned nr_pages; + hib_init_batch(&hb); + printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n", nr_to_read); m = nr_to_read / 10; if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); for ( ; ; ) { ret = snapshot_write_next(snapshot); if (ret <= 0) break; - ret = swap_read_page(handle, data_of(*snapshot), &bio); + ret = swap_read_page(handle, data_of(*snapshot), &hb); if (ret) break; if (snapshot->sync_read) - ret = hib_wait_on_bio_chain(&bio); + ret = hib_wait_io(&hb); if (ret) break; if (!(nr_pages % m)) @@ -996,7 +1076,7 @@ static int load_image(struct swap_map_handle *handle, nr_pages / m * 10); nr_pages++; } - err2 = hib_wait_on_bio_chain(&bio); + err2 = hib_wait_io(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -1067,7 +1147,7 @@ static int load_image_lzo(struct swap_map_handle *handle, unsigned int m; int ret = 0; int eof = 0; - struct bio *bio; + struct hib_bio_batch hb; ktime_t start; ktime_t stop; unsigned nr_pages; @@ -1080,6 +1160,8 @@ static int load_image_lzo(struct swap_map_handle *handle, struct dec_data *data = NULL; struct crc_data *crc = NULL; + hib_init_batch(&hb); + /* * We'll limit the number of threads for decompression to limit memory * footprint. @@ -1190,7 +1272,6 @@ static int load_image_lzo(struct swap_map_handle *handle, if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); ret = snapshot_write_next(snapshot); @@ -1199,7 +1280,7 @@ static int load_image_lzo(struct swap_map_handle *handle, for(;;) { for (i = 0; !eof && i < want; i++) { - ret = swap_read_page(handle, page[ring], &bio); + ret = swap_read_page(handle, page[ring], &hb); if (ret) { /* * On real read error, finish. On end of data, @@ -1226,7 +1307,7 @@ static int load_image_lzo(struct swap_map_handle *handle, if (!asked) break; - ret = hib_wait_on_bio_chain(&bio); + ret = hib_wait_io(&hb); if (ret) goto out_finish; have += asked; @@ -1281,7 +1362,7 @@ static int load_image_lzo(struct swap_map_handle *handle, * Wait for more data while we are decompressing. */ if (have < LZO_CMP_PAGES && asked) { - ret = hib_wait_on_bio_chain(&bio); + ret = hib_wait_io(&hb); if (ret) goto out_finish; have += asked; @@ -1430,7 +1511,7 @@ int swsusp_check(void) if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); - error = hib_bio_read_page(swsusp_resume_block, + error = hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); if (error) goto put; @@ -1438,7 +1519,7 @@ int swsusp_check(void) if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ - error = hib_bio_write_page(swsusp_resume_block, + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { error = -EINVAL; @@ -1482,10 +1563,10 @@ int swsusp_unmark(void) { int error; - hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); + hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); - error = hib_bio_write_page(swsusp_resume_block, + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { printk(KERN_ERR "PM: Cannot find swsusp signature!\n"); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fe22f7510bce..cfeebb499e79 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4387,10 +4387,7 @@ long __sched io_schedule_timeout(long timeout) long ret; current->in_iowait = 1; - if (old_iowait) - blk_schedule_flush_plug(current); - else - blk_flush_plug(current); + blk_schedule_flush_plug(current); delayacct_blkio_start(); rq = raw_rq(); diff --git a/mm/page_io.c b/mm/page_io.c index 6424869e275e..520baa4b04d7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -69,7 +69,7 @@ void end_swap_bio_write(struct bio *bio, int err) bio_put(bio); } -void end_swap_bio_read(struct bio *bio, int err) +static void end_swap_bio_read(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page;