diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 94f5c368c683..629ae45bb18e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -110,6 +110,24 @@ static void end_reshape(struct r10conf *conf); #define raid10_log(md, fmt, args...) \ do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0) +/* + * 'strct resync_pages' stores actual pages used for doing the resync + * IO, and it is per-bio, so make .bi_private points to it. + */ +static inline struct resync_pages *get_resync_pages(struct bio *bio) +{ + return bio->bi_private; +} + +/* + * for resync bio, r10bio pointer can be retrieved from the per-bio + * 'struct resync_pages'. + */ +static inline struct r10bio *get_resync_r10bio(struct bio *bio) +{ + return get_resync_pages(bio)->raid_bio; +} + static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) { struct r10conf *conf = data; @@ -140,11 +158,11 @@ static void r10bio_pool_free(void *r10_bio, void *data) static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) { struct r10conf *conf = data; - struct page *page; struct r10bio *r10_bio; struct bio *bio; - int i, j; - int nalloc; + int j; + int nalloc, nalloc_rp; + struct resync_pages *rps; r10_bio = r10bio_pool_alloc(gfp_flags, conf); if (!r10_bio) @@ -156,6 +174,15 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) else nalloc = 2; /* recovery */ + /* allocate once for all bios */ + if (!conf->have_replacement) + nalloc_rp = nalloc; + else + nalloc_rp = nalloc * 2; + rps = kmalloc(sizeof(struct resync_pages) * nalloc_rp, gfp_flags); + if (!rps) + goto out_free_r10bio; + /* * Allocate bios. */ @@ -175,36 +202,40 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) * Allocate RESYNC_PAGES data pages and attach them * where needed. */ - for (j = 0 ; j < nalloc; j++) { + for (j = 0; j < nalloc; j++) { struct bio *rbio = r10_bio->devs[j].repl_bio; - bio = r10_bio->devs[j].bio; - for (i = 0; i < RESYNC_PAGES; i++) { - if (j > 0 && !test_bit(MD_RECOVERY_SYNC, - &conf->mddev->recovery)) { - /* we can share bv_page's during recovery - * and reshape */ - struct bio *rbio = r10_bio->devs[0].bio; - page = rbio->bi_io_vec[i].bv_page; - get_page(page); - } else - page = alloc_page(gfp_flags); - if (unlikely(!page)) - goto out_free_pages; + struct resync_pages *rp, *rp_repl; - bio->bi_io_vec[i].bv_page = page; - if (rbio) - rbio->bi_io_vec[i].bv_page = page; + rp = &rps[j]; + if (rbio) + rp_repl = &rps[nalloc + j]; + + bio = r10_bio->devs[j].bio; + + if (!j || test_bit(MD_RECOVERY_SYNC, + &conf->mddev->recovery)) { + if (resync_alloc_pages(rp, gfp_flags)) + goto out_free_pages; + } else { + memcpy(rp, &rps[0], sizeof(*rp)); + resync_get_all_pages(rp); + } + + rp->idx = 0; + rp->raid_bio = r10_bio; + bio->bi_private = rp; + if (rbio) { + memcpy(rp_repl, rp, sizeof(*rp)); + rbio->bi_private = rp_repl; } } return r10_bio; out_free_pages: - for ( ; i > 0 ; i--) - safe_put_page(bio->bi_io_vec[i-1].bv_page); - while (j--) - for (i = 0; i < RESYNC_PAGES ; i++) - safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); + while (--j >= 0) + resync_free_pages(&rps[j * 2]); + j = 0; out_free_bio: for ( ; j < nalloc; j++) { @@ -213,30 +244,34 @@ out_free_bio: if (r10_bio->devs[j].repl_bio) bio_put(r10_bio->devs[j].repl_bio); } + kfree(rps); +out_free_r10bio: r10bio_pool_free(r10_bio, conf); return NULL; } static void r10buf_pool_free(void *__r10_bio, void *data) { - int i; struct r10conf *conf = data; struct r10bio *r10bio = __r10_bio; int j; + struct resync_pages *rp = NULL; - for (j=0; j < conf->copies; j++) { + for (j = conf->copies; j--; ) { struct bio *bio = r10bio->devs[j].bio; - if (bio) { - for (i = 0; i < RESYNC_PAGES; i++) { - safe_put_page(bio->bi_io_vec[i].bv_page); - bio->bi_io_vec[i].bv_page = NULL; - } - bio_put(bio); - } + + rp = get_resync_pages(bio); + resync_free_pages(rp); + bio_put(bio); + bio = r10bio->devs[j].repl_bio; if (bio) bio_put(bio); } + + /* resync pages array stored in the 1st bio's .bi_private */ + kfree(rp); + r10bio_pool_free(r10bio, conf); } @@ -1917,7 +1952,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d) static void end_sync_read(struct bio *bio) { - struct r10bio *r10_bio = bio->bi_private; + struct r10bio *r10_bio = get_resync_r10bio(bio); struct r10conf *conf = r10_bio->mddev->private; int d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); @@ -1926,6 +1961,7 @@ static void end_sync_read(struct bio *bio) static void end_reshape_read(struct bio *bio) { + /* reshape read bio isn't allocated from r10buf_pool */ struct r10bio *r10_bio = bio->bi_private; __end_sync_read(r10_bio, bio, r10_bio->read_slot); @@ -1960,7 +1996,7 @@ static void end_sync_request(struct r10bio *r10_bio) static void end_sync_write(struct bio *bio) { - struct r10bio *r10_bio = bio->bi_private; + struct r10bio *r10_bio = get_resync_r10bio(bio); struct mddev *mddev = r10_bio->mddev; struct r10conf *conf = mddev->private; int d; @@ -2040,6 +2076,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) for (i=0 ; i < conf->copies ; i++) { int j, d; struct md_rdev *rdev; + struct resync_pages *rp; tbio = r10_bio->devs[i].bio; @@ -2081,11 +2118,13 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) * First we need to fixup bv_offset, bv_len and * bi_vecs, as the read request might have corrupted these */ + rp = get_resync_pages(tbio); bio_reset(tbio); tbio->bi_vcnt = vcnt; tbio->bi_iter.bi_size = fbio->bi_iter.bi_size; - tbio->bi_private = r10_bio; + rp->raid_bio = r10_bio; + tbio->bi_private = rp; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; tbio->bi_end_io = end_sync_write; bio_set_op_attrs(tbio, REQ_OP_WRITE, 0); @@ -3149,10 +3188,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, } } bio = r10_bio->devs[0].bio; - bio_reset(bio); bio->bi_next = biolist; biolist = bio; - bio->bi_private = r10_bio; bio->bi_end_io = end_sync_read; bio_set_op_attrs(bio, REQ_OP_READ, 0); if (test_bit(FailFast, &rdev->flags)) @@ -3176,10 +3213,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (!test_bit(In_sync, &mrdev->flags)) { bio = r10_bio->devs[1].bio; - bio_reset(bio); bio->bi_next = biolist; biolist = bio; - bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = to_addr @@ -3204,10 +3239,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (mreplace == NULL || bio == NULL || test_bit(Faulty, &mreplace->flags)) break; - bio_reset(bio); bio->bi_next = biolist; biolist = bio; - bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = to_addr + @@ -3329,7 +3362,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, r10_bio->devs[i].repl_bio->bi_end_io = NULL; bio = r10_bio->devs[i].bio; - bio_reset(bio); bio->bi_error = -EIO; rcu_read_lock(); rdev = rcu_dereference(conf->mirrors[d].rdev); @@ -3354,7 +3386,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, atomic_inc(&r10_bio->remaining); bio->bi_next = biolist; biolist = bio; - bio->bi_private = r10_bio; bio->bi_end_io = end_sync_read; bio_set_op_attrs(bio, REQ_OP_READ, 0); if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) @@ -3373,13 +3404,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, /* Need to set up for writing to the replacement */ bio = r10_bio->devs[i].repl_bio; - bio_reset(bio); bio->bi_error = -EIO; sector = r10_bio->devs[i].addr; bio->bi_next = biolist; biolist = bio; - bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) @@ -3418,7 +3447,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (len == 0) break; for (bio= biolist ; bio ; bio=bio->bi_next) { - page = bio->bi_io_vec[bio->bi_vcnt].bv_page; + struct resync_pages *rp = get_resync_pages(bio); + page = resync_fetch_page(rp, rp->idx++); /* * won't fail because the vec table is big enough * to hold all these pages @@ -3427,7 +3457,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, } nr_sectors += len>>9; sector_nr += len>>9; - } while (biolist->bi_vcnt < RESYNC_PAGES); + } while (get_resync_pages(biolist)->idx < RESYNC_PAGES); r10_bio->sectors = nr_sectors; while (biolist) { @@ -3435,7 +3465,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, biolist = biolist->bi_next; bio->bi_next = NULL; - r10_bio = bio->bi_private; + r10_bio = get_resync_r10bio(bio); r10_bio->sectors = nr_sectors; if (bio->bi_end_io == end_sync_read) { @@ -4326,6 +4356,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, struct bio *blist; struct bio *bio, *read_bio; int sectors_done = 0; + struct page **pages; if (sector_nr == 0) { /* If restarting in the middle, skip the initial sectors */ @@ -4476,11 +4507,9 @@ read_more: if (!rdev2 || test_bit(Faulty, &rdev2->flags)) continue; - bio_reset(b); b->bi_bdev = rdev2->bdev; b->bi_iter.bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset; - b->bi_private = r10_bio; b->bi_end_io = end_reshape_write; bio_set_op_attrs(b, REQ_OP_WRITE, 0); b->bi_next = blist; @@ -4490,8 +4519,9 @@ read_more: /* Now add as many pages as possible to all of these bios. */ nr_sectors = 0; + pages = get_resync_pages(r10_bio->devs[0].bio)->pages; for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) { - struct page *page = r10_bio->devs[0].bio->bi_io_vec[s/(PAGE_SIZE>>9)].bv_page; + struct page *page = pages[s / (PAGE_SIZE >> 9)]; int len = (max_sectors - s) << 9; if (len > PAGE_SIZE) len = PAGE_SIZE; @@ -4675,7 +4705,7 @@ static int handle_reshape_read_error(struct mddev *mddev, static void end_reshape_write(struct bio *bio) { - struct r10bio *r10_bio = bio->bi_private; + struct r10bio *r10_bio = get_resync_r10bio(bio); struct mddev *mddev = r10_bio->mddev; struct r10conf *conf = mddev->private; int d;