linux/drivers/md/dm-io-rewind.c
Ming Lei 8b211aaccb dm: add two stage requeue mechanism
Commit 61b6e2e532 ("dm: fix BLK_STS_DM_REQUEUE handling when dm_io
represents split bio") reverted DM core's bio splitting back to using
bio_split()+bio_chain() because it was found that otherwise DM's
BLK_STS_DM_REQUEUE would trigger a live-lock waiting for bio
completion that would never occur.

Restore using bio_trim()+bio_inc_remaining(), as was done in commit
7dd76d1fee ("dm: improve bio splitting and associated IO
accounting"), but this time with proper handling for the above
scenario, which is covered in more detail in the commit header for
61b6e2e532.

Solve this issue by adding a two-stage dm_io requeue mechanism that
uses the new dm_bio_rewind() via dm_io_rewind():

1) requeue the dm_io onto the requeue_list added to struct
   mapped_device, and schedule it via the newly added requeue work
   (see the sketch below). This work item just clones
   dm_io->orig_bio (which DM saves and ensures its end sector isn't
   modified). dm_io_rewind() uses the sectors and sector_offset
   members of the dm_io, which are recorded relative to the end of
   orig_bio: dm_bio_rewind()+bio_trim() are then used to make that
   cloned bio reflect the subset of the original bio that is
   represented by the dm_io being requeued.

2) the second-stage requeue is the same as the original requeue, but
   io->orig_bio points to the new cloned bio (which matches the
   requeued dm_io as described above).

This allows DM core to shift the need for bio cloning from bio-split
time (during IO submission) to the less likely BLK_STS_DM_REQUEUE
handling (after IO completes with that error).
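Roughly, stage 1 amounts to the following; this is only a sketch of the
mechanism described above, and apart from requeue_list (named above) the
field, lock, and helper names are illustrative assumptions rather than
quotes from dm.c:

/*
 * Stage 1 (sketch): push the failed dm_io onto the mapped_device's
 * requeue_list and kick the requeue work.  Names other than
 * requeue_list are illustrative.
 */
static void dm_requeue_dm_io(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	unsigned long flags;

	spin_lock_irqsave(&md->requeue_lock, flags);
	io->next = md->requeue_list;	/* simple singly linked list */
	md->requeue_list = io;
	spin_unlock_irqrestore(&md->requeue_lock, flags);

	queue_work(md->wq, &md->requeue_work);	/* stage 2 runs from here */
}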

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
2022-07-07 11:49:32 -04:00

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright 2022 Red Hat, Inc.
 */

#include <linux/bio.h>
#include <linux/blk-crypto.h>
#include <linux/blk-integrity.h>

#include "dm-core.h"

static inline bool dm_bvec_iter_rewind(const struct bio_vec *bv,
				       struct bvec_iter *iter,
				       unsigned int bytes)
{
	int idx;

	iter->bi_size += bytes;
	if (bytes <= iter->bi_bvec_done) {
		/* Rewind fits within the current bvec. */
		iter->bi_bvec_done -= bytes;
		return true;
	}

	bytes -= iter->bi_bvec_done;
	idx = iter->bi_idx - 1;

	/* Step backwards over whole bvecs until the remainder fits in one. */
	while (idx >= 0 && bytes && bytes > bv[idx].bv_len) {
		bytes -= bv[idx].bv_len;
		idx--;
	}

	if (WARN_ONCE(idx < 0 && bytes,
		      "Attempted to rewind iter beyond bvec's boundaries\n")) {
		iter->bi_size -= bytes;
		iter->bi_bvec_done = 0;
		iter->bi_idx = 0;
		return false;
	}

	iter->bi_idx = idx;
	iter->bi_bvec_done = bv[idx].bv_len - bytes;
	return true;
}
#if defined(CONFIG_BLK_DEV_INTEGRITY)

/**
 * dm_bio_integrity_rewind - Rewind integrity vector
 * @bio: bio whose integrity vector to update
 * @bytes_done: number of data bytes to rewind
 *
 * Description: This function calculates how many integrity bytes
 * correspond to the number of completed data bytes and rewinds the
 * integrity vector accordingly.
 */
static void dm_bio_integrity_rewind(struct bio *bio, unsigned int bytes_done)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);

	bip->bip_iter.bi_sector -= bio_integrity_intervals(bi, bytes_done >> 9);
	dm_bvec_iter_rewind(bip->bip_vec, &bip->bip_iter, bytes);
}

#else /* CONFIG_BLK_DEV_INTEGRITY */

static inline void dm_bio_integrity_rewind(struct bio *bio,
					   unsigned int bytes_done)
{
	return;
}

#endif
#if defined(CONFIG_BLK_INLINE_ENCRYPTION)

/* Decrements @dun by @dec, treating @dun as a multi-limb integer. */
static void dm_bio_crypt_dun_decrement(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
				       unsigned int dec)
{
	int i;

	for (i = 0; dec && i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) {
		u64 prev = dun[i];

		dun[i] -= dec;
		/* If the subtraction wrapped, borrow 1 from the next limb. */
		if (dun[i] > prev)
			dec = 1;
		else
			dec = 0;
	}
}

static void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes)
{
	struct bio_crypt_ctx *bc = bio->bi_crypt_context;

	dm_bio_crypt_dun_decrement(bc->bc_dun,
				   bytes >> bc->bc_key->data_unit_size_bits);
}

#else /* CONFIG_BLK_INLINE_ENCRYPTION */

static inline void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes)
{
	return;
}

#endif
static inline void dm_bio_rewind_iter(const struct bio *bio,
				      struct bvec_iter *iter, unsigned int bytes)
{
	iter->bi_sector -= bytes >> 9;

	/* No advance means no rewind */
	if (bio_no_advance_iter(bio))
		iter->bi_size += bytes;
	else
		dm_bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
}

/**
 * dm_bio_rewind - update ->bi_iter of @bio by rewinding @bytes.
 * @bio: bio to rewind
 * @bytes: how many bytes to rewind
 *
 * WARNING:
 * Caller must ensure that @bio has a fixed end sector, to allow
 * rewinding from end of bio and restoring its original position.
 * Caller is also responsible for restoring bio's size.
 */
static void dm_bio_rewind(struct bio *bio, unsigned bytes)
{
	if (bio_integrity(bio))
		dm_bio_integrity_rewind(bio, bytes);

	if (bio_has_crypt_ctx(bio))
		dm_bio_crypt_rewind(bio, bytes);

	dm_bio_rewind_iter(bio, &bio->bi_iter, bytes);
}
void dm_io_rewind(struct dm_io *io, struct bio_set *bs)
{
	struct bio *orig = io->orig_bio;
	struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig,
					       GFP_NOIO, bs);

	/*
	 * dm_bio_rewind can restore to previous position since the
	 * end sector is fixed for original bio, but we still need
	 * to restore bio's size manually (using io->sectors).
	 */
	dm_bio_rewind(new_orig, ((io->sector_offset << 9) -
				 orig->bi_iter.bi_size));
	bio_trim(new_orig, 0, io->sectors);
	bio_chain(new_orig, orig);
	/*
	 * __bi_remaining was increased (by dm_split_and_process_bio),
	 * so must drop the one added in bio_chain.
	 */
	atomic_dec(&orig->__bi_remaining);
	io->orig_bio = new_orig;
}