dm: add dm_bio_rewind() API to DM core
Commit 7759eb23fd98 ("block: remove bio_rewind_iter()") removed
a similar API for the following reasons:
```
It is pointed that bio_rewind_iter() is one very bad API[1]:
1) bio size may not be restored after rewinding
2) it causes some bogus change, such as 5151842b9d8732 (block: reset
bi_iter.bi_done after splitting bio)
3) rewinding really makes things complicated wrt. bio splitting
4) unnecessary updating of .bi_done in fast path
[1] https://marc.info/?t=153549924200005&r=1&w=2
So this patch takes Kent's suggestion to restore one bio into its original
state via saving bio iterator(struct bvec_iter) in bio_integrity_prep(),
given now bio_rewind_iter() is only used by bio integrity code.
```
However, saving off a copy of the 32 bytes bio->bi_iter in case rewind
needed isn't efficient because it bloats per-bio-data for what is an
unlikely case. That suggestion also ignores the need to restore
crypto and integrity info.
Add dm_bio_rewind() API for a specific use-case that is much more narrow
than the previous more generic rewind code that was reverted:
1) most bios have a fixed end sector since bio split is done from front
of the bio, if driver just records how many sectors between current
bio's start sector and the original bio's end sector, the original
position can be restored. Keeping the original bio's end sector
fixed is a _hard_ requirement for this interface!
2) if a bio's end sector won't change (usually bio_trim() isn't
called, or in the case of DM it preserves original bio), user can
restore the original position by storing sector offset from the
current ->bi_iter.bi_sector to bio's end sector; together with
saving bio size, only 8 bytes is needed to restore to original
bio.
3) DM's requeue use case: when BLK_STS_DM_REQUEUE happens, DM core
needs to restore to an "original bio" which represents the current
dm_io to be requeued (which may be a subset of the original bio).
By storing the sector offset from the original bio's end sector and
dm_io's size, dm_bio_rewind() can restore such original bio. See
commit 7dd76d1feec7 ("dm: improve bio splitting and associated IO
accounting") for more details on how DM does this. Leveraging this,
allows DM core to shift the need for bio cloning from bio-split
time (during IO submission) to the less likely BLK_STS_DM_REQUEUE
handling (after IO completes with that error).
4) Unlike the original rewind API, dm_bio_rewind() doesn't add .bi_done
to bvec_iter and there is no effect on the fast path.
Implement dm_bio_rewind() by factoring out clear helpers that it calls:
dm_bio_integrity_rewind, dm_bio_crypt_rewind and dm_bio_rewind_iter.
DM is able to ensure that dm_bio_rewind() is used safely but, given
the constraint that the bio's end must never change, other
hypothetical future callers may not take the same care. So make
dm_bio_rewind() and all supporting code local to DM to avoid risk of
hypothetical abuse. A "dm_" prefix was added to all functions to avoid
any namespace collisions.
Suggested-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
2022-06-24 22:12:52 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
/*
|
|
|
|
* Copyright 2022 Red Hat, Inc.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/bio.h>
|
|
|
|
#include <linux/blk-crypto.h>
|
|
|
|
#include <linux/blk-integrity.h>
|
|
|
|
|
|
|
|
#include "dm-core.h"
|
|
|
|
|
|
|
|
static inline bool dm_bvec_iter_rewind(const struct bio_vec *bv,
|
|
|
|
struct bvec_iter *iter,
|
|
|
|
unsigned int bytes)
|
|
|
|
{
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
iter->bi_size += bytes;
|
|
|
|
if (bytes <= iter->bi_bvec_done) {
|
|
|
|
iter->bi_bvec_done -= bytes;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bytes -= iter->bi_bvec_done;
|
|
|
|
idx = iter->bi_idx - 1;
|
|
|
|
|
|
|
|
while (idx >= 0 && bytes && bytes > bv[idx].bv_len) {
|
|
|
|
bytes -= bv[idx].bv_len;
|
|
|
|
idx--;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (WARN_ONCE(idx < 0 && bytes,
|
|
|
|
"Attempted to rewind iter beyond bvec's boundaries\n")) {
|
|
|
|
iter->bi_size -= bytes;
|
|
|
|
iter->bi_bvec_done = 0;
|
|
|
|
iter->bi_idx = 0;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
iter->bi_idx = idx;
|
|
|
|
iter->bi_bvec_done = bv[idx].bv_len - bytes;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if defined(CONFIG_BLK_DEV_INTEGRITY)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* dm_bio_integrity_rewind - Rewind integrity vector
|
|
|
|
* @bio: bio whose integrity vector to update
|
|
|
|
* @bytes_done: number of data bytes to rewind
|
|
|
|
*
|
|
|
|
* Description: This function calculates how many integrity bytes the
|
|
|
|
* number of completed data bytes correspond to and rewind the
|
|
|
|
* integrity vector accordingly.
|
|
|
|
*/
|
|
|
|
static void dm_bio_integrity_rewind(struct bio *bio, unsigned int bytes_done)
|
|
|
|
{
|
|
|
|
struct bio_integrity_payload *bip = bio_integrity(bio);
|
|
|
|
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
|
|
|
unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
|
|
|
|
|
|
|
|
bip->bip_iter.bi_sector -= bio_integrity_intervals(bi, bytes_done >> 9);
|
|
|
|
dm_bvec_iter_rewind(bip->bip_vec, &bip->bip_iter, bytes);
|
|
|
|
}
|
|
|
|
|
|
|
|
#else /* CONFIG_BLK_DEV_INTEGRITY */
|
|
|
|
|
|
|
|
static inline void dm_bio_integrity_rewind(struct bio *bio,
|
|
|
|
unsigned int bytes_done)
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(CONFIG_BLK_INLINE_ENCRYPTION)
|
|
|
|
|
|
|
|
/* Decrements @dun by @dec, treating @dun as a multi-limb integer. */
|
|
|
|
static void dm_bio_crypt_dun_decrement(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
|
|
|
|
unsigned int dec)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; dec && i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) {
|
|
|
|
u64 prev = dun[i];
|
|
|
|
|
|
|
|
dun[i] -= dec;
|
|
|
|
if (dun[i] > prev)
|
|
|
|
dec = 1;
|
|
|
|
else
|
|
|
|
dec = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes)
|
|
|
|
{
|
|
|
|
struct bio_crypt_ctx *bc = bio->bi_crypt_context;
|
|
|
|
|
|
|
|
dm_bio_crypt_dun_decrement(bc->bc_dun,
|
|
|
|
bytes >> bc->bc_key->data_unit_size_bits);
|
|
|
|
}
|
|
|
|
|
|
|
|
#else /* CONFIG_BLK_INLINE_ENCRYPTION */
|
|
|
|
|
|
|
|
static inline void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes)
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static inline void dm_bio_rewind_iter(const struct bio *bio,
|
|
|
|
struct bvec_iter *iter, unsigned int bytes)
|
|
|
|
{
|
|
|
|
iter->bi_sector -= bytes >> 9;
|
|
|
|
|
|
|
|
/* No advance means no rewind */
|
|
|
|
if (bio_no_advance_iter(bio))
|
|
|
|
iter->bi_size += bytes;
|
|
|
|
else
|
|
|
|
dm_bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* dm_bio_rewind - update ->bi_iter of @bio by rewinding @bytes.
|
|
|
|
* @bio: bio to rewind
|
|
|
|
* @bytes: how many bytes to rewind
|
|
|
|
*
|
|
|
|
* WARNING:
|
|
|
|
* Caller must ensure that @bio has a fixed end sector, to allow
|
|
|
|
* rewinding from end of bio and restoring its original position.
|
|
|
|
* Caller is also responsibile for restoring bio's size.
|
|
|
|
*/
|
2022-06-24 22:12:55 +08:00
|
|
|
static void dm_bio_rewind(struct bio *bio, unsigned bytes)
|
dm: add dm_bio_rewind() API to DM core
Commit 7759eb23fd98 ("block: remove bio_rewind_iter()") removed
a similar API for the following reasons:
```
It is pointed that bio_rewind_iter() is one very bad API[1]:
1) bio size may not be restored after rewinding
2) it causes some bogus change, such as 5151842b9d8732 (block: reset
bi_iter.bi_done after splitting bio)
3) rewinding really makes things complicated wrt. bio splitting
4) unnecessary updating of .bi_done in fast path
[1] https://marc.info/?t=153549924200005&r=1&w=2
So this patch takes Kent's suggestion to restore one bio into its original
state via saving bio iterator(struct bvec_iter) in bio_integrity_prep(),
given now bio_rewind_iter() is only used by bio integrity code.
```
However, saving off a copy of the 32 bytes bio->bi_iter in case rewind
needed isn't efficient because it bloats per-bio-data for what is an
unlikely case. That suggestion also ignores the need to restore
crypto and integrity info.
Add dm_bio_rewind() API for a specific use-case that is much more narrow
than the previous more generic rewind code that was reverted:
1) most bios have a fixed end sector since bio split is done from front
of the bio, if driver just records how many sectors between current
bio's start sector and the original bio's end sector, the original
position can be restored. Keeping the original bio's end sector
fixed is a _hard_ requirement for this interface!
2) if a bio's end sector won't change (usually bio_trim() isn't
called, or in the case of DM it preserves original bio), user can
restore the original position by storing sector offset from the
current ->bi_iter.bi_sector to bio's end sector; together with
saving bio size, only 8 bytes is needed to restore to original
bio.
3) DM's requeue use case: when BLK_STS_DM_REQUEUE happens, DM core
needs to restore to an "original bio" which represents the current
dm_io to be requeued (which may be a subset of the original bio).
By storing the sector offset from the original bio's end sector and
dm_io's size, dm_bio_rewind() can restore such original bio. See
commit 7dd76d1feec7 ("dm: improve bio splitting and associated IO
accounting") for more details on how DM does this. Leveraging this,
allows DM core to shift the need for bio cloning from bio-split
time (during IO submission) to the less likely BLK_STS_DM_REQUEUE
handling (after IO completes with that error).
4) Unlike the original rewind API, dm_bio_rewind() doesn't add .bi_done
to bvec_iter and there is no effect on the fast path.
Implement dm_bio_rewind() by factoring out clear helpers that it calls:
dm_bio_integrity_rewind, dm_bio_crypt_rewind and dm_bio_rewind_iter.
DM is able to ensure that dm_bio_rewind() is used safely but, given
the constraint that the bio's end must never change, other
hypothetical future callers may not take the same care. So make
dm_bio_rewind() and all supporting code local to DM to avoid risk of
hypothetical abuse. A "dm_" prefix was added to all functions to avoid
any namespace collisions.
Suggested-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
2022-06-24 22:12:52 +08:00
|
|
|
{
|
|
|
|
if (bio_integrity(bio))
|
|
|
|
dm_bio_integrity_rewind(bio, bytes);
|
|
|
|
|
|
|
|
if (bio_has_crypt_ctx(bio))
|
|
|
|
dm_bio_crypt_rewind(bio, bytes);
|
|
|
|
|
|
|
|
dm_bio_rewind_iter(bio, &bio->bi_iter, bytes);
|
|
|
|
}
|
2022-06-24 22:12:55 +08:00
|
|
|
|
|
|
|
void dm_io_rewind(struct dm_io *io, struct bio_set *bs)
|
|
|
|
{
|
|
|
|
struct bio *orig = io->orig_bio;
|
|
|
|
struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig,
|
|
|
|
GFP_NOIO, bs);
|
|
|
|
/*
|
|
|
|
* dm_bio_rewind can restore to previous position since the
|
|
|
|
* end sector is fixed for original bio, but we still need
|
|
|
|
* to restore bio's size manually (using io->sectors).
|
|
|
|
*/
|
|
|
|
dm_bio_rewind(new_orig, ((io->sector_offset << 9) -
|
|
|
|
orig->bi_iter.bi_size));
|
|
|
|
bio_trim(new_orig, 0, io->sectors);
|
|
|
|
|
|
|
|
bio_chain(new_orig, orig);
|
|
|
|
/*
|
|
|
|
* __bi_remaining was increased (by dm_split_and_process_bio),
|
|
|
|
* so must drop the one added in bio_chain.
|
|
|
|
*/
|
|
|
|
atomic_dec(&orig->__bi_remaining);
|
|
|
|
io->orig_bio = new_orig;
|
|
|
|
}
|