btrfs: introduce new members for extent_map

Introduce two new members for extent_map:

- disk_bytenr
- offset

Both match the members with the same names inside struct
btrfs_file_extent_item.

For now this patch only touches those members when:

- Reading btrfs_file_extent_items from disk
- Inserting new holes
- Merging two extent maps
  With the new disk_bytenr and disk_num_bytes, merging becomes a
  little more complex, as there are 3 different cases:

  * Both extent maps are referring to the same data extents
    |<----- data extent A ----->|
       |<- em 1 ->|<- em 2 ->|

  * Both extent maps are referring to different data extents
    |<-- data extent A -->|<-- data extent B -->|
               |<- em 1 ->|<- em 2 ->|

  * One of the extent maps is referring to a merged and larger data
    extent that covers both extent maps

    This is not really a valid case outside of some selftests,
    so that test case will be removed.

  A new helper merge_ondisk_extents() is introduced to handle the above
  valid cases.

To properly assign values for those new members, a new btrfs_file_extent
parameter is introduced to all the involved call sites.

- For NOCOW writes the btrfs_file_extent would be exposed from
  can_nocow_file_extent().

- For other writes, the members can be easily calculated, as most of
  them have 0 offset and utilize the whole on-disk data extent.
  The exception is encoded writes, but thankfully that interface
  provides the offset directly along with all other needed info.

For now, both the old members (block_start/block_len/orig_start) are
co-existing with the new members (disk_bytenr/offset), meanwhile all the
critical code is still using the old members only.

The cleanup will happen later after all the old and new members are
properly validated.

There is some re-ordering of the assignments to the extent_map
members; they now follow this ordering:

- start and len
  Or file_pos and num_bytes for other structures.

- disk_bytenr and disk_num_bytes
- offset and ram_bytes
- compression

So expect some seemingly unrelated line movement.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Qu Wenruo 2024-04-30 07:53:02 +09:30 committed by David Sterba
parent 87a6962f73
commit 3d2ac99224
6 changed files with 156 additions and 11 deletions

View File

@ -709,6 +709,10 @@ iterate:
em->start = start;
em->orig_start = start;
em->block_start = EXTENT_MAP_HOLE;
em->disk_bytenr = EXTENT_MAP_HOLE;
em->disk_num_bytes = 0;
em->ram_bytes = 0;
em->offset = 0;
em->len = key.offset - start;
break;
}

View File

@ -229,6 +229,61 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
return next->block_start == prev->block_start;
}
/*
 * Merge the on-disk data extent description of @prev and @next.
 *
 * Only disk_bytenr, disk_num_bytes, offset and ram_bytes are written.
 * Both extent maps must be uncompressed; this is asserted on entry.
 *
 * On return @prev and @next hold identical values for those four members,
 * describing the merged range, so the caller should drop one of the two.
 */
static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
{
	u64 prev_disk_end;
	u64 next_disk_end;
	u64 merged_bytenr;
	u64 merged_num_bytes;
	u64 merged_offset;

	/* Compressed extent maps are not handled here. */
	ASSERT(!extent_map_is_compressed(prev));
	ASSERT(!extent_map_is_compressed(next));

	/*
	 * Two layouts are expected:
	 *
	 * 1) Both maps point into the same data extent:
	 *
	 *    |<----- data extent A ----->|
	 *       |<- prev ->|<- next ->|
	 *
	 * 2) The maps point into different but adjacent data extents:
	 *
	 *    |<-- data extent A -->|<-- data extent B -->|
	 *               |<- prev ->|<- next ->|
	 *
	 * In both layouts the data extents are combined first (lowest start,
	 * highest end) and the offset is then recomputed against the combined
	 * extent. For 1) the combined extent equals the original one; for 2)
	 * it spans both data extents.
	 */
	prev_disk_end = prev->disk_bytenr + prev->disk_num_bytes;
	next_disk_end = next->disk_bytenr + next->disk_num_bytes;

	merged_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
	merged_num_bytes = max(prev_disk_end, next_disk_end) - merged_bytenr;

	/* Where @prev's data begins, relative to the combined extent start. */
	merged_offset = prev->disk_bytenr + prev->offset - merged_bytenr;

	prev->disk_bytenr = merged_bytenr;
	next->disk_bytenr = merged_bytenr;

	prev->disk_num_bytes = merged_num_bytes;
	next->disk_num_bytes = merged_num_bytes;

	/* ram_bytes is set to the same value as disk_num_bytes. */
	prev->ram_bytes = merged_num_bytes;
	next->ram_bytes = merged_num_bytes;

	prev->offset = merged_offset;
	next->offset = merged_offset;
}
static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
{
struct extent_map_tree *tree = &inode->extent_tree;
@ -260,6 +315,9 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
em->block_len += merge->block_len;
em->block_start = merge->block_start;
em->generation = max(em->generation, merge->generation);
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
merge_ondisk_extents(merge, em);
em->flags |= EXTENT_FLAG_MERGED;
rb_erase(&merge->rb_node, &tree->root);
@ -275,6 +333,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
em->len += merge->len;
em->block_len += merge->block_len;
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
merge_ondisk_extents(em, merge);
rb_erase(&merge->rb_node, &tree->root);
RB_CLEAR_NODE(&merge->rb_node);
em->generation = max(em->generation, merge->generation);
@ -562,6 +622,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
!extent_map_is_compressed(em)) {
em->block_start += start_diff;
em->block_len = em->len;
em->offset += start_diff;
}
return add_extent_mapping(inode, em, 0);
}
@ -785,14 +846,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->block_len = em->block_len;
else
split->block_len = split->len;
split->disk_bytenr = em->disk_bytenr;
split->disk_num_bytes = max(split->block_len,
em->disk_num_bytes);
split->offset = em->offset;
split->ram_bytes = em->ram_bytes;
} else {
split->orig_start = split->start;
split->block_len = 0;
split->block_start = em->block_start;
split->disk_bytenr = em->disk_bytenr;
split->disk_num_bytes = 0;
split->offset = 0;
split->ram_bytes = split->len;
}
@ -813,13 +878,14 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->start = end;
split->len = em_end - end;
split->block_start = em->block_start;
split->disk_bytenr = em->disk_bytenr;
split->flags = flags;
split->generation = gen;
if (em->block_start < EXTENT_MAP_LAST_BYTE) {
split->disk_num_bytes = max(em->block_len,
em->disk_num_bytes);
split->offset = em->offset + end - em->start;
split->ram_bytes = em->ram_bytes;
if (compressed) {
split->block_len = em->block_len;
@ -832,10 +898,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->orig_start = em->orig_start;
}
} else {
split->disk_num_bytes = 0;
split->offset = 0;
split->ram_bytes = split->len;
split->orig_start = split->start;
split->block_len = 0;
split->disk_num_bytes = 0;
}
if (extent_map_in_tree(em)) {
@ -989,10 +1056,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
/* First, replace the em with a new extent_map starting from * em->start */
split_pre->start = em->start;
split_pre->len = pre;
split_pre->disk_bytenr = new_logical;
split_pre->disk_num_bytes = split_pre->len;
split_pre->offset = 0;
split_pre->orig_start = split_pre->start;
split_pre->block_start = new_logical;
split_pre->block_len = split_pre->len;
split_pre->disk_num_bytes = split_pre->block_len;
split_pre->ram_bytes = split_pre->len;
split_pre->flags = flags;
split_pre->generation = em->generation;
@ -1007,10 +1076,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
/* Insert the middle extent_map. */
split_mid->start = em->start + pre;
split_mid->len = em->len - pre;
split_mid->disk_bytenr = em->block_start + pre;
split_mid->disk_num_bytes = split_mid->len;
split_mid->offset = 0;
split_mid->orig_start = split_mid->start;
split_mid->block_start = em->block_start + pre;
split_mid->block_len = split_mid->len;
split_mid->disk_num_bytes = split_mid->block_len;
split_mid->ram_bytes = split_mid->len;
split_mid->flags = flags;
split_mid->generation = em->generation;

View File

@ -70,12 +70,29 @@ struct extent_map {
*/
u64 orig_start;
/*
* The bytenr of the full on-disk extent.
*
* For regular extents it's btrfs_file_extent_item::disk_bytenr.
* For holes it's EXTENT_MAP_HOLE and for inline extents it's
* EXTENT_MAP_INLINE.
*/
u64 disk_bytenr;
/*
* The full on-disk extent length, matching
* btrfs_file_extent_item::disk_num_bytes.
*/
u64 disk_num_bytes;
/*
* Offset inside the decompressed extent.
*
* For regular extents it's btrfs_file_extent_item::offset.
* For holes and inline extents it's 0.
*/
u64 offset;
/*
* The decompressed size of the whole on-disk extent, matching
* btrfs_file_extent_item::ram_bytes.

View File

@ -1295,12 +1295,17 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
em->len = btrfs_file_extent_end(path) - extent_start;
em->orig_start = extent_start -
btrfs_file_extent_offset(leaf, fi);
em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
if (bytenr == 0) {
em->block_start = EXTENT_MAP_HOLE;
em->disk_bytenr = EXTENT_MAP_HOLE;
em->disk_num_bytes = 0;
em->offset = 0;
return;
}
em->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
em->offset = btrfs_file_extent_offset(leaf, fi);
if (compress_type != BTRFS_COMPRESS_NONE) {
extent_map_set_compression(em, compress_type);
em->block_start = bytenr;
@ -1317,8 +1322,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
ASSERT(extent_start == 0);
em->block_start = EXTENT_MAP_INLINE;
em->disk_bytenr = EXTENT_MAP_INLINE;
em->start = 0;
em->len = fs_info->sectorsize;
em->offset = 0;
/*
* Initialize orig_start and block_len with the same values
* as in inode.c:btrfs_get_extent().

View File

@ -2349,6 +2349,7 @@ out:
hole_em->orig_start = offset;
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->disk_bytenr = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->disk_num_bytes = 0;
hole_em->generation = trans->transid;

View File

@ -142,6 +142,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 len, u64 orig_start, u64 block_start,
u64 block_len, u64 disk_num_bytes,
u64 ram_bytes, int compress_type,
const struct btrfs_file_extent *file_extent,
int type);
static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
@ -1153,6 +1154,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ordered_extent *ordered;
struct btrfs_file_extent file_extent;
struct btrfs_key ins;
struct page *locked_page = NULL;
struct extent_state *cached = NULL;
@ -1199,6 +1201,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
lock_extent(io_tree, start, end, &cached);
/* Here we're doing allocation and writeback of the compressed pages */
file_extent.disk_bytenr = ins.objectid;
file_extent.disk_num_bytes = ins.offset;
file_extent.ram_bytes = async_extent->ram_size;
file_extent.num_bytes = async_extent->ram_size;
file_extent.offset = 0;
file_extent.compression = async_extent->compress_type;
em = create_io_em(inode, start,
async_extent->ram_size, /* len */
start, /* orig_start */
@ -1207,6 +1216,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
ins.offset, /* orig_block_len */
async_extent->ram_size, /* ram_bytes */
async_extent->compress_type,
&file_extent,
BTRFS_ORDERED_COMPRESSED);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@ -1396,6 +1406,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
while (num_bytes > 0) {
struct btrfs_ordered_extent *ordered;
struct btrfs_file_extent file_extent;
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
@ -1432,6 +1443,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
extent_reserved = true;
ram_size = ins.offset;
file_extent.disk_bytenr = ins.objectid;
file_extent.disk_num_bytes = ins.offset;
file_extent.num_bytes = ins.offset;
file_extent.ram_bytes = ins.offset;
file_extent.offset = 0;
file_extent.compression = BTRFS_COMPRESS_NONE;
lock_extent(&inode->io_tree, start, start + ram_size - 1,
&cached);
@ -1443,6 +1460,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
ins.offset, /* orig_block_len */
ram_size, /* ram_bytes */
BTRFS_COMPRESS_NONE, /* compress_type */
&file_extent,
BTRFS_ORDERED_REGULAR /* type */);
if (IS_ERR(em)) {
unlock_extent(&inode->io_tree, start,
@ -2181,6 +2199,7 @@ must_cow:
nocow_args.num_bytes, /* block_len */
nocow_args.disk_num_bytes, /* orig_block_len */
ram_bytes, BTRFS_COMPRESS_NONE,
&nocow_args.file_extent,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
unlock_extent(&inode->io_tree, cur_offset,
@ -5013,6 +5032,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
hole_em->orig_start = cur_offset;
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->disk_bytenr = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->disk_num_bytes = 0;
hole_em->ram_bytes = hole_size;
@ -6881,6 +6901,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
}
em->start = EXTENT_MAP_HOLE;
em->orig_start = EXTENT_MAP_HOLE;
em->disk_bytenr = EXTENT_MAP_HOLE;
em->len = (u64)-1;
em->block_len = (u64)-1;
@ -7046,7 +7067,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
const u64 block_len,
const u64 orig_block_len,
const u64 ram_bytes,
const int type)
const int type,
const struct btrfs_file_extent *file_extent)
{
struct extent_map *em = NULL;
struct btrfs_ordered_extent *ordered;
@ -7055,7 +7077,7 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
em = create_io_em(inode, start, len, orig_start, block_start,
block_len, orig_block_len, ram_bytes,
BTRFS_COMPRESS_NONE, /* compress_type */
type);
file_extent, type);
if (IS_ERR(em))
goto out;
}
@ -7086,6 +7108,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_file_extent file_extent;
struct extent_map *em;
struct btrfs_key ins;
u64 alloc_hint;
@ -7104,9 +7127,16 @@ again:
if (ret)
return ERR_PTR(ret);
file_extent.disk_bytenr = ins.objectid;
file_extent.disk_num_bytes = ins.offset;
file_extent.num_bytes = ins.offset;
file_extent.ram_bytes = ins.offset;
file_extent.offset = 0;
file_extent.compression = BTRFS_COMPRESS_NONE;
em = btrfs_create_dio_extent(inode, dio_data, start, ins.offset, start,
ins.objectid, ins.offset, ins.offset,
ins.offset, BTRFS_ORDERED_REGULAR);
ins.offset, BTRFS_ORDERED_REGULAR,
&file_extent);
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
if (IS_ERR(em))
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
@ -7348,6 +7378,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 len, u64 orig_start, u64 block_start,
u64 block_len, u64 disk_num_bytes,
u64 ram_bytes, int compress_type,
const struct btrfs_file_extent *file_extent,
int type)
{
struct extent_map *em;
@ -7405,9 +7436,11 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
em->len = len;
em->block_len = block_len;
em->block_start = block_start;
em->disk_bytenr = file_extent->disk_bytenr;
em->disk_num_bytes = disk_num_bytes;
em->ram_bytes = ram_bytes;
em->generation = -1;
em->offset = file_extent->offset;
em->flags |= EXTENT_FLAG_PINNED;
if (type == BTRFS_ORDERED_COMPRESSED)
extent_map_set_compression(em, compress_type);
@ -7431,6 +7464,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
{
const bool nowait = (iomap_flags & IOMAP_NOWAIT);
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_file_extent file_extent;
struct extent_map *em = *map;
int type;
u64 block_start, orig_start, orig_block_len, ram_bytes;
@ -7461,7 +7495,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
block_start = em->block_start + (start - em->start);
if (can_nocow_extent(inode, start, &len, &orig_start,
&orig_block_len, &ram_bytes, NULL, false, false) == 1) {
&orig_block_len, &ram_bytes,
&file_extent, false, false) == 1) {
bg = btrfs_inc_nocow_writers(fs_info, block_start);
if (bg)
can_nocow = true;
@ -7489,7 +7524,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, len,
orig_start, block_start,
len, orig_block_len,
ram_bytes, type);
ram_bytes, type,
&file_extent);
btrfs_dec_nocow_writers(bg);
if (type == BTRFS_ORDERED_PREALLOC) {
free_extent_map(em);
@ -9581,6 +9617,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
em->orig_start = cur_offset;
em->len = ins.offset;
em->block_start = ins.objectid;
em->disk_bytenr = ins.objectid;
em->offset = 0;
em->block_len = ins.offset;
em->disk_num_bytes = ins.offset;
em->ram_bytes = ins.offset;
@ -10147,6 +10185,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
struct extent_changeset *data_reserved = NULL;
struct extent_state *cached_state = NULL;
struct btrfs_ordered_extent *ordered;
struct btrfs_file_extent file_extent;
int compression;
size_t orig_count;
u64 start, end;
@ -10322,10 +10361,16 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
goto out_delalloc_release;
extent_reserved = true;
file_extent.disk_bytenr = ins.objectid;
file_extent.disk_num_bytes = ins.offset;
file_extent.num_bytes = num_bytes;
file_extent.ram_bytes = ram_bytes;
file_extent.offset = encoded->unencoded_offset;
file_extent.compression = compression;
em = create_io_em(inode, start, num_bytes,
start - encoded->unencoded_offset, ins.objectid,
ins.offset, ins.offset, ram_bytes, compression,
BTRFS_ORDERED_COMPRESSED);
&file_extent, BTRFS_ORDERED_COMPRESSED);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out_free_reserved;