btrfs: lzo: fix and simplify the inline extent decompression

[BUG]
If we have a filesystem with 4k sectorsize, and an inlined compressed
extent created like this:

	item 4 key (257 INODE_ITEM 0) itemoff 15863 itemsize 160
		generation 8 transid 8 size 4096 nbytes 4096
		block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0
		sequence 1 flags 0x0(none)
	item 5 key (257 INODE_REF 256) itemoff 15839 itemsize 24
		index 2 namelen 14 name: source_inlined
	item 6 key (257 EXTENT_DATA 0) itemoff 15770 itemsize 69
		generation 8 type 0 (inline)
		inline extent data size 48 ram_bytes 4096 compression 2 (lzo)

Then trying to reflink that extent in an aarch64 system with 64K page
size, the reflink would just fail:

  # xfs_io -f -c "reflink $mnt/source_inlined 0 60k 4k" $mnt/dest
  XFS_IOC_CLONE_RANGE: Input/output error

[CAUSE]
In zlib_decompress(), we didn't treat @start_byte as just a page offset,
but also use it as an indicator on whether we should error out, without
any proper explanation (this is from the very beginning of btrfs).

In reality, for subpage cases, although @start_byte can be non-zero,
we should never switch input/output buffer nor error out, since the whole
input/output buffer should never exceed one sector.

Note: The above assumption is only not true if we're going to support
multi-page sectorsize.

Thus the current code using @start_byte as a condition to switch
input/output buffer or finish the decompression is completely incorrect.

[FIX]
The fix involves several modifications:

- Rename @start_byte to @dest_pgoff to properly express its meaning

- Use @sectorsize other than PAGE_SIZE to properly initialize the
  output buffer size

- Use correct destination offset inside the destination page

- Use memcpy_to_page() to copy the contents to the destination page

- Use memzero_page() to zero out the tailing part

- Consider early end as an error

After the fix, even on 64K page sized aarch64, above reflink now
works as expected:

  # xfs_io -f -c "reflink $mnt/source_inlined 0 60k 4k" $mnt/dest
  linked 4096/4096 bytes at offset 61440

And results the correct file layout:

	item 9 key (258 INODE_ITEM 0) itemoff 15542 itemsize 160
		generation 10 transid 10 size 65536 nbytes 4096
		block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0
		sequence 1 flags 0x0(none)
	item 10 key (258 INODE_REF 256) itemoff 15528 itemsize 14
		index 3 namelen 4 name: dest
	item 11 key (258 XATTR_ITEM 3817753667) itemoff 15445 itemsize 83
		location key (0 UNKNOWN.0 0) type XATTR
		transid 10 data_len 37 name_len 16
		name: security.selinux
		data unconfined_u:object_r:unlabeled_t:s0
	item 12 key (258 EXTENT_DATA 61440) itemoff 15392 itemsize 53
		generation 10 type 1 (regular)
		extent data disk byte 13631488 nr 4096
		extent data offset 0 nr 4096 ram 4096
		extent compression 0 (none)

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Qu Wenruo 2024-01-08 19:38:45 +10:30 committed by David Sterba
parent 2c25716dcc
commit 6a69631ec9
2 changed files with 10 additions and 26 deletions

View File

@ -159,7 +159,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
unsigned long *total_in, unsigned long *total_out);
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int lzo_decompress(struct list_head *ws, const u8 *data_in,
struct page *dest_page, unsigned long start_byte, size_t srclen,
struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
struct list_head *lzo_alloc_workspace(unsigned int level);
void lzo_free_workspace(struct list_head *ws);

View File

@ -425,16 +425,16 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
}
int lzo_decompress(struct list_head *ws, const u8 *data_in,
struct page *dest_page, unsigned long start_byte, size_t srclen,
struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
const u32 sectorsize = fs_info->sectorsize;
size_t in_len;
size_t out_len;
size_t max_segment_len = WORKSPACE_BUF_LENGTH;
int ret = 0;
char *kaddr;
unsigned long bytes;
if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)
return -EUCLEAN;
@ -451,7 +451,7 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
}
data_in += LZO_LEN;
out_len = PAGE_SIZE;
out_len = sectorsize;
ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
if (ret != LZO_E_OK) {
pr_warn("BTRFS: decompress failed!\n");
@ -459,29 +459,13 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
goto out;
}
if (out_len < start_byte) {
ASSERT(out_len <= sectorsize);
memcpy_to_page(dest_page, dest_pgoff, workspace->buf, out_len);
/* Early end, considered as an error. */
if (unlikely(out_len < destlen)) {
ret = -EIO;
goto out;
memzero_page(dest_page, dest_pgoff + out_len, destlen - out_len);
}
/*
* the caller is already checking against PAGE_SIZE, but lets
* move this check closer to the memcpy/memset
*/
destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes = min_t(unsigned long, destlen, out_len - start_byte);
kaddr = kmap_local_page(dest_page);
memcpy(kaddr, workspace->buf + start_byte, bytes);
/*
* btrfs_getblock is doing a zero on the tail of the page too,
* but this will cover anything missing from the decompressed
* data.
*/
if (bytes < destlen)
memset(kaddr+bytes, 0, destlen-bytes);
kunmap_local(kaddr);
out:
return ret;
}