mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 12:28:41 +08:00
for-5.18-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmI44SgACgkQxWXV+ddt WDtzyg//YgMKr05jRsU3I/pIQ9znuKZmmllThwF63ZRG4PvKz2QfzvKdrMuzNjru 5kHbG59iJqtLmU/aVsdp8mL6mmg5U3Ym2bIRsrW5m4HTtTowKdirvL/lQ3/tWm8j CSDJhUdCL2SwFjpru+4cxOeHLXNSfsk4BoCu8nsLitL+oXv/EPo/dkmu6nPjiMY3 RjsIDBeDEf7J20KOuP/qJuN2YOAT7TeISPD3Ow4aDsmndWQ8n6KehEmAZb7QuqZQ SYubZ2wTb9HuPH/qpiTIA7innBIr+JkYtUYlz2xxixM2BUWNfqD6oKHw9RgOY5Sg CULFssw0i7cgGKsvuPJw1zdM002uG4wwXKigGiyljTVWvxneyr4mNDWiGad+LyFJ XWhnABPidkLs/1zbUkJ23DVub5VlfZsypkFDJAUXI0nGu3VrhjDfTYMa8eCe2L/F YuGG6CrAC+5K/arKAWTVj7hOb+52UzBTEBJz60LJJ6dS9eQoBy857V6pfo7w7ukZ t/tqA6q75O4tk/G3Ix3V1CjuAH3kJE6qXrvBxhpu8aZNjofopneLyGqS5oahpcE8 8edtT+ZZhNuU9sLSEJCJATVxXRDdNzpQ8CHgOR5HOUbmM/vwKNzHPfRQzDnImznw UaUlFaaHwK17M6Y/6CnMecz26U2nVSJ7pyh39mb784XYe2a1efE= =YARd -----END PGP SIGNATURE----- Merge tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs updates from David Sterba: "This contains feature updates, performance improvements, preparatory and core work and some related VFS updates: Features: - encoded read/write ioctls, allows user space to read or write raw data directly to extents (now compressed, encrypted in the future), will be used by send/receive v2 where it saves processing time - zoned mode now works with metadata DUP (the mkfs.btrfs default) - error message header updates: - print error state: transaction abort, other error, log tree errors - print transient filesystem state: remount, device replace, ignored checksum verifications - tree-checker: verify the transaction id of the to-be-written dirty extent buffer Performance improvements for fsync: - directory logging speedups (up to -90% run time) - avoid logging all directory changes during renames (up to -60% run time) - avoid inode logging during rename and link when possible (up to -60% run time) - prepare extents to be logged before locking a log tree path (throughput +7%) - stop copying old file extents when doing a full 
fsync() - improved logging of old extents after truncate Core, fixes: - improved stale device identification by dev_t and not just path (for devices that are behind other layers like device mapper) - continued extent tree v2 preparatory work - disable features that won't work yet - add wrappers and abstractions for new tree roots - improved error handling - add super block write annotations around background block group reclaim - fix device scanning messages potentially accessing stale pointer - cleanups and refactoring VFS: - allow reflinks/deduplication from two different mounts of the same filesystem - export and add helpers for read/write range verification, for the encoded ioctls" * tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (98 commits) btrfs: zoned: put block group after final usage btrfs: don't access possibly stale fs_info data in device_list_add btrfs: add lockdep_assert_held to need_preemptive_reclaim btrfs: verify the tranisd of the to-be-written dirty extent buffer btrfs: unify the error handling of btrfs_read_buffer() btrfs: unify the error handling pattern for read_tree_block() btrfs: factor out do_free_extent_accounting helper btrfs: remove last_ref from the extent freeing code btrfs: add a alloc_reserved_extent helper btrfs: remove BUG_ON(ret) in alloc_reserved_tree_block btrfs: add and use helper for unlinking inode during log replay btrfs: extend locking to all space_info members accesses btrfs: zoned: mark relocation as writing fs: allow cross-vfsmount reflink/dedupe btrfs: remove the cross file system checks from remap btrfs: pass btrfs_fs_info to btrfs_recover_relocation btrfs: pass btrfs_fs_info for deleting snapshots and cleaner btrfs: add filesystems state details to error messages btrfs: deal with unexpected extent type during reflinking btrfs: fix unexpected error path when reflinking an inline extent ...
This commit is contained in:
commit
5191290407
@ -789,11 +789,13 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
|
||||
if (IS_ERR(eb)) {
|
||||
free_pref(ref);
|
||||
return PTR_ERR(eb);
|
||||
} else if (!extent_buffer_uptodate(eb)) {
|
||||
}
|
||||
if (!extent_buffer_uptodate(eb)) {
|
||||
free_pref(ref);
|
||||
free_extent_buffer(eb);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (lock)
|
||||
btrfs_tree_read_lock(eb);
|
||||
if (btrfs_header_level(eb) == 0)
|
||||
@ -1335,7 +1337,8 @@ again:
|
||||
if (IS_ERR(eb)) {
|
||||
ret = PTR_ERR(eb);
|
||||
goto out;
|
||||
} else if (!extent_buffer_uptodate(eb)) {
|
||||
}
|
||||
if (!extent_buffer_uptodate(eb)) {
|
||||
free_extent_buffer(eb);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
|
@ -1522,8 +1522,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
|
||||
return;
|
||||
|
||||
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
|
||||
sb_start_write(fs_info->sb);
|
||||
|
||||
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
|
||||
sb_end_write(fs_info->sb);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Long running balances can keep us blocked here for eternity, so
|
||||
@ -1531,6 +1535,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
*/
|
||||
if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
|
||||
btrfs_exclop_finish(fs_info);
|
||||
sb_end_write(fs_info->sb);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1605,6 +1610,7 @@ next:
|
||||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
mutex_unlock(&fs_info->reclaim_bgs_lock);
|
||||
btrfs_exclop_finish(fs_info);
|
||||
sb_end_write(fs_info->sb);
|
||||
}
|
||||
|
||||
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
|
||||
@ -2006,6 +2012,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
cache->length = key->offset;
|
||||
cache->used = btrfs_stack_block_group_used(bgi);
|
||||
cache->flags = btrfs_stack_block_group_flags(bgi);
|
||||
cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
|
||||
|
||||
set_free_space_tree_thresholds(cache);
|
||||
|
||||
@ -2288,7 +2295,7 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
|
||||
spin_lock(&block_group->lock);
|
||||
btrfs_set_stack_block_group_used(&bgi, block_group->used);
|
||||
btrfs_set_stack_block_group_chunk_objectid(&bgi,
|
||||
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
|
||||
block_group->global_root_id);
|
||||
btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
|
||||
key.objectid = block_group->start;
|
||||
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
|
||||
@ -2444,6 +2451,27 @@ next:
|
||||
btrfs_trans_release_chunk_metadata(trans);
|
||||
}
|
||||
|
||||
/*
|
||||
* For extent tree v2 we use the block_group_item->chunk_objectid to point at
|
||||
* our global root id. For v1 it's always set to
|
||||
* BTRFS_FIRST_CHUNK_TREE_OBJECTID.
|
||||
*
|
||||
* For v2 the id is derived from @offset (the block group start): the offset
|
||||
* is divided into fixed-size units and the unit number is reduced modulo
|
||||
* fs_info->nr_global_roots.
|
||||
*/
|
||||
static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
|
||||
{
|
||||
u64 div = SZ_1G;
|
||||
u64 index;
|
||||
|
||||
if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
|
||||
return BTRFS_FIRST_CHUNK_TREE_OBJECTID;
|
||||
|
||||
/* Filesystems of 10GiB or less are indexed in 128MiB units, not 1GiB. */
|
||||
if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL))
|
||||
div = SZ_128M;
|
||||
|
||||
offset = div64_u64(offset, div);
|
||||
div64_u64_rem(offset, fs_info->nr_global_roots, &index);
|
||||
return index;
|
||||
}
|
||||
|
||||
struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
|
||||
u64 bytes_used, u64 type,
|
||||
u64 chunk_offset, u64 size)
|
||||
@ -2464,6 +2492,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
cache->flags = type;
|
||||
cache->last_byte_to_unpin = (u64)-1;
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
|
||||
|
||||
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
|
||||
cache->needs_free_space = 1;
|
||||
|
||||
@ -2693,7 +2723,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
|
||||
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
|
||||
btrfs_set_stack_block_group_used(&bgi, cache->used);
|
||||
btrfs_set_stack_block_group_chunk_objectid(&bgi,
|
||||
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
|
||||
cache->global_root_id);
|
||||
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
|
||||
write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
|
@ -68,6 +68,7 @@ struct btrfs_block_group {
|
||||
u64 bytes_super;
|
||||
u64 flags;
|
||||
u64 cache_generation;
|
||||
u64 global_root_id;
|
||||
|
||||
/*
|
||||
* If the free space extent count exceeds this number, convert the block
|
||||
|
@ -13,6 +13,13 @@
|
||||
#include "ordered-data.h"
|
||||
#include "delayed-inode.h"
|
||||
|
||||
/*
|
||||
* Since we search a directory based on f_pos (struct dir_context::pos) we have
|
||||
* to start at 2 since '.' and '..' have f_pos of 0 and 1 respectively, so
|
||||
* everybody else has to start at 2 (see btrfs_real_readdir() and dir_emit_dots()).
|
||||
*/
|
||||
#define BTRFS_DIR_START_INDEX 2
|
||||
|
||||
/*
|
||||
* ordered_data_close is set by truncate when a file that used
|
||||
* to have good data has been truncated to zero. When it is set
|
||||
@ -173,8 +180,9 @@ struct btrfs_inode {
|
||||
u64 disk_i_size;
|
||||
|
||||
/*
|
||||
* if this is a directory then index_cnt is the counter for the index
|
||||
* number for new files that are created
|
||||
* If this is a directory then index_cnt is the counter for the index
|
||||
* number for new files that are created. For an empty directory, this
|
||||
* must be initialized to BTRFS_DIR_START_INDEX.
|
||||
*/
|
||||
u64 index_cnt;
|
||||
|
||||
@ -333,6 +341,36 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
|
||||
spin_unlock(&inode->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the inode as needing a full sync by setting BTRFS_INODE_NEEDS_FULL_SYNC
|
||||
* in its runtime flags, and raise last_reflink_trans to last_trans if it is
|
||||
* lower.
|
||||
*
|
||||
* Should be called while holding the inode's VFS lock in exclusive mode or in a
|
||||
* context where no one else can access the inode concurrently (during inode
|
||||
* creation or when loading an inode from disk).
|
||||
*/
|
||||
static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
|
||||
{
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
|
||||
/*
|
||||
* The inode may have been part of a reflink operation in the last
|
||||
* transaction that modified it, and then a fsync has reset the
|
||||
* last_reflink_trans to avoid subsequent fsyncs in the same
|
||||
* transaction to do unnecessary work. So update last_reflink_trans
|
||||
* to the last_trans value (we have to be pessimistic and assume a
|
||||
* reflink happened).
|
||||
*
|
||||
* The ->last_trans is protected by the inode's spinlock and we can
|
||||
* have a concurrent ordered extent completion update it. Also set
|
||||
* last_reflink_trans to ->last_trans only if the former is less than
|
||||
* the latter, because we can be called in a context where
|
||||
* last_reflink_trans was set to the current transaction generation
|
||||
* while ->last_trans was not yet updated in the current transaction,
|
||||
* and therefore has a lower value.
|
||||
*/
|
||||
spin_lock(&inode->lock);
|
||||
if (inode->last_reflink_trans < inode->last_trans)
|
||||
inode->last_reflink_trans = inode->last_trans;
|
||||
spin_unlock(&inode->lock);
|
||||
}
|
||||
|
||||
static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
|
||||
{
|
||||
bool ret = false;
|
||||
|
@ -219,7 +219,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
|
||||
bi_size += bvec->bv_len;
|
||||
|
||||
if (bio->bi_status)
|
||||
cb->errors = 1;
|
||||
cb->status = bio->bi_status;
|
||||
|
||||
ASSERT(bi_size && bi_size <= cb->compressed_len);
|
||||
last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
|
||||
@ -234,7 +234,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
|
||||
return last_io;
|
||||
}
|
||||
|
||||
static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
|
||||
static void finish_compressed_bio_read(struct compressed_bio *cb)
|
||||
{
|
||||
unsigned int index;
|
||||
struct page *page;
|
||||
@ -247,19 +247,18 @@ static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bi
|
||||
}
|
||||
|
||||
/* Do io completion on the original bio */
|
||||
if (cb->errors) {
|
||||
bio_io_error(cb->orig_bio);
|
||||
if (cb->status != BLK_STS_OK) {
|
||||
cb->orig_bio->bi_status = cb->status;
|
||||
bio_endio(cb->orig_bio);
|
||||
} else {
|
||||
struct bio_vec *bvec;
|
||||
struct bvec_iter_all iter_all;
|
||||
|
||||
ASSERT(bio);
|
||||
ASSERT(!bio->bi_status);
|
||||
/*
|
||||
* We have verified the checksum already, set page checked so
|
||||
* the end_io handlers know about it
|
||||
*/
|
||||
ASSERT(!bio_flagged(bio, BIO_CLONED));
|
||||
ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
|
||||
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
|
||||
u64 bvec_start = page_offset(bvec->bv_page) +
|
||||
bvec->bv_offset;
|
||||
@ -308,7 +307,7 @@ static void end_compressed_bio_read(struct bio *bio)
|
||||
* Some IO in this cb have failed, just skip checksum as there
|
||||
* is no way it could be correct.
|
||||
*/
|
||||
if (cb->errors == 1)
|
||||
if (cb->status != BLK_STS_OK)
|
||||
goto csum_failed;
|
||||
|
||||
inode = cb->inode;
|
||||
@ -324,8 +323,8 @@ static void end_compressed_bio_read(struct bio *bio)
|
||||
|
||||
csum_failed:
|
||||
if (ret)
|
||||
cb->errors = 1;
|
||||
finish_compressed_bio_read(cb, bio);
|
||||
cb->status = errno_to_blk_status(ret);
|
||||
finish_compressed_bio_read(cb);
|
||||
out:
|
||||
bio_put(bio);
|
||||
}
|
||||
@ -342,11 +341,12 @@ static noinline void end_compressed_writeback(struct inode *inode,
|
||||
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
|
||||
struct page *pages[16];
|
||||
unsigned long nr_pages = end_index - index + 1;
|
||||
const int errno = blk_status_to_errno(cb->status);
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
if (cb->errors)
|
||||
mapping_set_error(inode->i_mapping, -EIO);
|
||||
if (errno)
|
||||
mapping_set_error(inode->i_mapping, errno);
|
||||
|
||||
while (nr_pages > 0) {
|
||||
ret = find_get_pages_contig(inode->i_mapping, index,
|
||||
@ -358,7 +358,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
|
||||
continue;
|
||||
}
|
||||
for (i = 0; i < ret; i++) {
|
||||
if (cb->errors)
|
||||
if (errno)
|
||||
SetPageError(pages[i]);
|
||||
btrfs_page_clamp_clear_writeback(fs_info, pages[i],
|
||||
cb->start, cb->len);
|
||||
@ -381,9 +381,10 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
|
||||
*/
|
||||
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
|
||||
cb->start, cb->start + cb->len - 1,
|
||||
!cb->errors);
|
||||
cb->status == BLK_STS_OK);
|
||||
|
||||
end_compressed_writeback(inode, cb);
|
||||
if (cb->writeback)
|
||||
end_compressed_writeback(inode, cb);
|
||||
/* Note, our inode could be gone now */
|
||||
|
||||
/*
|
||||
@ -506,7 +507,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
struct page **compressed_pages,
|
||||
unsigned int nr_pages,
|
||||
unsigned int write_flags,
|
||||
struct cgroup_subsys_state *blkcg_css)
|
||||
struct cgroup_subsys_state *blkcg_css,
|
||||
bool writeback)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct bio *bio = NULL;
|
||||
@ -524,13 +526,14 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
if (!cb)
|
||||
return BLK_STS_RESOURCE;
|
||||
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
|
||||
cb->errors = 0;
|
||||
cb->status = BLK_STS_OK;
|
||||
cb->inode = &inode->vfs_inode;
|
||||
cb->start = start;
|
||||
cb->len = len;
|
||||
cb->mirror_num = 0;
|
||||
cb->compressed_pages = compressed_pages;
|
||||
cb->compressed_len = compressed_len;
|
||||
cb->writeback = writeback;
|
||||
cb->orig_bio = NULL;
|
||||
cb->nr_pages = nr_pages;
|
||||
|
||||
@ -591,7 +594,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
|
||||
if (submit) {
|
||||
if (!skip_sum) {
|
||||
ret = btrfs_csum_one_bio(inode, bio, start, 1);
|
||||
ret = btrfs_csum_one_bio(inode, bio, start, true);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
}
|
||||
@ -808,7 +811,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
u64 em_len;
|
||||
u64 em_start;
|
||||
struct extent_map *em;
|
||||
blk_status_t ret = BLK_STS_RESOURCE;
|
||||
blk_status_t ret;
|
||||
int faili = 0;
|
||||
u8 *sums;
|
||||
|
||||
@ -821,17 +824,21 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
read_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
|
||||
read_unlock(&em_tree->lock);
|
||||
if (!em)
|
||||
return BLK_STS_IOERR;
|
||||
if (!em) {
|
||||
ret = BLK_STS_IOERR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
|
||||
compressed_len = em->block_len;
|
||||
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
|
||||
if (!cb)
|
||||
if (!cb) {
|
||||
ret = BLK_STS_RESOURCE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
|
||||
cb->errors = 0;
|
||||
cb->status = BLK_STS_OK;
|
||||
cb->inode = inode;
|
||||
cb->mirror_num = mirror_num;
|
||||
sums = cb->sums;
|
||||
@ -851,8 +858,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
|
||||
cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
|
||||
GFP_NOFS);
|
||||
if (!cb->compressed_pages)
|
||||
if (!cb->compressed_pages) {
|
||||
ret = BLK_STS_RESOURCE;
|
||||
goto fail1;
|
||||
}
|
||||
|
||||
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
|
||||
cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS);
|
||||
@ -938,7 +947,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
comp_bio = NULL;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return BLK_STS_OK;
|
||||
|
||||
fail2:
|
||||
while (faili >= 0) {
|
||||
@ -951,6 +960,8 @@ fail1:
|
||||
kfree(cb);
|
||||
out:
|
||||
free_extent_map(em);
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
return ret;
|
||||
finish_cb:
|
||||
if (comp_bio) {
|
||||
@ -970,7 +981,7 @@ finish_cb:
|
||||
*/
|
||||
ASSERT(refcount_read(&cb->pending_sectors));
|
||||
/* Now we are the only one referring @cb, can finish it safely. */
|
||||
finish_compressed_bio_read(cb, NULL);
|
||||
finish_compressed_bio_read(cb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,8 @@ struct btrfs_inode;
|
||||
|
||||
/* Maximum length of compressed data stored on disk */
|
||||
#define BTRFS_MAX_COMPRESSED (SZ_128K)
|
||||
static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
|
||||
|
||||
/* Maximum size of data before compression */
|
||||
#define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
|
||||
|
||||
@ -52,8 +54,11 @@ struct compressed_bio {
|
||||
/* The compression algorithm for this bio */
|
||||
u8 compress_type;
|
||||
|
||||
/* Whether this is a write for writeback. */
|
||||
bool writeback;
|
||||
|
||||
/* IO errors */
|
||||
u8 errors;
|
||||
blk_status_t status;
|
||||
int mirror_num;
|
||||
|
||||
/* for reads, this is the bio we are copying the data into */
|
||||
@ -95,7 +100,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
struct page **compressed_pages,
|
||||
unsigned int nr_pages,
|
||||
unsigned int write_flags,
|
||||
struct cgroup_subsys_state *blkcg_css);
|
||||
struct cgroup_subsys_state *blkcg_css,
|
||||
bool writeback);
|
||||
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags);
|
||||
|
||||
|
108
fs/btrfs/ctree.c
108
fs/btrfs/ctree.c
@ -846,9 +846,11 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
|
||||
btrfs_header_owner(parent),
|
||||
btrfs_node_ptr_generation(parent, slot),
|
||||
level - 1, &first_key);
|
||||
if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
|
||||
if (IS_ERR(eb))
|
||||
return eb;
|
||||
if (!extent_buffer_uptodate(eb)) {
|
||||
free_extent_buffer(eb);
|
||||
eb = ERR_PTR(-EIO);
|
||||
return ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
return eb;
|
||||
@ -1436,13 +1438,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
||||
|
||||
/* now we're allowed to do a blocking uptodate check */
|
||||
ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
|
||||
if (!ret) {
|
||||
*eb_ret = tmp;
|
||||
return 0;
|
||||
if (ret) {
|
||||
free_extent_buffer(tmp);
|
||||
btrfs_release_path(p);
|
||||
return -EIO;
|
||||
}
|
||||
free_extent_buffer(tmp);
|
||||
btrfs_release_path(p);
|
||||
return -EIO;
|
||||
*eb_ret = tmp;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1460,19 +1462,19 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
||||
ret = -EAGAIN;
|
||||
tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
|
||||
gen, parent_level - 1, &first_key);
|
||||
if (!IS_ERR(tmp)) {
|
||||
/*
|
||||
* If the read above didn't mark this buffer up to date,
|
||||
* it will never end up being up to date. Set ret to EIO now
|
||||
* and give up so that our caller doesn't loop forever
|
||||
* on our EAGAINs.
|
||||
*/
|
||||
if (!extent_buffer_uptodate(tmp))
|
||||
ret = -EIO;
|
||||
free_extent_buffer(tmp);
|
||||
} else {
|
||||
ret = PTR_ERR(tmp);
|
||||
if (IS_ERR(tmp)) {
|
||||
btrfs_release_path(p);
|
||||
return PTR_ERR(tmp);
|
||||
}
|
||||
/*
|
||||
* If the read above didn't mark this buffer up to date,
|
||||
* it will never end up being up to date. Set ret to EIO now
|
||||
* and give up so that our caller doesn't loop forever
|
||||
* on our EAGAINs.
|
||||
*/
|
||||
if (!extent_buffer_uptodate(tmp))
|
||||
ret = -EIO;
|
||||
free_extent_buffer(tmp);
|
||||
|
||||
btrfs_release_path(p);
|
||||
return ret;
|
||||
@ -2990,16 +2992,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
if (free_space < data_size)
|
||||
goto out_unlock;
|
||||
|
||||
/* cow and double check */
|
||||
ret = btrfs_cow_block(trans, root, right, upper,
|
||||
slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
free_space = btrfs_leaf_free_space(right);
|
||||
if (free_space < data_size)
|
||||
goto out_unlock;
|
||||
|
||||
left_nritems = btrfs_header_nritems(left);
|
||||
if (left_nritems == 0)
|
||||
goto out_unlock;
|
||||
@ -3224,7 +3221,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* cow and double check */
|
||||
ret = btrfs_cow_block(trans, root, left,
|
||||
path->nodes[1], slot - 1, &left,
|
||||
BTRFS_NESTING_LEFT_COW);
|
||||
@ -3235,12 +3231,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
goto out;
|
||||
}
|
||||
|
||||
free_space = btrfs_leaf_free_space(left);
|
||||
if (free_space < data_size) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (check_sibling_keys(left, right)) {
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
@ -4170,24 +4160,22 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct extent_buffer *leaf;
|
||||
u32 last_off;
|
||||
u32 dsize = 0;
|
||||
int ret = 0;
|
||||
int wret;
|
||||
int i;
|
||||
u32 nritems;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
last_off = btrfs_item_offset(leaf, slot + nr - 1);
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
dsize += btrfs_item_size(leaf, slot + i);
|
||||
|
||||
nritems = btrfs_header_nritems(leaf);
|
||||
|
||||
if (slot + nr != nritems) {
|
||||
int data_end = leaf_data_end(leaf);
|
||||
const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
|
||||
const int data_end = leaf_data_end(leaf);
|
||||
struct btrfs_map_token token;
|
||||
u32 dsize = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
dsize += btrfs_item_size(leaf, slot + i);
|
||||
|
||||
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end + dsize,
|
||||
@ -4227,24 +4215,50 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
fixup_low_keys(path, &disk_key, 1);
|
||||
}
|
||||
|
||||
/* delete the leaf if it is mostly empty */
|
||||
/*
|
||||
* Try to delete the leaf if it is mostly empty. We do this by
|
||||
* trying to move all its items into its left and right neighbours.
|
||||
* If we can't move all the items, then we don't delete it - it's
|
||||
* not ideal, but future insertions might fill the leaf with more
|
||||
* items, or items from other leaves might be moved later into our
|
||||
* leaf due to deletions on those leaves.
|
||||
*/
|
||||
if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
|
||||
u32 min_push_space;
|
||||
|
||||
/* push_leaf_left fixes the path.
|
||||
* make sure the path still points to our leaf
|
||||
* for possible call to del_ptr below
|
||||
*/
|
||||
slot = path->slots[1];
|
||||
atomic_inc(&leaf->refs);
|
||||
|
||||
wret = push_leaf_left(trans, root, path, 1, 1,
|
||||
1, (u32)-1);
|
||||
/*
|
||||
* We want to be able to at least push one item to the
|
||||
* left neighbour leaf, and that's the first item.
|
||||
*/
|
||||
min_push_space = sizeof(struct btrfs_item) +
|
||||
btrfs_item_size(leaf, 0);
|
||||
wret = push_leaf_left(trans, root, path, 0,
|
||||
min_push_space, 1, (u32)-1);
|
||||
if (wret < 0 && wret != -ENOSPC)
|
||||
ret = wret;
|
||||
|
||||
if (path->nodes[0] == leaf &&
|
||||
btrfs_header_nritems(leaf)) {
|
||||
wret = push_leaf_right(trans, root, path, 1,
|
||||
1, 1, 0);
|
||||
/*
|
||||
* If we were not able to push all items from our
|
||||
* leaf to its left neighbour, then attempt to
|
||||
* either push all the remaining items to the
|
||||
* right neighbour or none. There's no advantage
|
||||
* in pushing only some items, instead of all, as
|
||||
* it's pointless to end up with a leaf having
|
||||
* too few items while the neighbours can be full
|
||||
* or nearly full.
|
||||
*/
|
||||
nritems = btrfs_header_nritems(leaf);
|
||||
min_push_space = leaf_space_used(leaf, 0, nritems);
|
||||
wret = push_leaf_right(trans, root, path, 0,
|
||||
min_push_space, 1, 0);
|
||||
if (wret < 0 && wret != -ENOSPC)
|
||||
ret = wret;
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
|
||||
struct btrfs_ordered_sum;
|
||||
struct btrfs_ref;
|
||||
struct btrfs_bio;
|
||||
struct btrfs_ioctl_encoded_io_args;
|
||||
|
||||
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
|
||||
|
||||
@ -148,6 +149,8 @@ enum {
|
||||
|
||||
/* Indicates there was an error cleaning up a log tree. */
|
||||
BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
|
||||
|
||||
BTRFS_FS_STATE_COUNT
|
||||
};
|
||||
|
||||
#define BTRFS_BACKREF_REV_MAX 256
|
||||
@ -274,8 +277,14 @@ struct btrfs_super_block {
|
||||
/* the UUID written into btree blocks */
|
||||
u8 metadata_uuid[BTRFS_FSID_SIZE];
|
||||
|
||||
/* Extent tree v2 */
|
||||
__le64 block_group_root;
|
||||
__le64 block_group_root_generation;
|
||||
u8 block_group_root_level;
|
||||
|
||||
/* future expansion */
|
||||
__le64 reserved[28];
|
||||
u8 reserved8[7];
|
||||
__le64 reserved[25];
|
||||
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
|
||||
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
|
||||
|
||||
@ -300,6 +309,26 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
|
||||
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
|
||||
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
/*
|
||||
* Extent tree v2 supported only with CONFIG_BTRFS_DEBUG
|
||||
*/
|
||||
#define BTRFS_FEATURE_INCOMPAT_SUPP \
|
||||
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
|
||||
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
|
||||
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
|
||||
BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
|
||||
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
|
||||
BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \
|
||||
BTRFS_FEATURE_INCOMPAT_RAID56 | \
|
||||
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
|
||||
BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
|
||||
BTRFS_FEATURE_INCOMPAT_NO_HOLES | \
|
||||
BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
|
||||
BTRFS_FEATURE_INCOMPAT_RAID1C34 | \
|
||||
BTRFS_FEATURE_INCOMPAT_ZONED | \
|
||||
BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
|
||||
#else
|
||||
#define BTRFS_FEATURE_INCOMPAT_SUPP \
|
||||
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
|
||||
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
|
||||
@ -314,6 +343,7 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
|
||||
BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
|
||||
BTRFS_FEATURE_INCOMPAT_RAID1C34 | \
|
||||
BTRFS_FEATURE_INCOMPAT_ZONED)
|
||||
#endif
|
||||
|
||||
#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
|
||||
(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
|
||||
@ -636,6 +666,7 @@ struct btrfs_fs_info {
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_root *uuid_root;
|
||||
struct btrfs_root *data_reloc_root;
|
||||
struct btrfs_root *block_group_root;
|
||||
|
||||
/* the log root tree is a directory of all the other log roots */
|
||||
struct btrfs_root *log_root_tree;
|
||||
@ -1030,6 +1061,8 @@ struct btrfs_fs_info {
|
||||
spinlock_t relocation_bg_lock;
|
||||
u64 data_reloc_bg;
|
||||
|
||||
u64 nr_global_roots;
|
||||
|
||||
spinlock_t zone_active_bgs_lock;
|
||||
struct list_head zone_active_bgs;
|
||||
|
||||
@ -1609,25 +1642,25 @@ DECLARE_BTRFS_SETGET_BITS(64)
|
||||
static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
|
||||
const type *s) \
|
||||
{ \
|
||||
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
|
||||
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
|
||||
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
|
||||
} \
|
||||
static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
|
||||
u##bits val) \
|
||||
{ \
|
||||
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
|
||||
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
|
||||
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
|
||||
} \
|
||||
static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \
|
||||
const type *s) \
|
||||
{ \
|
||||
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
|
||||
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
|
||||
return btrfs_get_token_##bits(token, s, offsetof(type, member));\
|
||||
} \
|
||||
static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
|
||||
type *s, u##bits val) \
|
||||
{ \
|
||||
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
|
||||
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
|
||||
btrfs_set_token_##bits(token, s, offsetof(type, member), val); \
|
||||
}
|
||||
|
||||
@ -1658,8 +1691,8 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
|
||||
static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
|
||||
struct btrfs_dev_item *s)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(u64) !=
|
||||
sizeof(((struct btrfs_dev_item *)0))->total_bytes);
|
||||
static_assert(sizeof(u64) ==
|
||||
sizeof(((struct btrfs_dev_item *)0))->total_bytes);
|
||||
return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
|
||||
total_bytes));
|
||||
}
|
||||
@ -1667,8 +1700,8 @@ static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
|
||||
struct btrfs_dev_item *s,
|
||||
u64 val)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(u64) !=
|
||||
sizeof(((struct btrfs_dev_item *)0))->total_bytes);
|
||||
static_assert(sizeof(u64) ==
|
||||
sizeof(((struct btrfs_dev_item *)0))->total_bytes);
|
||||
WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
|
||||
btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
|
||||
}
|
||||
@ -2328,6 +2361,17 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
|
||||
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
|
||||
num_devices, 64);
|
||||
|
||||
/*
|
||||
* For extent tree v2 we overload the extent root with the block group root, as
|
||||
* we will have multiple extent roots.
|
||||
*/
|
||||
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root, struct btrfs_root_backup,
|
||||
extent_root, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_gen, struct btrfs_root_backup,
|
||||
extent_root_gen, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_level,
|
||||
struct btrfs_root_backup, extent_root_level, 8);
|
||||
|
||||
/* struct btrfs_balance_item */
|
||||
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
|
||||
|
||||
@ -2462,6 +2506,13 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
|
||||
BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
|
||||
uuid_tree_generation, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_block_group_root, struct btrfs_super_block,
|
||||
block_group_root, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_block_group_root_generation,
|
||||
struct btrfs_super_block,
|
||||
block_group_root_generation, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_block_group_root_level, struct btrfs_super_block,
|
||||
block_group_root_level, 8);
|
||||
|
||||
int btrfs_super_csum_size(const struct btrfs_super_block *s);
|
||||
const char *btrfs_super_csum_name(u16 csum_type);
|
||||
@ -2839,7 +2890,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *rsv);
|
||||
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
|
||||
|
||||
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
|
||||
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
||||
u64 disk_num_bytes);
|
||||
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
|
||||
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 end);
|
||||
@ -3155,7 +3207,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_ordered_sum *sums);
|
||||
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
|
||||
u64 file_start, int contig);
|
||||
u64 offset, bool one_ordered);
|
||||
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
||||
struct list_head *list, int search_commit);
|
||||
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
@ -3256,6 +3308,11 @@ int btrfs_writepage_cow_fixup(struct page *page);
|
||||
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
|
||||
struct page *page, u64 start,
|
||||
u64 end, bool uptodate);
|
||||
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
|
||||
extern const struct dentry_operations btrfs_dentry_operations;
|
||||
extern const struct iomap_ops btrfs_dio_iomap_ops;
|
||||
extern const struct iomap_dio_ops btrfs_dio_ops;
|
||||
@ -3318,6 +3375,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
|
||||
struct btrfs_trans_handle **trans_out);
|
||||
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *inode, u64 start, u64 end);
|
||||
ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
int btrfs_release_file(struct inode *inode, struct file *file);
|
||||
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
|
||||
size_t num_pages, loff_t pos, size_t write_bytes,
|
||||
@ -3774,7 +3833,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
int btrfs_recover_relocation(struct btrfs_root *root);
|
||||
int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
|
||||
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct extent_buffer *buf,
|
||||
|
@ -270,11 +270,11 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
|
||||
u64 num_bytes, u64 *meta_reserve,
|
||||
u64 *qgroup_reserve)
|
||||
u64 num_bytes, u64 disk_num_bytes,
|
||||
u64 *meta_reserve, u64 *qgroup_reserve)
|
||||
{
|
||||
u64 nr_extents = count_max_extents(num_bytes);
|
||||
u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
|
||||
u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
|
||||
u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
|
||||
|
||||
*meta_reserve = btrfs_calc_insert_metadata_size(fs_info,
|
||||
@ -288,7 +288,8 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
|
||||
*qgroup_reserve = nr_extents * fs_info->nodesize;
|
||||
}
|
||||
|
||||
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
|
||||
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
||||
u64 disk_num_bytes)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
@ -318,6 +319,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
|
||||
}
|
||||
|
||||
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
|
||||
disk_num_bytes = ALIGN(disk_num_bytes, fs_info->sectorsize);
|
||||
|
||||
/*
|
||||
* We always want to do it this way, every other way is wrong and ends
|
||||
@ -329,8 +331,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
|
||||
* everything out and try again, which is bad. This way we just
|
||||
* over-reserve slightly, and clean up the mess when we are done.
|
||||
*/
|
||||
calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
|
||||
&qgroup_reserve);
|
||||
calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
|
||||
&meta_reserve, &qgroup_reserve);
|
||||
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -349,7 +351,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
|
||||
spin_lock(&inode->lock);
|
||||
nr_extents = count_max_extents(num_bytes);
|
||||
btrfs_mod_outstanding_extents(inode, nr_extents);
|
||||
inode->csum_bytes += num_bytes;
|
||||
inode->csum_bytes += disk_num_bytes;
|
||||
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
|
||||
spin_unlock(&inode->lock);
|
||||
|
||||
@ -454,7 +456,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
|
||||
ret = btrfs_check_data_free_space(inode, reserved, start, len);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = btrfs_delalloc_reserve_metadata(inode, len);
|
||||
ret = btrfs_delalloc_reserve_metadata(inode, len, len);
|
||||
if (ret < 0) {
|
||||
btrfs_free_reserved_data_space(inode, *reserved, start, len);
|
||||
extent_changeset_free(*reserved);
|
||||
|
@ -243,6 +243,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *srcdev,
|
||||
struct btrfs_device **device_out)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
|
||||
struct btrfs_device *device;
|
||||
struct block_device *bdev;
|
||||
struct rcu_string *name;
|
||||
@ -271,7 +272,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
|
||||
sync_blockdev(bdev);
|
||||
|
||||
list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
|
||||
list_for_each_entry(device, &fs_devices->devices, dev_list) {
|
||||
if (device->bdev == bdev) {
|
||||
btrfs_err(fs_info,
|
||||
"target device is in the filesystem!");
|
||||
@ -302,6 +303,9 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
goto error;
|
||||
}
|
||||
rcu_assign_pointer(device->name, name);
|
||||
ret = lookup_bdev(device_path, &device->devt);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
|
||||
device->generation = 0;
|
||||
@ -320,17 +324,17 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
device->mode = FMODE_EXCL;
|
||||
device->dev_stats_valid = 1;
|
||||
set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
|
||||
device->fs_devices = fs_info->fs_devices;
|
||||
device->fs_devices = fs_devices;
|
||||
|
||||
ret = btrfs_get_dev_zone_info(device, false);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
list_add(&device->dev_list, &fs_info->fs_devices->devices);
|
||||
fs_info->fs_devices->num_devices++;
|
||||
fs_info->fs_devices->open_devices++;
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
list_add(&device->dev_list, &fs_devices->devices);
|
||||
fs_devices->num_devices++;
|
||||
fs_devices->open_devices++;
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
|
||||
*device_out = device;
|
||||
return 0;
|
||||
|
@ -441,17 +441,31 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
|
||||
else
|
||||
ret = btrfs_check_leaf_full(eb);
|
||||
|
||||
if (ret < 0) {
|
||||
btrfs_print_tree(eb, 0);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
/*
|
||||
* Also check the generation, the eb reached here must be newer than
|
||||
* last committed. Or something seriously wrong happened.
|
||||
*/
|
||||
if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) {
|
||||
ret = -EUCLEAN;
|
||||
btrfs_err(fs_info,
|
||||
"block=%llu write time tree block corruption detected",
|
||||
eb->start);
|
||||
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
|
||||
return ret;
|
||||
"block=%llu bad generation, have %llu expect > %llu",
|
||||
eb->start, btrfs_header_generation(eb),
|
||||
fs_info->last_trans_committed);
|
||||
goto error;
|
||||
}
|
||||
write_extent_buffer(eb, result, 0, fs_info->csum_size);
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
btrfs_print_tree(eb, 0);
|
||||
btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
|
||||
eb->start);
|
||||
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Checksum all dirty extent buffers in one bio_vec */
|
||||
@ -1289,12 +1303,33 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
|
||||
return root;
|
||||
}
|
||||
|
||||
static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr)
|
||||
{
|
||||
struct btrfs_block_group *block_group;
|
||||
u64 ret;
|
||||
|
||||
if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
|
||||
return 0;
|
||||
|
||||
if (bytenr)
|
||||
block_group = btrfs_lookup_block_group(fs_info, bytenr);
|
||||
else
|
||||
block_group = btrfs_lookup_first_block_group(fs_info, bytenr);
|
||||
ASSERT(block_group);
|
||||
if (!block_group)
|
||||
return 0;
|
||||
ret = block_group->global_root_id;
|
||||
btrfs_put_block_group(block_group);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr)
|
||||
{
|
||||
struct btrfs_key key = {
|
||||
.objectid = BTRFS_CSUM_TREE_OBJECTID,
|
||||
.type = BTRFS_ROOT_ITEM_KEY,
|
||||
.offset = 0,
|
||||
.offset = btrfs_global_root_id(fs_info, bytenr),
|
||||
};
|
||||
|
||||
return btrfs_global_root(fs_info, &key);
|
||||
@ -1305,7 +1340,7 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
|
||||
struct btrfs_key key = {
|
||||
.objectid = BTRFS_EXTENT_TREE_OBJECTID,
|
||||
.type = BTRFS_ROOT_ITEM_KEY,
|
||||
.offset = 0,
|
||||
.offset = btrfs_global_root_id(fs_info, bytenr),
|
||||
};
|
||||
|
||||
return btrfs_global_root(fs_info, &key);
|
||||
@ -1522,7 +1557,8 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
|
||||
ret = PTR_ERR(root->node);
|
||||
root->node = NULL;
|
||||
goto fail;
|
||||
} else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
|
||||
}
|
||||
if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
|
||||
ret = -EIO;
|
||||
goto fail;
|
||||
}
|
||||
@ -1727,6 +1763,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
|
||||
btrfs_put_root(fs_info->uuid_root);
|
||||
btrfs_put_root(fs_info->fs_root);
|
||||
btrfs_put_root(fs_info->data_reloc_root);
|
||||
btrfs_put_root(fs_info->block_group_root);
|
||||
btrfs_check_leaked_roots(fs_info);
|
||||
btrfs_extent_buffer_leak_debug_check(fs_info);
|
||||
kfree(fs_info->super_copy);
|
||||
@ -1925,8 +1962,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
|
||||
|
||||
static int cleaner_kthread(void *arg)
|
||||
{
|
||||
struct btrfs_root *root = arg;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)arg;
|
||||
int again;
|
||||
|
||||
while (1) {
|
||||
@ -1959,7 +1995,7 @@ static int cleaner_kthread(void *arg)
|
||||
|
||||
btrfs_run_delayed_iputs(fs_info);
|
||||
|
||||
again = btrfs_clean_one_deleted_snapshot(root);
|
||||
again = btrfs_clean_one_deleted_snapshot(fs_info);
|
||||
mutex_unlock(&fs_info->cleaner_mutex);
|
||||
|
||||
/*
|
||||
@ -2095,8 +2131,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
|
||||
{
|
||||
const int next_backup = info->backup_root_index;
|
||||
struct btrfs_root_backup *root_backup;
|
||||
struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
|
||||
struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
|
||||
|
||||
root_backup = info->super_for_commit->super_roots + next_backup;
|
||||
|
||||
@ -2121,11 +2155,30 @@ static void backup_super_roots(struct btrfs_fs_info *info)
|
||||
btrfs_set_backup_chunk_root_level(root_backup,
|
||||
btrfs_header_level(info->chunk_root->node));
|
||||
|
||||
btrfs_set_backup_extent_root(root_backup, extent_root->node->start);
|
||||
btrfs_set_backup_extent_root_gen(root_backup,
|
||||
btrfs_header_generation(extent_root->node));
|
||||
btrfs_set_backup_extent_root_level(root_backup,
|
||||
btrfs_header_level(extent_root->node));
|
||||
if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
|
||||
btrfs_set_backup_block_group_root(root_backup,
|
||||
info->block_group_root->node->start);
|
||||
btrfs_set_backup_block_group_root_gen(root_backup,
|
||||
btrfs_header_generation(info->block_group_root->node));
|
||||
btrfs_set_backup_block_group_root_level(root_backup,
|
||||
btrfs_header_level(info->block_group_root->node));
|
||||
} else {
|
||||
struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
|
||||
struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
|
||||
|
||||
btrfs_set_backup_extent_root(root_backup,
|
||||
extent_root->node->start);
|
||||
btrfs_set_backup_extent_root_gen(root_backup,
|
||||
btrfs_header_generation(extent_root->node));
|
||||
btrfs_set_backup_extent_root_level(root_backup,
|
||||
btrfs_header_level(extent_root->node));
|
||||
|
||||
btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
|
||||
btrfs_set_backup_csum_root_gen(root_backup,
|
||||
btrfs_header_generation(csum_root->node));
|
||||
btrfs_set_backup_csum_root_level(root_backup,
|
||||
btrfs_header_level(csum_root->node));
|
||||
}
|
||||
|
||||
/*
|
||||
* we might commit during log recovery, which happens before we set
|
||||
@ -2146,12 +2199,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
|
||||
btrfs_set_backup_dev_root_level(root_backup,
|
||||
btrfs_header_level(info->dev_root->node));
|
||||
|
||||
btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
|
||||
btrfs_set_backup_csum_root_gen(root_backup,
|
||||
btrfs_header_generation(csum_root->node));
|
||||
btrfs_set_backup_csum_root_level(root_backup,
|
||||
btrfs_header_level(csum_root->node));
|
||||
|
||||
btrfs_set_backup_total_bytes(root_backup,
|
||||
btrfs_super_total_bytes(info->super_copy));
|
||||
btrfs_set_backup_bytes_used(root_backup,
|
||||
@ -2269,6 +2316,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
|
||||
free_root_extent_buffers(info->uuid_root);
|
||||
free_root_extent_buffers(info->fs_root);
|
||||
free_root_extent_buffers(info->data_reloc_root);
|
||||
free_root_extent_buffers(info->block_group_root);
|
||||
if (free_chunk_root)
|
||||
free_root_extent_buffers(info->chunk_root);
|
||||
}
|
||||
@ -2504,11 +2552,13 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
|
||||
log_tree_root->node = NULL;
|
||||
btrfs_put_root(log_tree_root);
|
||||
return ret;
|
||||
} else if (!extent_buffer_uptodate(log_tree_root->node)) {
|
||||
}
|
||||
if (!extent_buffer_uptodate(log_tree_root->node)) {
|
||||
btrfs_err(fs_info, "failed to read log tree");
|
||||
btrfs_put_root(log_tree_root);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* returns with log_tree_root freed on success */
|
||||
ret = btrfs_recover_log_trees(log_tree_root);
|
||||
if (ret) {
|
||||
@ -2533,6 +2583,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = tree_root->fs_info;
|
||||
struct btrfs_root *root;
|
||||
u64 max_global_id = 0;
|
||||
int ret;
|
||||
struct btrfs_key key = {
|
||||
.objectid = objectid,
|
||||
@ -2568,6 +2619,13 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
|
||||
break;
|
||||
btrfs_release_path(path);
|
||||
|
||||
/*
|
||||
* Just worry about this for extent tree, it'll be the same for
|
||||
* everybody.
|
||||
*/
|
||||
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
|
||||
max_global_id = max(max_global_id, key.offset);
|
||||
|
||||
found = true;
|
||||
root = read_tree_root_path(tree_root, path, &key);
|
||||
if (IS_ERR(root)) {
|
||||
@ -2585,6 +2643,9 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
|
||||
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
|
||||
fs_info->nr_global_roots = max_global_id + 1;
|
||||
|
||||
if (!found || ret) {
|
||||
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
|
||||
set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
|
||||
@ -2930,6 +2991,56 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
root->node = read_tree_block(root->fs_info, bytenr,
|
||||
root->root_key.objectid, gen, level, NULL);
|
||||
if (IS_ERR(root->node)) {
|
||||
ret = PTR_ERR(root->node);
|
||||
root->node = NULL;
|
||||
return ret;
|
||||
}
|
||||
if (!extent_buffer_uptodate(root->node)) {
|
||||
free_extent_buffer(root->node);
|
||||
root->node = NULL;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
btrfs_set_root_node(&root->root_item, root->node);
|
||||
root->commit_root = btrfs_root_node(root);
|
||||
btrfs_set_root_refs(&root->root_item, 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int load_important_roots(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_super_block *sb = fs_info->super_copy;
|
||||
u64 gen, bytenr;
|
||||
int level, ret;
|
||||
|
||||
bytenr = btrfs_super_root(sb);
|
||||
gen = btrfs_super_generation(sb);
|
||||
level = btrfs_super_root_level(sb);
|
||||
ret = load_super_root(fs_info->tree_root, bytenr, gen, level);
|
||||
if (ret) {
|
||||
btrfs_warn(fs_info, "couldn't read tree root");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
|
||||
return 0;
|
||||
|
||||
bytenr = btrfs_super_block_group_root(sb);
|
||||
gen = btrfs_super_block_group_root_generation(sb);
|
||||
level = btrfs_super_block_group_root_level(sb);
|
||||
ret = load_super_root(fs_info->block_group_root, bytenr, gen, level);
|
||||
if (ret)
|
||||
btrfs_warn(fs_info, "couldn't read block group root");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
int backup_index = find_newest_super_backup(fs_info);
|
||||
@ -2939,10 +3050,17 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
|
||||
u64 generation;
|
||||
int level;
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
struct btrfs_root *root;
|
||||
|
||||
root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID,
|
||||
GFP_KERNEL);
|
||||
if (!root)
|
||||
return -ENOMEM;
|
||||
fs_info->block_group_root = root;
|
||||
}
|
||||
|
||||
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
|
||||
if (handle_error) {
|
||||
if (!IS_ERR(tree_root->node))
|
||||
free_extent_buffer(tree_root->node);
|
||||
@ -2967,29 +3085,13 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
generation = btrfs_super_generation(sb);
|
||||
level = btrfs_super_root_level(sb);
|
||||
tree_root->node = read_tree_block(fs_info, btrfs_super_root(sb),
|
||||
BTRFS_ROOT_TREE_OBJECTID,
|
||||
generation, level, NULL);
|
||||
if (IS_ERR(tree_root->node)) {
|
||||
handle_error = true;
|
||||
ret = PTR_ERR(tree_root->node);
|
||||
tree_root->node = NULL;
|
||||
btrfs_warn(fs_info, "couldn't read tree root");
|
||||
continue;
|
||||
|
||||
} else if (!extent_buffer_uptodate(tree_root->node)) {
|
||||
ret = load_important_roots(fs_info);
|
||||
if (ret) {
|
||||
handle_error = true;
|
||||
ret = -EIO;
|
||||
btrfs_warn(fs_info, "error while reading tree root");
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_set_root_node(&tree_root->root_item, tree_root->node);
|
||||
tree_root->commit_root = btrfs_root_node(tree_root);
|
||||
btrfs_set_root_refs(&tree_root->root_item, 1);
|
||||
|
||||
/*
|
||||
* No need to hold btrfs_root::objectid_mutex since the fs
|
||||
* hasn't been fully initialised and we are the only user
|
||||
@ -3009,8 +3111,8 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
|
||||
/* All successful */
|
||||
fs_info->generation = generation;
|
||||
fs_info->last_trans_committed = generation;
|
||||
fs_info->generation = btrfs_header_generation(tree_root->node);
|
||||
fs_info->last_trans_committed = fs_info->generation;
|
||||
fs_info->last_reloc_trans = 0;
|
||||
|
||||
/* Always begin writing backup roots after the one being used */
|
||||
@ -3293,7 +3395,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
|
||||
up_read(&fs_info->cleanup_work_sem);
|
||||
|
||||
mutex_lock(&fs_info->cleaner_mutex);
|
||||
ret = btrfs_recover_relocation(fs_info->tree_root);
|
||||
ret = btrfs_recover_relocation(fs_info);
|
||||
mutex_unlock(&fs_info->cleaner_mutex);
|
||||
if (ret < 0) {
|
||||
btrfs_warn(fs_info, "failed to recover relocation: %d", ret);
|
||||
@ -3594,21 +3696,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
|
||||
generation = btrfs_super_chunk_root_generation(disk_super);
|
||||
level = btrfs_super_chunk_root_level(disk_super);
|
||||
|
||||
chunk_root->node = read_tree_block(fs_info,
|
||||
btrfs_super_chunk_root(disk_super),
|
||||
BTRFS_CHUNK_TREE_OBJECTID,
|
||||
generation, level, NULL);
|
||||
if (IS_ERR(chunk_root->node) ||
|
||||
!extent_buffer_uptodate(chunk_root->node)) {
|
||||
ret = load_super_root(chunk_root, btrfs_super_chunk_root(disk_super),
|
||||
generation, level);
|
||||
if (ret) {
|
||||
btrfs_err(fs_info, "failed to read chunk root");
|
||||
if (!IS_ERR(chunk_root->node))
|
||||
free_extent_buffer(chunk_root->node);
|
||||
chunk_root->node = NULL;
|
||||
goto fail_tree_roots;
|
||||
}
|
||||
btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
|
||||
chunk_root->commit_root = btrfs_root_node(chunk_root);
|
||||
|
||||
read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
|
||||
offsetof(struct btrfs_header, chunk_tree_uuid),
|
||||
@ -3728,7 +3821,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
goto fail_sysfs;
|
||||
}
|
||||
|
||||
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
|
||||
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info,
|
||||
"btrfs-cleaner");
|
||||
if (IS_ERR(fs_info->cleaner_kthread))
|
||||
goto fail_sysfs;
|
||||
|
@ -111,6 +111,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
|
||||
|
||||
static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
|
||||
return fs_info->block_group_root;
|
||||
return btrfs_extent_root(fs_info, 0);
|
||||
}
|
||||
|
||||
|
@ -598,7 +598,7 @@ fail:
|
||||
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
int refs_to_drop, int *last_ref)
|
||||
int refs_to_drop)
|
||||
{
|
||||
struct btrfs_key key;
|
||||
struct btrfs_extent_data_ref *ref1 = NULL;
|
||||
@ -631,7 +631,6 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (num_refs == 0) {
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
*last_ref = 1;
|
||||
} else {
|
||||
if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
|
||||
btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
|
||||
@ -1072,8 +1071,7 @@ static noinline_for_stack
|
||||
void update_inline_extent_backref(struct btrfs_path *path,
|
||||
struct btrfs_extent_inline_ref *iref,
|
||||
int refs_to_mod,
|
||||
struct btrfs_delayed_extent_op *extent_op,
|
||||
int *last_ref)
|
||||
struct btrfs_delayed_extent_op *extent_op)
|
||||
{
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
struct btrfs_extent_item *ei;
|
||||
@ -1121,7 +1119,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
|
||||
else
|
||||
btrfs_set_shared_data_ref_count(leaf, sref, refs);
|
||||
} else {
|
||||
*last_ref = 1;
|
||||
size = btrfs_extent_inline_ref_size(type);
|
||||
item_size = btrfs_item_size(leaf, path->slots[0]);
|
||||
ptr = (unsigned long)iref;
|
||||
@ -1166,8 +1163,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
return -EUCLEAN;
|
||||
}
|
||||
update_inline_extent_backref(path, iref, refs_to_add,
|
||||
extent_op, NULL);
|
||||
update_inline_extent_backref(path, iref, refs_to_add, extent_op);
|
||||
} else if (ret == -ENOENT) {
|
||||
setup_inline_extent_backref(trans->fs_info, path, iref, parent,
|
||||
root_objectid, owner, offset,
|
||||
@ -1181,21 +1177,17 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_extent_inline_ref *iref,
|
||||
int refs_to_drop, int is_data, int *last_ref)
|
||||
int refs_to_drop, int is_data)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(!is_data && refs_to_drop != 1);
|
||||
if (iref) {
|
||||
update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
|
||||
last_ref);
|
||||
} else if (is_data) {
|
||||
ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
|
||||
last_ref);
|
||||
} else {
|
||||
*last_ref = 1;
|
||||
if (iref)
|
||||
update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
|
||||
else if (is_data)
|
||||
ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
|
||||
else
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2766,12 +2758,11 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
|
||||
spin_unlock(&cache->lock);
|
||||
if (!readonly && return_free_space &&
|
||||
global_rsv->space_info == space_info) {
|
||||
u64 to_add = len;
|
||||
|
||||
spin_lock(&global_rsv->lock);
|
||||
if (!global_rsv->full) {
|
||||
to_add = min(len, global_rsv->size -
|
||||
global_rsv->reserved);
|
||||
u64 to_add = min(len, global_rsv->size -
|
||||
global_rsv->reserved);
|
||||
|
||||
global_rsv->reserved += to_add;
|
||||
btrfs_space_info_update_bytes_may_use(fs_info,
|
||||
space_info, to_add);
|
||||
@ -2862,6 +2853,35 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, bool is_data)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (is_data) {
|
||||
struct btrfs_root *csum_root;
|
||||
|
||||
csum_root = btrfs_csum_root(trans->fs_info, bytenr);
|
||||
ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ret = add_to_free_space_tree(trans, bytenr, num_bytes);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
|
||||
if (ret)
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop one or more refs of @node.
|
||||
*
|
||||
@ -2943,7 +2963,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||
u64 refs;
|
||||
u64 bytenr = node->bytenr;
|
||||
u64 num_bytes = node->num_bytes;
|
||||
int last_ref = 0;
|
||||
bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
|
||||
|
||||
extent_root = btrfs_extent_root(info, bytenr);
|
||||
@ -3010,8 +3029,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
/* Must be SHARED_* item, remove the backref first */
|
||||
ret = remove_extent_backref(trans, extent_root, path,
|
||||
NULL, refs_to_drop, is_data,
|
||||
&last_ref);
|
||||
NULL, refs_to_drop, is_data);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
@ -3136,8 +3154,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
if (found_extent) {
|
||||
ret = remove_extent_backref(trans, extent_root, path,
|
||||
iref, refs_to_drop, is_data,
|
||||
&last_ref);
|
||||
iref, refs_to_drop, is_data);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
@ -3182,7 +3199,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
}
|
||||
|
||||
last_ref = 1;
|
||||
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
|
||||
num_to_del);
|
||||
if (ret) {
|
||||
@ -3191,28 +3207,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
|
||||
if (is_data) {
|
||||
struct btrfs_root *csum_root;
|
||||
csum_root = btrfs_csum_root(info, bytenr);
|
||||
ret = btrfs_del_csums(trans, csum_root, bytenr,
|
||||
num_bytes);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = add_to_free_space_tree(trans, bytenr, num_bytes);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
|
||||
@ -4605,6 +4600,28 @@ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Shared tail of the reserved-extent allocation paths in this file: both
 * alloc_reserved_file_extent() and alloc_reserved_tree_block() end by
 * returning the result of this helper.
 *
 * @trans:     transaction handle; also supplies fs_info for logging/tracing
 * @bytenr:    start of the extent being allocated
 * @num_bytes: length of the extent being allocated
 *
 * Removes the range from the free space tree, then updates the block group
 * accounting for it, and finally emits the reserved_extent_alloc trace event.
 *
 * Returns 0 on success, or the error returned by
 * remove_from_free_space_tree() or btrfs_update_block_group().
 */
static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
|
||||
u64 num_bytes)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
int ret;
|
||||
|
||||
ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* NOTE(review): the trailing 'true' presumably selects "allocate" (vs. free) - confirm against btrfs_update_block_group(). */
ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
|
||||
if (ret) {
|
||||
/*
 * Always fires when reached (ret is non-zero here); the error is still
 * logged and returned to the caller after the assertion.
 */
ASSERT(!ret);
|
||||
btrfs_err(fs_info, "update block group failed for %llu %llu",
|
||||
bytenr, num_bytes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
u64 parent, u64 root_objectid,
|
||||
u64 flags, u64 owner, u64 offset,
|
||||
@ -4665,18 +4682,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
btrfs_mark_buffer_dirty(path->nodes[0]);
|
||||
btrfs_free_path(path);
|
||||
|
||||
ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, true);
|
||||
if (ret) { /* -ENOENT, logic error */
|
||||
btrfs_err(fs_info, "update block group failed for %llu %llu",
|
||||
ins->objectid, ins->offset);
|
||||
BUG();
|
||||
}
|
||||
trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
|
||||
return ret;
|
||||
return alloc_reserved_extent(trans, ins->objectid, ins->offset);
|
||||
}
|
||||
|
||||
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
|
||||
@ -4694,7 +4700,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_delayed_tree_ref *ref;
|
||||
u32 size = sizeof(*extent_item) + sizeof(*iref);
|
||||
u64 num_bytes;
|
||||
u64 flags = extent_op->flags_to_set;
|
||||
bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
|
||||
|
||||
@ -4704,12 +4709,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
|
||||
if (skinny_metadata) {
|
||||
extent_key.offset = ref->level;
|
||||
extent_key.type = BTRFS_METADATA_ITEM_KEY;
|
||||
num_bytes = fs_info->nodesize;
|
||||
} else {
|
||||
extent_key.offset = node->num_bytes;
|
||||
extent_key.type = BTRFS_EXTENT_ITEM_KEY;
|
||||
size += sizeof(*block_info);
|
||||
num_bytes = node->num_bytes;
|
||||
}
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
@ -4754,22 +4757,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
btrfs_free_path(path);
|
||||
|
||||
ret = remove_from_free_space_tree(trans, extent_key.objectid,
|
||||
num_bytes);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = btrfs_update_block_group(trans, extent_key.objectid,
|
||||
fs_info->nodesize, true);
|
||||
if (ret) { /* -ENOENT, logic error */
|
||||
btrfs_err(fs_info, "update block group failed for %llu %llu",
|
||||
extent_key.objectid, extent_key.offset);
|
||||
BUG();
|
||||
}
|
||||
|
||||
trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
|
||||
fs_info->nodesize);
|
||||
return ret;
|
||||
return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
|
||||
}
|
||||
|
||||
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
|
@ -2610,6 +2610,7 @@ static bool btrfs_check_repairable(struct inode *inode,
|
||||
* a good copy of the failed sector and if we succeed, we have setup
|
||||
* everything for repair_io_failure to do the rest for us.
|
||||
*/
|
||||
ASSERT(failed_mirror);
|
||||
failrec->failed_mirror = failed_mirror;
|
||||
failrec->this_mirror++;
|
||||
if (failrec->this_mirror == failed_mirror)
|
||||
@ -2639,7 +2640,6 @@ int btrfs_repair_one_sector(struct inode *inode,
|
||||
const int icsum = bio_offset >> fs_info->sectorsize_bits;
|
||||
struct bio *repair_bio;
|
||||
struct btrfs_bio *repair_bbio;
|
||||
blk_status_t status;
|
||||
|
||||
btrfs_debug(fs_info,
|
||||
"repair read error: read error at %llu", start);
|
||||
@ -2678,13 +2678,13 @@ int btrfs_repair_one_sector(struct inode *inode,
|
||||
"repair read error: submitting new read to mirror %d",
|
||||
failrec->this_mirror);
|
||||
|
||||
status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
|
||||
failrec->bio_flags);
|
||||
if (status) {
|
||||
free_io_failure(failure_tree, tree, failrec);
|
||||
bio_put(repair_bio);
|
||||
}
|
||||
return blk_status_to_errno(status);
|
||||
/*
|
||||
* At this point we have a bio, so any errors from submit_bio_hook()
|
||||
* will be handled by the endio on the repair_bio, so we can't return an
|
||||
* error here.
|
||||
*/
|
||||
submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->bio_flags);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
|
||||
@ -3067,6 +3067,14 @@ static void end_bio_extent_readpage(struct bio *bio)
|
||||
goto readpage_ok;
|
||||
|
||||
if (is_data_inode(inode)) {
|
||||
/*
|
||||
* If we failed to submit the IO at all we'll have a
|
||||
* mirror_num == 0, in which case we need to just mark
|
||||
* the page with an error and unlock it and carry on.
|
||||
*/
|
||||
if (mirror == 0)
|
||||
goto readpage_ok;
|
||||
|
||||
/*
|
||||
* btrfs_submit_read_repair() will handle all the good
|
||||
* and bad sectors, we just continue to the next bvec.
|
||||
@ -3534,7 +3542,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
|
||||
}
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
|
||||
if (em_cached && !IS_ERR_OR_NULL(em)) {
|
||||
if (em_cached && !IS_ERR(em)) {
|
||||
BUG_ON(*em_cached);
|
||||
refcount_inc(&em->refs);
|
||||
*em_cached = em;
|
||||
@ -3563,7 +3571,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
|
||||
u64 cur_end;
|
||||
struct extent_map *em;
|
||||
int ret = 0;
|
||||
int nr = 0;
|
||||
size_t pg_offset = 0;
|
||||
size_t iosize;
|
||||
size_t blocksize = inode->i_sb->s_blocksize;
|
||||
@ -3608,9 +3615,10 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
|
||||
}
|
||||
em = __get_extent_map(inode, page, pg_offset, cur,
|
||||
end - cur + 1, em_cached);
|
||||
if (IS_ERR_OR_NULL(em)) {
|
||||
if (IS_ERR(em)) {
|
||||
unlock_extent(tree, cur, end);
|
||||
end_page_read(page, false, cur, end + 1 - cur);
|
||||
ret = PTR_ERR(em);
|
||||
break;
|
||||
}
|
||||
extent_offset = cur - em->start;
|
||||
@ -3721,9 +3729,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
|
||||
end_bio_extent_readpage, 0,
|
||||
this_bio_flag,
|
||||
force_bio_submit);
|
||||
if (!ret) {
|
||||
nr++;
|
||||
} else {
|
||||
if (ret) {
|
||||
unlock_extent(tree, cur, cur + iosize - 1);
|
||||
end_page_read(page, false, cur, iosize);
|
||||
goto out;
|
||||
@ -3951,7 +3957,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
|
||||
}
|
||||
|
||||
em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
|
||||
if (IS_ERR_OR_NULL(em)) {
|
||||
if (IS_ERR(em)) {
|
||||
btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
|
||||
ret = PTR_ERR_OR_ZERO(em);
|
||||
break;
|
||||
@ -4780,11 +4786,12 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
|
||||
return ret;
|
||||
}
|
||||
if (cache) {
|
||||
/* Impiles write in zoned mode */
|
||||
btrfs_put_block_group(cache);
|
||||
/* Mark the last eb in a block group */
|
||||
/*
|
||||
* Implies write in zoned mode. Mark the last eb in a block group.
|
||||
*/
|
||||
if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
|
||||
set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
|
||||
btrfs_put_block_group(cache);
|
||||
}
|
||||
ret = write_one_eb(eb, wbc, epd);
|
||||
free_extent_buffer(eb);
|
||||
@ -5390,7 +5397,7 @@ static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
|
||||
break;
|
||||
len = ALIGN(len, sectorsize);
|
||||
em = btrfs_get_extent_fiemap(inode, offset, len);
|
||||
if (IS_ERR_OR_NULL(em))
|
||||
if (IS_ERR(em))
|
||||
return em;
|
||||
|
||||
/* if this isn't a hole return it */
|
||||
|
@ -492,6 +492,8 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
|
||||
*/
|
||||
void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
|
||||
{
|
||||
lockdep_assert_held_write(&tree->lock);
|
||||
|
||||
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
|
||||
rb_erase_cached(&em->rb_node, &tree->map);
|
||||
if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
|
||||
@ -506,6 +508,8 @@ void replace_extent_mapping(struct extent_map_tree *tree,
|
||||
struct extent_map *new,
|
||||
int modified)
|
||||
{
|
||||
lockdep_assert_held_write(&tree->lock);
|
||||
|
||||
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
|
||||
ASSERT(extent_map_in_tree(cur));
|
||||
if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
|
||||
|
@ -305,7 +305,7 @@ found:
|
||||
read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
|
||||
ret * csum_size);
|
||||
out:
|
||||
if (ret == -ENOENT)
|
||||
if (ret == -ENOENT || ret == -EFBIG)
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
@ -368,6 +368,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_path *path;
|
||||
const u32 sectorsize = fs_info->sectorsize;
|
||||
const u32 csum_size = fs_info->csum_size;
|
||||
@ -377,6 +378,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
|
||||
u8 *csum;
|
||||
const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
|
||||
int count = 0;
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
|
||||
if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
|
||||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
|
||||
@ -400,7 +402,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
|
||||
return BLK_STS_RESOURCE;
|
||||
|
||||
if (!dst) {
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
bbio = btrfs_bio(bio);
|
||||
|
||||
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
|
||||
bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
|
||||
@ -456,21 +458,27 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
|
||||
|
||||
count = search_csum_tree(fs_info, path, cur_disk_bytenr,
|
||||
search_len, csum_dst);
|
||||
if (count <= 0) {
|
||||
/*
|
||||
* Either we hit a critical error or we didn't find
|
||||
* the csum.
|
||||
* Either way, we put zero into the csums dst, and skip
|
||||
* to the next sector.
|
||||
*/
|
||||
if (count < 0) {
|
||||
ret = errno_to_blk_status(count);
|
||||
if (bbio)
|
||||
btrfs_bio_free_csum(bbio);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* We didn't find a csum for this range. We need to make sure
|
||||
* we complain loudly about this, because we are not NODATASUM.
|
||||
*
|
||||
* However for the DATA_RELOC inode we could potentially be
|
||||
* relocating data extents for a NODATASUM inode, so the inode
|
||||
* itself won't be marked with NODATASUM, but the extent we're
|
||||
* copying is in fact NODATASUM. If we don't find a csum we
|
||||
* assume this is the case.
|
||||
*/
|
||||
if (count == 0) {
|
||||
memset(csum_dst, 0, csum_size);
|
||||
count = 1;
|
||||
|
||||
/*
|
||||
* For data reloc inode, we need to mark the range
|
||||
* NODATASUM so that balance won't report false csum
|
||||
* error.
|
||||
*/
|
||||
if (BTRFS_I(inode)->root->root_key.objectid ==
|
||||
BTRFS_DATA_RELOC_TREE_OBJECTID) {
|
||||
u64 file_offset;
|
||||
@ -491,7 +499,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
|
||||
}
|
||||
|
||||
btrfs_free_path(path);
|
||||
return BLK_STS_OK;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
||||
@ -612,32 +620,33 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
|
||||
/**
|
||||
* Calculate checksums of the data contained inside a bio
|
||||
*
|
||||
* @inode: Owner of the data inside the bio
|
||||
* @bio: Contains the data to be checksummed
|
||||
* @file_start: offset in file this bio begins to describe
|
||||
* @contig: Boolean. If true/1 means all bio vecs in this bio are
|
||||
* contiguous and they begin at @file_start in the file. False/0
|
||||
* means this bio can contain potentially discontiguous bio vecs
|
||||
* so the logical offset of each should be calculated separately.
|
||||
* @offset: If (u64)-1, @bio may contain discontiguous bio vecs, so the
|
||||
* file offsets are determined from the page offsets in the bio.
|
||||
* Otherwise, this is the starting file offset of the bio vecs in
|
||||
* @bio, which must be contiguous.
|
||||
* @one_ordered: If true, @bio only refers to one ordered extent.
|
||||
*/
|
||||
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
|
||||
u64 file_start, int contig)
|
||||
u64 offset, bool one_ordered)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
struct btrfs_ordered_sum *sums;
|
||||
struct btrfs_ordered_extent *ordered = NULL;
|
||||
const bool use_page_offsets = (offset == (u64)-1);
|
||||
char *data;
|
||||
struct bvec_iter iter;
|
||||
struct bio_vec bvec;
|
||||
int index;
|
||||
int nr_sectors;
|
||||
unsigned int blockcount;
|
||||
unsigned long total_bytes = 0;
|
||||
unsigned long this_sum_bytes = 0;
|
||||
int i;
|
||||
u64 offset;
|
||||
unsigned nofs_flag;
|
||||
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
@ -651,18 +660,13 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
|
||||
sums->len = bio->bi_iter.bi_size;
|
||||
INIT_LIST_HEAD(&sums->list);
|
||||
|
||||
if (contig)
|
||||
offset = file_start;
|
||||
else
|
||||
offset = 0; /* shut up gcc */
|
||||
|
||||
sums->bytenr = bio->bi_iter.bi_sector << 9;
|
||||
index = 0;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
if (!contig)
|
||||
if (use_page_offsets)
|
||||
offset = page_offset(bvec.bv_page) + bvec.bv_offset;
|
||||
|
||||
if (!ordered) {
|
||||
@ -681,13 +685,14 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
|
||||
}
|
||||
}
|
||||
|
||||
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
|
||||
blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
|
||||
bvec.bv_len + fs_info->sectorsize
|
||||
- 1);
|
||||
|
||||
for (i = 0; i < nr_sectors; i++) {
|
||||
if (offset >= ordered->file_offset + ordered->num_bytes ||
|
||||
offset < ordered->file_offset) {
|
||||
for (i = 0; i < blockcount; i++) {
|
||||
if (!one_ordered &&
|
||||
!in_range(offset, ordered->file_offset,
|
||||
ordered->num_bytes)) {
|
||||
unsigned long bytes_left;
|
||||
|
||||
sums->len = this_sum_bytes;
|
||||
@ -1211,6 +1216,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
extent_start = key.offset;
|
||||
extent_end = btrfs_file_extent_end(path);
|
||||
em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
|
||||
em->generation = btrfs_file_extent_generation(leaf, fi);
|
||||
if (type == BTRFS_FILE_EXTENT_REG ||
|
||||
type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
em->start = extent_start;
|
||||
|
@ -691,7 +691,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
||||
int modify_tree = -1;
|
||||
int update_refs;
|
||||
int found = 0;
|
||||
int leafs_visited = 0;
|
||||
struct btrfs_path *path = args->path;
|
||||
|
||||
args->bytes_found = 0;
|
||||
@ -729,7 +728,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
||||
path->slots[0]--;
|
||||
}
|
||||
ret = 0;
|
||||
leafs_visited++;
|
||||
next_slot:
|
||||
leaf = path->nodes[0];
|
||||
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
|
||||
@ -741,7 +739,6 @@ next_slot:
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
leafs_visited++;
|
||||
leaf = path->nodes[0];
|
||||
recow = 1;
|
||||
}
|
||||
@ -987,7 +984,7 @@ delete_extent_item:
|
||||
* which case it unlocked our path, so check path->locks[0] matches a
|
||||
* write lock.
|
||||
*/
|
||||
if (!ret && args->replace_extent && leafs_visited == 1 &&
|
||||
if (!ret && args->replace_extent &&
|
||||
path->locks[0] == BTRFS_WRITE_LOCK &&
|
||||
btrfs_leaf_free_space(leaf) >=
|
||||
sizeof(struct btrfs_item) + args->extent_item_size) {
|
||||
@ -1722,7 +1719,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
|
||||
fs_info->sectorsize);
|
||||
WARN_ON(reserve_bytes == 0);
|
||||
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
|
||||
reserve_bytes);
|
||||
reserve_bytes,
|
||||
reserve_bytes);
|
||||
if (ret) {
|
||||
if (!only_release_metadata)
|
||||
btrfs_free_reserved_data_space(BTRFS_I(inode),
|
||||
@ -2039,12 +2037,43 @@ out:
|
||||
return err < 0 ? err : written;
|
||||
}
|
||||
|
||||
static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
|
||||
struct iov_iter *from)
|
||||
/*
 * Perform an encoded write described by @encoded on the file behind @iocb.
 *
 * @iocb:    kiocb for the target file (iocb->ki_filp)
 * @from:    iov_iter passed through to btrfs_write_check() and
 *           btrfs_do_encoded_write()
 * @encoded: encoded I/O arguments; encoded->len is the length that is checked
 *
 * The inode lock is held around the checks and the write itself and is
 * dropped on every exit path via the 'out' label.
 *
 * Returns the result of btrfs_do_encoded_write() on the normal path, 0 when
 * encoded->len is 0 and the checks pass, -EFBIG if the generic write checks
 * would shorten the write, or the error from generic_write_checks_count() /
 * btrfs_write_check().
 */
static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct btrfs_ioctl_encoded_io_args *encoded)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
loff_t count;
|
||||
ssize_t ret;
|
||||
|
||||
btrfs_inode_lock(inode, 0);
|
||||
/* Run the generic checks on a copy of the length so truncation can be detected. */
count = encoded->len;
|
||||
ret = generic_write_checks_count(iocb, &count);
|
||||
if (ret == 0 && count != encoded->len) {
|
||||
/*
|
||||
* The write got truncated by generic_write_checks_count(). We
|
||||
* can't do a partial encoded write.
|
||||
*/
|
||||
ret = -EFBIG;
|
||||
}
|
||||
/* A zero-length request with no error leaves ret == 0 and skips the write. */
if (ret || encoded->len == 0)
|
||||
goto out;
|
||||
|
||||
ret = btrfs_write_check(iocb, from, encoded->len);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = btrfs_do_encoded_write(iocb, from, encoded);
|
||||
out:
|
||||
btrfs_inode_unlock(inode, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct btrfs_ioctl_encoded_io_args *encoded)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
|
||||
ssize_t num_written = 0;
|
||||
ssize_t num_written, num_sync;
|
||||
const bool sync = iocb->ki_flags & IOCB_DSYNC;
|
||||
|
||||
/*
|
||||
@ -2055,22 +2084,28 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
|
||||
if (BTRFS_FS_ERROR(inode->root->fs_info))
|
||||
return -EROFS;
|
||||
|
||||
if (!(iocb->ki_flags & IOCB_DIRECT) &&
|
||||
(iocb->ki_flags & IOCB_NOWAIT))
|
||||
if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (sync)
|
||||
atomic_inc(&inode->sync_writers);
|
||||
|
||||
if (iocb->ki_flags & IOCB_DIRECT)
|
||||
num_written = btrfs_direct_write(iocb, from);
|
||||
else
|
||||
num_written = btrfs_buffered_write(iocb, from);
|
||||
if (encoded) {
|
||||
num_written = btrfs_encoded_write(iocb, from, encoded);
|
||||
num_sync = encoded->len;
|
||||
} else if (iocb->ki_flags & IOCB_DIRECT) {
|
||||
num_written = num_sync = btrfs_direct_write(iocb, from);
|
||||
} else {
|
||||
num_written = num_sync = btrfs_buffered_write(iocb, from);
|
||||
}
|
||||
|
||||
btrfs_set_inode_last_sub_trans(inode);
|
||||
|
||||
if (num_written > 0)
|
||||
num_written = generic_write_sync(iocb, num_written);
|
||||
if (num_sync > 0) {
|
||||
num_sync = generic_write_sync(iocb, num_sync);
|
||||
if (num_sync < 0)
|
||||
num_written = num_sync;
|
||||
}
|
||||
|
||||
if (sync)
|
||||
atomic_dec(&inode->sync_writers);
|
||||
@ -2079,6 +2114,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
|
||||
return num_written;
|
||||
}
|
||||
|
||||
/*
 * Regular write entry point: forwards to btrfs_do_write_iter() with no
 * encoded I/O arguments (NULL), so the direct or buffered write path is
 * taken there rather than the encoded one.
 */
static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
return btrfs_do_write_iter(iocb, from, NULL);
|
||||
}
|
||||
|
||||
int btrfs_release_file(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct btrfs_file_private *private = filp->private_data;
|
||||
@ -2474,7 +2514,7 @@ out:
|
||||
hole_em = alloc_extent_map();
|
||||
if (!hole_em) {
|
||||
btrfs_drop_extent_cache(inode, offset, end - 1, 0);
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
|
||||
btrfs_set_inode_full_sync(inode);
|
||||
} else {
|
||||
hole_em->start = offset;
|
||||
hole_em->len = end - offset;
|
||||
@ -2495,8 +2535,7 @@ out:
|
||||
} while (ret == -EEXIST);
|
||||
free_extent_map(hole_em);
|
||||
if (ret)
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
|
||||
&inode->runtime_flags);
|
||||
btrfs_set_inode_full_sync(inode);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -2850,7 +2889,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
|
||||
* maps for the replacement extents (or holes).
|
||||
*/
|
||||
if (extent_info && !extent_info->is_new_extent)
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
|
||||
btrfs_set_inode_full_sync(inode);
|
||||
|
||||
if (ret)
|
||||
goto out_trans;
|
||||
|
@ -25,6 +25,8 @@ static struct btrfs_root *btrfs_free_space_root(
|
||||
.offset = 0,
|
||||
};
|
||||
|
||||
if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
|
||||
key.offset = block_group->global_root_id;
|
||||
return btrfs_global_root(block_group->fs_info, &key);
|
||||
}
|
||||
|
||||
|
1183
fs/btrfs/inode.c
1183
fs/btrfs/inode.c
File diff suppressed because it is too large
Load Diff
309
fs/btrfs/ioctl.c
309
fs/btrfs/ioctl.c
@ -28,6 +28,7 @@
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/fileattr.h>
|
||||
#include <linux/fsverity.h>
|
||||
#include <linux/sched/xacct.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "export.h"
|
||||
@ -88,6 +89,24 @@ struct btrfs_ioctl_send_args_32 {
|
||||
|
||||
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
|
||||
struct btrfs_ioctl_send_args_32)
|
||||
|
||||
/*
 * Compat variant of the encoded I/O ioctl arguments, used by
 * BTRFS_IOC_ENCODED_READ_32 / BTRFS_IOC_ENCODED_WRITE_32. The iovec pointer
 * and count use compat types (compat_uptr_t / compat_ulong_t); the compat
 * read path converts them with compat_ptr() into
 * struct btrfs_ioctl_encoded_io_args.
 *
 * NOTE(review): the remaining fields presumably match the native structure
 * one-to-one - confirm against its definition in the uapi header.
 */
struct btrfs_ioctl_encoded_io_args_32 {
|
||||
compat_uptr_t iov;
|
||||
compat_ulong_t iovcnt;
|
||||
__s64 offset;
|
||||
__u64 flags;
|
||||
__u64 len;
|
||||
__u64 unencoded_len;
|
||||
__u64 unencoded_offset;
|
||||
__u32 compression;
|
||||
__u32 encryption;
|
||||
__u8 reserved[64];
|
||||
};
|
||||
|
||||
#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
|
||||
struct btrfs_ioctl_encoded_io_args_32)
|
||||
#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \
|
||||
struct btrfs_ioctl_encoded_io_args_32)
|
||||
#endif
|
||||
|
||||
/* Mask out flags that are inappropriate for the given type of inode. */
|
||||
@ -440,10 +459,8 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
}
|
||||
|
||||
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
|
||||
static int btrfs_ioctl_getversion(struct inode *inode, int __user *arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
|
||||
return put_user(inode->i_generation, arg);
|
||||
}
|
||||
|
||||
@ -753,6 +770,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
|
||||
struct btrfs_trans_handle *trans;
|
||||
int ret;
|
||||
|
||||
/* We do not support snapshotting right now. */
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
btrfs_warn(fs_info,
|
||||
"extent tree v2 doesn't support snapshotting yet");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
|
||||
return -EINVAL;
|
||||
|
||||
@ -1522,6 +1546,7 @@ next:
|
||||
}
|
||||
|
||||
#define CLUSTER_SIZE (SZ_256K)
|
||||
static_assert(IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
|
||||
|
||||
/*
|
||||
* Defrag one contiguous target range.
|
||||
@ -1667,7 +1692,6 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
|
||||
LIST_HEAD(target_list);
|
||||
int ret;
|
||||
|
||||
BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
|
||||
ret = defrag_collect_targets(inode, start, len, extent_thresh,
|
||||
newer_than, do_compress, false,
|
||||
&target_list, NULL);
|
||||
@ -1810,9 +1834,6 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
|
||||
u64 last_scanned = cur;
|
||||
u64 cluster_end;
|
||||
|
||||
/* The cluster size 256K should always be page aligned */
|
||||
BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
|
||||
|
||||
if (btrfs_defrag_cancelled(fs_info)) {
|
||||
ret = -EAGAIN;
|
||||
break;
|
||||
@ -2229,10 +2250,9 @@ free_args:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
|
||||
static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
|
||||
void __user *arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
int ret = 0;
|
||||
@ -2562,12 +2582,11 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int btrfs_ioctl_tree_search(struct file *file,
|
||||
void __user *argp)
|
||||
static noinline int btrfs_ioctl_tree_search(struct inode *inode,
|
||||
void __user *argp)
|
||||
{
|
||||
struct btrfs_ioctl_search_args __user *uargs;
|
||||
struct btrfs_ioctl_search_key sk;
|
||||
struct inode *inode;
|
||||
int ret;
|
||||
size_t buf_size;
|
||||
|
||||
@ -2581,7 +2600,6 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
|
||||
|
||||
buf_size = sizeof(uargs->buf);
|
||||
|
||||
inode = file_inode(file);
|
||||
ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
|
||||
|
||||
/*
|
||||
@ -2596,12 +2614,11 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
|
||||
static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
|
||||
void __user *argp)
|
||||
{
|
||||
struct btrfs_ioctl_search_args_v2 __user *uarg;
|
||||
struct btrfs_ioctl_search_args_v2 args;
|
||||
struct inode *inode;
|
||||
int ret;
|
||||
size_t buf_size;
|
||||
const size_t buf_limit = SZ_16M;
|
||||
@ -2620,7 +2637,6 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
|
||||
if (buf_size > buf_limit)
|
||||
buf_size = buf_limit;
|
||||
|
||||
inode = file_inode(file);
|
||||
ret = search_ioctl(inode, &args.key, &buf_size,
|
||||
(char __user *)(&uarg->buf[0]));
|
||||
if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
|
||||
@ -2871,25 +2887,22 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int btrfs_ioctl_ino_lookup(struct file *file,
|
||||
static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root,
|
||||
void __user *argp)
|
||||
{
|
||||
struct btrfs_ioctl_ino_lookup_args *args;
|
||||
struct inode *inode;
|
||||
int ret = 0;
|
||||
|
||||
args = memdup_user(argp, sizeof(*args));
|
||||
if (IS_ERR(args))
|
||||
return PTR_ERR(args);
|
||||
|
||||
inode = file_inode(file);
|
||||
|
||||
/*
|
||||
* Unprivileged query to obtain the containing subvolume root id. The
|
||||
* path is reset so it's consistent with btrfs_search_path_in_tree.
|
||||
*/
|
||||
if (args->treeid == 0)
|
||||
args->treeid = BTRFS_I(inode)->root->root_key.objectid;
|
||||
args->treeid = root->root_key.objectid;
|
||||
|
||||
if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
args->name[0] = 0;
|
||||
@ -2901,7 +2914,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
|
||||
ret = btrfs_search_path_in_tree(root->fs_info,
|
||||
args->treeid, args->objectid,
|
||||
args->name);
|
||||
|
||||
@ -2957,7 +2970,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
|
||||
}
|
||||
|
||||
/* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
|
||||
static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
|
||||
static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
|
||||
{
|
||||
struct btrfs_ioctl_get_subvol_info_args *subvol_info;
|
||||
struct btrfs_fs_info *fs_info;
|
||||
@ -2969,7 +2982,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
|
||||
struct extent_buffer *leaf;
|
||||
unsigned long item_off;
|
||||
unsigned long item_len;
|
||||
struct inode *inode;
|
||||
int slot;
|
||||
int ret = 0;
|
||||
|
||||
@ -2983,7 +2995,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
inode = file_inode(file);
|
||||
fs_info = BTRFS_I(inode)->root->fs_info;
|
||||
|
||||
/* Get root_item of inode's subvolume */
|
||||
@ -3077,15 +3088,14 @@ out_free:
|
||||
* Return ROOT_REF information of the subvolume containing this inode
|
||||
* except the subvolume name.
|
||||
*/
|
||||
static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
|
||||
static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
|
||||
void __user *argp)
|
||||
{
|
||||
struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
|
||||
struct btrfs_root_ref *rref;
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
struct extent_buffer *leaf;
|
||||
struct inode *inode;
|
||||
u64 objectid;
|
||||
int slot;
|
||||
int ret;
|
||||
@ -3101,15 +3111,13 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
|
||||
return PTR_ERR(rootrefs);
|
||||
}
|
||||
|
||||
inode = file_inode(file);
|
||||
root = BTRFS_I(inode)->root->fs_info->tree_root;
|
||||
objectid = BTRFS_I(inode)->root->root_key.objectid;
|
||||
|
||||
objectid = root->root_key.objectid;
|
||||
key.objectid = objectid;
|
||||
key.type = BTRFS_ROOT_REF_KEY;
|
||||
key.offset = rootrefs->min_treeid;
|
||||
found = 0;
|
||||
|
||||
root = root->fs_info->tree_root;
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0) {
|
||||
goto out;
|
||||
@ -3189,6 +3197,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
|
||||
int err = 0;
|
||||
bool destroy_parent = false;
|
||||
|
||||
/* We don't support snapshots with extent tree v2 yet. */
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
btrfs_err(fs_info,
|
||||
"extent tree v2 doesn't support snapshot deletion yet");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (destroy_v2) {
|
||||
vol_args2 = memdup_user(arg, sizeof(*vol_args2));
|
||||
if (IS_ERR(vol_args2))
|
||||
@ -3464,6 +3479,11 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
btrfs_err(fs_info, "device add not supported on extent tree v2 yet");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD)) {
|
||||
if (!btrfs_exclop_start_try_lock(fs_info, BTRFS_EXCLOP_DEV_ADD))
|
||||
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
|
||||
@ -3989,6 +4009,11 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
sa = memdup_user(arg, sizeof(*sa));
|
||||
if (IS_ERR(sa))
|
||||
return PTR_ERR(sa);
|
||||
@ -4088,6 +4113,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
btrfs_err(fs_info, "device replace not supported on extent tree v2 yet");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
p = memdup_user(arg, sizeof(*p));
|
||||
if (IS_ERR(p))
|
||||
return PTR_ERR(p);
|
||||
@ -5149,7 +5179,7 @@ out_drop_write:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
|
||||
static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat)
|
||||
{
|
||||
struct btrfs_ioctl_send_args *arg;
|
||||
int ret;
|
||||
@ -5179,11 +5209,194 @@ static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
|
||||
if (IS_ERR(arg))
|
||||
return PTR_ERR(arg);
|
||||
}
|
||||
ret = btrfs_ioctl_send(file, arg);
|
||||
ret = btrfs_ioctl_send(inode, arg);
|
||||
kfree(arg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
|
||||
bool compat)
|
||||
{
|
||||
struct btrfs_ioctl_encoded_io_args args = { 0 };
|
||||
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
|
||||
flags);
|
||||
size_t copy_end;
|
||||
struct iovec iovstack[UIO_FASTIOV];
|
||||
struct iovec *iov = iovstack;
|
||||
struct iov_iter iter;
|
||||
loff_t pos;
|
||||
struct kiocb kiocb;
|
||||
ssize_t ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
if (compat) {
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
struct btrfs_ioctl_encoded_io_args_32 args32;
|
||||
|
||||
copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
|
||||
flags);
|
||||
if (copy_from_user(&args32, argp, copy_end)) {
|
||||
ret = -EFAULT;
|
||||
goto out_acct;
|
||||
}
|
||||
args.iov = compat_ptr(args32.iov);
|
||||
args.iovcnt = args32.iovcnt;
|
||||
args.offset = args32.offset;
|
||||
args.flags = args32.flags;
|
||||
#else
|
||||
return -ENOTTY;
|
||||
#endif
|
||||
} else {
|
||||
copy_end = copy_end_kernel;
|
||||
if (copy_from_user(&args, argp, copy_end)) {
|
||||
ret = -EFAULT;
|
||||
goto out_acct;
|
||||
}
|
||||
}
|
||||
if (args.flags != 0) {
|
||||
ret = -EINVAL;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
|
||||
&iov, &iter);
|
||||
if (ret < 0)
|
||||
goto out_acct;
|
||||
|
||||
if (iov_iter_count(&iter) == 0) {
|
||||
ret = 0;
|
||||
goto out_iov;
|
||||
}
|
||||
pos = args.offset;
|
||||
ret = rw_verify_area(READ, file, &pos, args.len);
|
||||
if (ret < 0)
|
||||
goto out_iov;
|
||||
|
||||
init_sync_kiocb(&kiocb, file);
|
||||
kiocb.ki_pos = pos;
|
||||
|
||||
ret = btrfs_encoded_read(&kiocb, &iter, &args);
|
||||
if (ret >= 0) {
|
||||
fsnotify_access(file);
|
||||
if (copy_to_user(argp + copy_end,
|
||||
(char *)&args + copy_end_kernel,
|
||||
sizeof(args) - copy_end_kernel))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
out_iov:
|
||||
kfree(iov);
|
||||
out_acct:
|
||||
if (ret > 0)
|
||||
add_rchar(current, ret);
|
||||
inc_syscr(current);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat)
|
||||
{
|
||||
struct btrfs_ioctl_encoded_io_args args;
|
||||
struct iovec iovstack[UIO_FASTIOV];
|
||||
struct iovec *iov = iovstack;
|
||||
struct iov_iter iter;
|
||||
loff_t pos;
|
||||
struct kiocb kiocb;
|
||||
ssize_t ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
if (!(file->f_mode & FMODE_WRITE)) {
|
||||
ret = -EBADF;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
if (compat) {
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
struct btrfs_ioctl_encoded_io_args_32 args32;
|
||||
|
||||
if (copy_from_user(&args32, argp, sizeof(args32))) {
|
||||
ret = -EFAULT;
|
||||
goto out_acct;
|
||||
}
|
||||
args.iov = compat_ptr(args32.iov);
|
||||
args.iovcnt = args32.iovcnt;
|
||||
args.offset = args32.offset;
|
||||
args.flags = args32.flags;
|
||||
args.len = args32.len;
|
||||
args.unencoded_len = args32.unencoded_len;
|
||||
args.unencoded_offset = args32.unencoded_offset;
|
||||
args.compression = args32.compression;
|
||||
args.encryption = args32.encryption;
|
||||
memcpy(args.reserved, args32.reserved, sizeof(args.reserved));
|
||||
#else
|
||||
return -ENOTTY;
|
||||
#endif
|
||||
} else {
|
||||
if (copy_from_user(&args, argp, sizeof(args))) {
|
||||
ret = -EFAULT;
|
||||
goto out_acct;
|
||||
}
|
||||
}
|
||||
|
||||
ret = -EINVAL;
|
||||
if (args.flags != 0)
|
||||
goto out_acct;
|
||||
if (memchr_inv(args.reserved, 0, sizeof(args.reserved)))
|
||||
goto out_acct;
|
||||
if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
|
||||
args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
|
||||
goto out_acct;
|
||||
if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
|
||||
args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
|
||||
goto out_acct;
|
||||
if (args.unencoded_offset > args.unencoded_len)
|
||||
goto out_acct;
|
||||
if (args.len > args.unencoded_len - args.unencoded_offset)
|
||||
goto out_acct;
|
||||
|
||||
ret = import_iovec(WRITE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
|
||||
&iov, &iter);
|
||||
if (ret < 0)
|
||||
goto out_acct;
|
||||
|
||||
file_start_write(file);
|
||||
|
||||
if (iov_iter_count(&iter) == 0) {
|
||||
ret = 0;
|
||||
goto out_end_write;
|
||||
}
|
||||
pos = args.offset;
|
||||
ret = rw_verify_area(WRITE, file, &pos, args.len);
|
||||
if (ret < 0)
|
||||
goto out_end_write;
|
||||
|
||||
init_sync_kiocb(&kiocb, file);
|
||||
ret = kiocb_set_rw_flags(&kiocb, 0);
|
||||
if (ret)
|
||||
goto out_end_write;
|
||||
kiocb.ki_pos = pos;
|
||||
|
||||
ret = btrfs_do_write_iter(&kiocb, &iter, &args);
|
||||
if (ret > 0)
|
||||
fsnotify_modify(file);
|
||||
|
||||
out_end_write:
|
||||
file_end_write(file);
|
||||
kfree(iov);
|
||||
out_acct:
|
||||
if (ret > 0)
|
||||
add_wchar(current, ret);
|
||||
inc_syscw(current);
|
||||
return ret;
|
||||
}
|
||||
|
||||
long btrfs_ioctl(struct file *file, unsigned int
|
||||
cmd, unsigned long arg)
|
||||
{
|
||||
@ -5194,7 +5407,7 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
|
||||
switch (cmd) {
|
||||
case FS_IOC_GETVERSION:
|
||||
return btrfs_ioctl_getversion(file, argp);
|
||||
return btrfs_ioctl_getversion(inode, argp);
|
||||
case FS_IOC_GETFSLABEL:
|
||||
return btrfs_ioctl_get_fslabel(fs_info, argp);
|
||||
case FS_IOC_SETFSLABEL:
|
||||
@ -5214,7 +5427,7 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
case BTRFS_IOC_SNAP_DESTROY_V2:
|
||||
return btrfs_ioctl_snap_destroy(file, argp, true);
|
||||
case BTRFS_IOC_SUBVOL_GETFLAGS:
|
||||
return btrfs_ioctl_subvol_getflags(file, argp);
|
||||
return btrfs_ioctl_subvol_getflags(inode, argp);
|
||||
case BTRFS_IOC_SUBVOL_SETFLAGS:
|
||||
return btrfs_ioctl_subvol_setflags(file, argp);
|
||||
case BTRFS_IOC_DEFAULT_SUBVOL:
|
||||
@ -5238,11 +5451,11 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
case BTRFS_IOC_BALANCE:
|
||||
return btrfs_ioctl_balance(file, NULL);
|
||||
case BTRFS_IOC_TREE_SEARCH:
|
||||
return btrfs_ioctl_tree_search(file, argp);
|
||||
return btrfs_ioctl_tree_search(inode, argp);
|
||||
case BTRFS_IOC_TREE_SEARCH_V2:
|
||||
return btrfs_ioctl_tree_search_v2(file, argp);
|
||||
return btrfs_ioctl_tree_search_v2(inode, argp);
|
||||
case BTRFS_IOC_INO_LOOKUP:
|
||||
return btrfs_ioctl_ino_lookup(file, argp);
|
||||
return btrfs_ioctl_ino_lookup(root, argp);
|
||||
case BTRFS_IOC_INO_PATHS:
|
||||
return btrfs_ioctl_ino_to_path(root, argp);
|
||||
case BTRFS_IOC_LOGICAL_INO:
|
||||
@ -5289,10 +5502,10 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
return btrfs_ioctl_set_received_subvol_32(file, argp);
|
||||
#endif
|
||||
case BTRFS_IOC_SEND:
|
||||
return _btrfs_ioctl_send(file, argp, false);
|
||||
return _btrfs_ioctl_send(inode, argp, false);
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
case BTRFS_IOC_SEND_32:
|
||||
return _btrfs_ioctl_send(file, argp, true);
|
||||
return _btrfs_ioctl_send(inode, argp, true);
|
||||
#endif
|
||||
case BTRFS_IOC_GET_DEV_STATS:
|
||||
return btrfs_ioctl_get_dev_stats(fs_info, argp);
|
||||
@ -5319,15 +5532,25 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
case BTRFS_IOC_SET_FEATURES:
|
||||
return btrfs_ioctl_set_features(file, argp);
|
||||
case BTRFS_IOC_GET_SUBVOL_INFO:
|
||||
return btrfs_ioctl_get_subvol_info(file, argp);
|
||||
return btrfs_ioctl_get_subvol_info(inode, argp);
|
||||
case BTRFS_IOC_GET_SUBVOL_ROOTREF:
|
||||
return btrfs_ioctl_get_subvol_rootref(file, argp);
|
||||
return btrfs_ioctl_get_subvol_rootref(root, argp);
|
||||
case BTRFS_IOC_INO_LOOKUP_USER:
|
||||
return btrfs_ioctl_ino_lookup_user(file, argp);
|
||||
case FS_IOC_ENABLE_VERITY:
|
||||
return fsverity_ioctl_enable(file, (const void __user *)argp);
|
||||
case FS_IOC_MEASURE_VERITY:
|
||||
return fsverity_ioctl_measure(file, argp);
|
||||
case BTRFS_IOC_ENCODED_READ:
|
||||
return btrfs_ioctl_encoded_read(file, argp, false);
|
||||
case BTRFS_IOC_ENCODED_WRITE:
|
||||
return btrfs_ioctl_encoded_write(file, argp, false);
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
case BTRFS_IOC_ENCODED_READ_32:
|
||||
return btrfs_ioctl_encoded_read(file, argp, true);
|
||||
case BTRFS_IOC_ENCODED_WRITE_32:
|
||||
return btrfs_ioctl_encoded_write(file, argp, true);
|
||||
#endif
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
@ -55,6 +55,9 @@
|
||||
* 0x1000 | SegHdr N+1| Data payload N+1 ... |
|
||||
*/
|
||||
|
||||
#define WORKSPACE_BUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE))
|
||||
#define WORKSPACE_CBUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE))
|
||||
|
||||
struct workspace {
|
||||
void *mem;
|
||||
void *buf; /* where decompressed data goes */
|
||||
@ -83,8 +86,8 @@ struct list_head *lzo_alloc_workspace(unsigned int level)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
|
||||
workspace->buf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
|
||||
workspace->cbuf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
|
||||
workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL);
|
||||
workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL);
|
||||
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
|
||||
goto fail;
|
||||
|
||||
@ -380,7 +383,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
|
||||
kunmap(cur_page);
|
||||
cur_in += LZO_LEN;
|
||||
|
||||
if (seg_len > lzo1x_worst_compress(PAGE_SIZE)) {
|
||||
if (seg_len > WORKSPACE_CBUF_LENGTH) {
|
||||
/*
|
||||
* seg_len shouldn't be larger than we have allocated
|
||||
* for workspace->cbuf
|
||||
@ -433,7 +436,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
size_t in_len;
|
||||
size_t out_len;
|
||||
size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
|
||||
size_t max_segment_len = WORKSPACE_BUF_LENGTH;
|
||||
int ret = 0;
|
||||
char *kaddr;
|
||||
unsigned long bytes;
|
||||
|
@ -143,16 +143,28 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate and add a new ordered_extent into the per-inode tree.
|
||||
/**
|
||||
* Add an ordered extent to the per-inode tree.
|
||||
*
|
||||
* The tree is given a single reference on the ordered extent that was
|
||||
* inserted.
|
||||
* @inode: Inode that this extent is for.
|
||||
* @file_offset: Logical offset in file where the extent starts.
|
||||
* @num_bytes: Logical length of extent in file.
|
||||
* @ram_bytes: Full length of unencoded data.
|
||||
* @disk_bytenr: Offset of extent on disk.
|
||||
* @disk_num_bytes: Size of extent on disk.
|
||||
* @offset: Offset into unencoded data where file data starts.
|
||||
* @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
|
||||
* @compress_type: Compression algorithm used for data.
|
||||
*
|
||||
* Most of these parameters correspond to &struct btrfs_file_extent_item. The
|
||||
* tree is given a single reference on the ordered extent that was inserted.
|
||||
*
|
||||
* Return: 0 or -ENOMEM.
|
||||
*/
|
||||
static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
|
||||
u64 disk_bytenr, u64 num_bytes,
|
||||
u64 disk_num_bytes, int type, int dio,
|
||||
int compress_type)
|
||||
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
|
||||
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
|
||||
u64 disk_num_bytes, u64 offset, unsigned flags,
|
||||
int compress_type)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
@ -161,7 +173,8 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
|
||||
struct btrfs_ordered_extent *entry;
|
||||
int ret;
|
||||
|
||||
if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) {
|
||||
if (flags &
|
||||
((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
|
||||
/* For nocow write, we can release the qgroup rsv right now */
|
||||
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
|
||||
if (ret < 0)
|
||||
@ -181,9 +194,11 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
|
||||
return -ENOMEM;
|
||||
|
||||
entry->file_offset = file_offset;
|
||||
entry->disk_bytenr = disk_bytenr;
|
||||
entry->num_bytes = num_bytes;
|
||||
entry->ram_bytes = ram_bytes;
|
||||
entry->disk_bytenr = disk_bytenr;
|
||||
entry->disk_num_bytes = disk_num_bytes;
|
||||
entry->offset = offset;
|
||||
entry->bytes_left = num_bytes;
|
||||
entry->inode = igrab(&inode->vfs_inode);
|
||||
entry->compress_type = compress_type;
|
||||
@ -191,18 +206,12 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
|
||||
entry->qgroup_rsv = ret;
|
||||
entry->physical = (u64)-1;
|
||||
|
||||
ASSERT(type == BTRFS_ORDERED_REGULAR ||
|
||||
type == BTRFS_ORDERED_NOCOW ||
|
||||
type == BTRFS_ORDERED_PREALLOC ||
|
||||
type == BTRFS_ORDERED_COMPRESSED);
|
||||
set_bit(type, &entry->flags);
|
||||
ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
|
||||
entry->flags = flags;
|
||||
|
||||
percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
|
||||
fs_info->delalloc_batch);
|
||||
|
||||
if (dio)
|
||||
set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
|
||||
|
||||
/* one ref for the tree */
|
||||
refcount_set(&entry->refs, 1);
|
||||
init_waitqueue_head(&entry->wait);
|
||||
@ -247,41 +256,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
|
||||
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
|
||||
int type)
|
||||
{
|
||||
ASSERT(type == BTRFS_ORDERED_REGULAR ||
|
||||
type == BTRFS_ORDERED_NOCOW ||
|
||||
type == BTRFS_ORDERED_PREALLOC);
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
|
||||
num_bytes, disk_num_bytes, type, 0,
|
||||
BTRFS_COMPRESS_NONE);
|
||||
}
|
||||
|
||||
int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
|
||||
u64 disk_bytenr, u64 num_bytes,
|
||||
u64 disk_num_bytes, int type)
|
||||
{
|
||||
ASSERT(type == BTRFS_ORDERED_REGULAR ||
|
||||
type == BTRFS_ORDERED_NOCOW ||
|
||||
type == BTRFS_ORDERED_PREALLOC);
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
|
||||
num_bytes, disk_num_bytes, type, 1,
|
||||
BTRFS_COMPRESS_NONE);
|
||||
}
|
||||
|
||||
int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
|
||||
u64 disk_bytenr, u64 num_bytes,
|
||||
u64 disk_num_bytes, int compress_type)
|
||||
{
|
||||
ASSERT(compress_type != BTRFS_COMPRESS_NONE);
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
|
||||
num_bytes, disk_num_bytes,
|
||||
BTRFS_ORDERED_COMPRESSED, 0,
|
||||
compress_type);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a struct btrfs_ordered_sum into the list of checksums to be inserted
|
||||
* when an ordered extent is finished. If the list covers more than one
|
||||
@ -548,9 +522,15 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
|
||||
spin_lock(&btrfs_inode->lock);
|
||||
btrfs_mod_outstanding_extents(btrfs_inode, -1);
|
||||
spin_unlock(&btrfs_inode->lock);
|
||||
if (root != fs_info->tree_root)
|
||||
btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes,
|
||||
false);
|
||||
if (root != fs_info->tree_root) {
|
||||
u64 release;
|
||||
|
||||
if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags))
|
||||
release = entry->disk_num_bytes;
|
||||
else
|
||||
release = entry->num_bytes;
|
||||
btrfs_delalloc_release_metadata(btrfs_inode, release, false);
|
||||
}
|
||||
|
||||
percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
|
||||
fs_info->delalloc_batch);
|
||||
@ -1052,42 +1032,18 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
|
||||
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
|
||||
u64 file_offset = ordered->file_offset + pos;
|
||||
u64 disk_bytenr = ordered->disk_bytenr + pos;
|
||||
u64 num_bytes = len;
|
||||
u64 disk_num_bytes = len;
|
||||
int type;
|
||||
unsigned long flags_masked = ordered->flags & ~(1 << BTRFS_ORDERED_DIRECT);
|
||||
int compress_type = ordered->compress_type;
|
||||
unsigned long weight;
|
||||
int ret;
|
||||
|
||||
weight = hweight_long(flags_masked);
|
||||
WARN_ON_ONCE(weight > 1);
|
||||
if (!weight)
|
||||
type = 0;
|
||||
else
|
||||
type = __ffs(flags_masked);
|
||||
unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;
|
||||
|
||||
/*
|
||||
* The splitting extent is already counted and will be added again
|
||||
* in btrfs_add_ordered_extent_*(). Subtract num_bytes to avoid
|
||||
* double counting.
|
||||
* The splitting extent is already counted and will be added again in
|
||||
* btrfs_add_ordered_extent_*(). Subtract len to avoid double counting.
|
||||
*/
|
||||
percpu_counter_add_batch(&fs_info->ordered_bytes, -num_bytes,
|
||||
percpu_counter_add_batch(&fs_info->ordered_bytes, -len,
|
||||
fs_info->delalloc_batch);
|
||||
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) {
|
||||
WARN_ON_ONCE(1);
|
||||
ret = btrfs_add_ordered_extent_compress(BTRFS_I(inode),
|
||||
file_offset, disk_bytenr, num_bytes,
|
||||
disk_num_bytes, compress_type);
|
||||
} else if (test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
|
||||
ret = btrfs_add_ordered_extent_dio(BTRFS_I(inode), file_offset,
|
||||
disk_bytenr, num_bytes, disk_num_bytes, type);
|
||||
} else {
|
||||
ret = btrfs_add_ordered_extent(BTRFS_I(inode), file_offset,
|
||||
disk_bytenr, num_bytes, disk_num_bytes, type);
|
||||
}
|
||||
|
||||
return ret;
|
||||
WARN_ON_ONCE(flags & (1 << BTRFS_ORDERED_COMPRESSED));
|
||||
return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len,
|
||||
disk_bytenr, len, 0, flags,
|
||||
ordered->compress_type);
|
||||
}
|
||||
|
||||
int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
|
||||
|
@ -74,8 +74,18 @@ enum {
|
||||
BTRFS_ORDERED_LOGGED_CSUM,
|
||||
/* We wait for this extent to complete in the current transaction */
|
||||
BTRFS_ORDERED_PENDING,
|
||||
/* BTRFS_IOC_ENCODED_WRITE */
|
||||
BTRFS_ORDERED_ENCODED,
|
||||
};
|
||||
|
||||
/* BTRFS_ORDERED_* flags that specify the type of the extent. */
|
||||
#define BTRFS_ORDERED_TYPE_FLAGS ((1UL << BTRFS_ORDERED_REGULAR) | \
|
||||
(1UL << BTRFS_ORDERED_NOCOW) | \
|
||||
(1UL << BTRFS_ORDERED_PREALLOC) | \
|
||||
(1UL << BTRFS_ORDERED_COMPRESSED) | \
|
||||
(1UL << BTRFS_ORDERED_DIRECT) | \
|
||||
(1UL << BTRFS_ORDERED_ENCODED))
|
||||
|
||||
struct btrfs_ordered_extent {
|
||||
/* logical offset in the file */
|
||||
u64 file_offset;
|
||||
@ -84,9 +94,11 @@ struct btrfs_ordered_extent {
|
||||
* These fields directly correspond to the same fields in
|
||||
* btrfs_file_extent_item.
|
||||
*/
|
||||
u64 disk_bytenr;
|
||||
u64 num_bytes;
|
||||
u64 ram_bytes;
|
||||
u64 disk_bytenr;
|
||||
u64 disk_num_bytes;
|
||||
u64 offset;
|
||||
|
||||
/* number of bytes that still need writing */
|
||||
u64 bytes_left;
|
||||
@ -179,14 +191,9 @@ bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
|
||||
struct btrfs_ordered_extent **cached,
|
||||
u64 file_offset, u64 io_size);
|
||||
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
|
||||
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
|
||||
int type);
|
||||
int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
|
||||
u64 disk_bytenr, u64 num_bytes,
|
||||
u64 disk_num_bytes, int type);
|
||||
int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
|
||||
u64 disk_bytenr, u64 num_bytes,
|
||||
u64 disk_num_bytes, int compress_type);
|
||||
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
|
||||
u64 disk_num_bytes, u64 offset, unsigned flags,
|
||||
int compress_type);
|
||||
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
|
||||
struct btrfs_ordered_sum *sum);
|
||||
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
|
||||
|
@ -23,6 +23,7 @@ static const struct root_name_map root_map[] = {
|
||||
{ BTRFS_QUOTA_TREE_OBJECTID, "QUOTA_TREE" },
|
||||
{ BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" },
|
||||
{ BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" },
|
||||
{ BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },
|
||||
{ BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" },
|
||||
};
|
||||
|
||||
@ -391,9 +392,9 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
|
||||
btrfs_header_owner(c),
|
||||
btrfs_node_ptr_generation(c, i),
|
||||
level - 1, &first_key);
|
||||
if (IS_ERR(next)) {
|
||||
if (IS_ERR(next))
|
||||
continue;
|
||||
} else if (!extent_buffer_uptodate(next)) {
|
||||
if (!extent_buffer_uptodate(next)) {
|
||||
free_extent_buffer(next);
|
||||
continue;
|
||||
}
|
||||
|
@ -25,18 +25,6 @@
|
||||
#include "sysfs.h"
|
||||
#include "tree-mod-log.h"
|
||||
|
||||
/* TODO XXX FIXME
|
||||
* - subvol delete -> delete when ref goes to 0? delete limits also?
|
||||
* - reorganize keys
|
||||
* - compressed
|
||||
* - sync
|
||||
* - copy also limits on subvol creation
|
||||
* - limit
|
||||
* - caches for ulists
|
||||
* - performance benchmarks
|
||||
* - check all ioctl parameters
|
||||
*/
|
||||
|
||||
/*
|
||||
* Helpers to access qgroup reservation
|
||||
*
|
||||
@ -258,16 +246,19 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* must be called with qgroup_lock held */
|
||||
static int add_relation_rb(struct btrfs_fs_info *fs_info,
|
||||
u64 memberid, u64 parentid)
|
||||
/*
|
||||
* Add relation specified by two qgroups.
|
||||
*
|
||||
* Must be called with qgroup_lock held.
|
||||
*
|
||||
* Return: 0 on success
|
||||
* -ENOENT if one of the qgroups is NULL
|
||||
* <0 other errors
|
||||
*/
|
||||
static int __add_relation_rb(struct btrfs_qgroup *member, struct btrfs_qgroup *parent)
|
||||
{
|
||||
struct btrfs_qgroup *member;
|
||||
struct btrfs_qgroup *parent;
|
||||
struct btrfs_qgroup_list *list;
|
||||
|
||||
member = find_qgroup_rb(fs_info, memberid);
|
||||
parent = find_qgroup_rb(fs_info, parentid);
|
||||
if (!member || !parent)
|
||||
return -ENOENT;
|
||||
|
||||
@ -283,7 +274,27 @@ static int add_relation_rb(struct btrfs_fs_info *fs_info,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* must be called with qgroup_lock held */
|
||||
/*
|
||||
* Add relation specified by two qgroup ids.
|
||||
*
|
||||
* Must be called with qgroup_lock held.
|
||||
*
|
||||
* Return: 0 on success
|
||||
* -ENOENT if one of the ids does not exist
|
||||
* <0 other errors
|
||||
*/
|
||||
static int add_relation_rb(struct btrfs_fs_info *fs_info, u64 memberid, u64 parentid)
|
||||
{
|
||||
struct btrfs_qgroup *member;
|
||||
struct btrfs_qgroup *parent;
|
||||
|
||||
member = find_qgroup_rb(fs_info, memberid);
|
||||
parent = find_qgroup_rb(fs_info, parentid);
|
||||
|
||||
return __add_relation_rb(member, parent);
|
||||
}
|
||||
|
||||
/* Must be called with qgroup_lock held */
|
||||
static int del_relation_rb(struct btrfs_fs_info *fs_info,
|
||||
u64 memberid, u64 parentid)
|
||||
{
|
||||
@ -948,6 +959,12 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
|
||||
*/
|
||||
lockdep_assert_held_write(&fs_info->subvol_sem);
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
btrfs_err(fs_info,
|
||||
"qgroups are currently unsupported in extent tree v2");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_lock(&fs_info->qgroup_ioctl_lock);
|
||||
if (fs_info->quota_root)
|
||||
goto out;
|
||||
@ -1451,7 +1468,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
|
||||
}
|
||||
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
ret = add_relation_rb(fs_info, src, dst);
|
||||
ret = __add_relation_rb(member, parent);
|
||||
if (ret < 0) {
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
goto out;
|
||||
@ -3268,7 +3285,8 @@ out:
|
||||
static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return btrfs_fs_closing(fs_info) ||
|
||||
test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
|
||||
test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
|
||||
!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
|
||||
}
|
||||
|
||||
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
|
||||
@ -3298,11 +3316,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
|
||||
err = PTR_ERR(trans);
|
||||
break;
|
||||
}
|
||||
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
|
||||
err = -EINTR;
|
||||
} else {
|
||||
err = qgroup_rescan_leaf(trans, path);
|
||||
}
|
||||
|
||||
err = qgroup_rescan_leaf(trans, path);
|
||||
|
||||
if (err > 0)
|
||||
btrfs_commit_transaction(trans);
|
||||
else
|
||||
@ -3316,7 +3332,7 @@ out:
|
||||
if (err > 0 &&
|
||||
fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
|
||||
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
|
||||
} else if (err < 0) {
|
||||
} else if (err < 0 || stopped) {
|
||||
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
|
||||
}
|
||||
mutex_unlock(&fs_info->qgroup_rescan_lock);
|
||||
|
@ -277,7 +277,7 @@ copy_inline_extent:
|
||||
path->slots[0]),
|
||||
size);
|
||||
btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
|
||||
btrfs_set_inode_full_sync(BTRFS_I(dst));
|
||||
ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
|
||||
out:
|
||||
if (!ret && !trans) {
|
||||
@ -494,7 +494,8 @@ process_slot:
|
||||
&clone_info, &trans);
|
||||
if (ret)
|
||||
goto out;
|
||||
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
} else {
|
||||
ASSERT(type == BTRFS_FILE_EXTENT_INLINE);
|
||||
/*
|
||||
* Inline extents always have to start at file offset 0
|
||||
* and can never be bigger than the sector size. We can
|
||||
@ -505,8 +506,12 @@ process_slot:
|
||||
*/
|
||||
ASSERT(key.offset == 0);
|
||||
ASSERT(datal <= fs_info->sectorsize);
|
||||
if (key.offset != 0 || datal > fs_info->sectorsize)
|
||||
return -EUCLEAN;
|
||||
if (WARN_ON(type != BTRFS_FILE_EXTENT_INLINE) ||
|
||||
WARN_ON(key.offset != 0) ||
|
||||
WARN_ON(datal > fs_info->sectorsize)) {
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = clone_copy_inline_extent(inode, path, &new_key,
|
||||
drop_start, datal, size,
|
||||
@ -518,17 +523,22 @@ process_slot:
|
||||
btrfs_release_path(path);
|
||||
|
||||
/*
|
||||
* If this is a new extent update the last_reflink_trans of both
|
||||
* inodes. This is used by fsync to make sure it does not log
|
||||
* multiple checksum items with overlapping ranges. For older
|
||||
* extents we don't need to do it since inode logging skips the
|
||||
* checksums for older extents. Also ignore holes and inline
|
||||
* extents because they don't have checksums in the csum tree.
|
||||
* Whenever we share an extent we update the last_reflink_trans
|
||||
* of each inode to the current transaction. This is needed to
|
||||
* make sure fsync does not log multiple checksum items with
|
||||
* overlapping ranges (because some extent items might refer
|
||||
* only to sections of the original extent). For the destination
|
||||
* inode we do this regardless of the generation of the extents
|
||||
* or even if they are inline extents or explicit holes, to make
|
||||
* sure a full fsync does not skip them. For the source inode,
|
||||
* we only need to update last_reflink_trans in case it's a new
|
||||
* extent that is not a hole or an inline extent, to deal with
|
||||
* the checksums problem on fsync.
|
||||
*/
|
||||
if (extent_gen == trans->transid && disko > 0) {
|
||||
if (extent_gen == trans->transid && disko > 0)
|
||||
BTRFS_I(src)->last_reflink_trans = trans->transid;
|
||||
BTRFS_I(inode)->last_reflink_trans = trans->transid;
|
||||
}
|
||||
|
||||
BTRFS_I(inode)->last_reflink_trans = trans->transid;
|
||||
|
||||
last_dest_end = ALIGN(new_key.offset + datal,
|
||||
fs_info->sectorsize);
|
||||
@ -575,8 +585,7 @@ process_slot:
|
||||
* replaced file extent items.
|
||||
*/
|
||||
if (last_dest_end >= i_size_read(inode))
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
|
||||
&BTRFS_I(inode)->runtime_flags);
|
||||
btrfs_set_inode_full_sync(BTRFS_I(inode));
|
||||
|
||||
ret = btrfs_replace_file_extents(BTRFS_I(inode), path,
|
||||
last_dest_end, destoff + len - 1, NULL, &trans);
|
||||
@ -772,9 +781,7 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
|
||||
if (btrfs_root_readonly(root_out))
|
||||
return -EROFS;
|
||||
|
||||
if (file_in->f_path.mnt != file_out->f_path.mnt ||
|
||||
inode_in->i_sb != inode_out->i_sb)
|
||||
return -EXDEV;
|
||||
ASSERT(inode_in->i_sb == inode_out->i_sb);
|
||||
}
|
||||
|
||||
/* Don't make the dst file partly checksummed */
|
||||
|
@ -2599,9 +2599,9 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
|
||||
|
||||
eb = read_tree_block(fs_info, block->bytenr, block->owner,
|
||||
block->key.offset, block->level, NULL);
|
||||
if (IS_ERR(eb)) {
|
||||
if (IS_ERR(eb))
|
||||
return PTR_ERR(eb);
|
||||
} else if (!extent_buffer_uptodate(eb)) {
|
||||
if (!extent_buffer_uptodate(eb)) {
|
||||
free_extent_buffer(eb);
|
||||
return -EIO;
|
||||
}
|
||||
@ -2997,7 +2997,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
|
||||
|
||||
/* Reserve metadata for this range */
|
||||
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
|
||||
clamped_len);
|
||||
clamped_len, clamped_len);
|
||||
if (ret)
|
||||
goto release_page;
|
||||
|
||||
@ -4123,9 +4123,8 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
|
||||
* this function resumes merging reloc trees with corresponding fs trees.
|
||||
* this is important for keeping the sharing of tree blocks
|
||||
*/
|
||||
int btrfs_recover_relocation(struct btrfs_root *root)
|
||||
int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
LIST_HEAD(reloc_roots);
|
||||
struct btrfs_key key;
|
||||
struct btrfs_root *fs_root;
|
||||
@ -4166,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
|
||||
key.type != BTRFS_ROOT_ITEM_KEY)
|
||||
break;
|
||||
|
||||
reloc_root = btrfs_read_tree_root(root, &key);
|
||||
reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
|
||||
if (IS_ERR(reloc_root)) {
|
||||
err = PTR_ERR(reloc_root);
|
||||
goto out;
|
||||
|
@ -3190,7 +3190,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
|
||||
u64 generation;
|
||||
int mirror_num;
|
||||
struct btrfs_key key;
|
||||
u64 increment = map->stripe_len;
|
||||
u64 increment;
|
||||
u64 offset;
|
||||
u64 extent_logical;
|
||||
u64 extent_physical;
|
||||
|
@ -528,17 +528,12 @@ out:
|
||||
|
||||
static int fs_path_copy(struct fs_path *p, struct fs_path *from)
|
||||
{
|
||||
int ret;
|
||||
|
||||
p->reversed = from->reversed;
|
||||
fs_path_reset(p);
|
||||
|
||||
ret = fs_path_add_path(p, from);
|
||||
|
||||
return ret;
|
||||
return fs_path_add_path(p, from);
|
||||
}
|
||||
|
||||
|
||||
static void fs_path_unreverse(struct fs_path *p)
|
||||
{
|
||||
char *tmp;
|
||||
@ -7477,10 +7472,10 @@ static void dedupe_in_progress_warn(const struct btrfs_root *root)
|
||||
root->root_key.objectid, root->dedupe_in_progress);
|
||||
}
|
||||
|
||||
long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
|
||||
long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
|
||||
{
|
||||
int ret = 0;
|
||||
struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
|
||||
struct btrfs_root *send_root = BTRFS_I(inode)->root;
|
||||
struct btrfs_fs_info *fs_info = send_root->fs_info;
|
||||
struct btrfs_root *clone_root;
|
||||
struct send_ctx *sctx = NULL;
|
||||
|
@ -126,7 +126,7 @@ enum {
|
||||
#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
|
||||
|
||||
#ifdef __KERNEL__
|
||||
long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg);
|
||||
long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -737,6 +737,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
|
||||
u64 thresh = div_factor_fine(space_info->total_bytes, 90);
|
||||
u64 used;
|
||||
|
||||
lockdep_assert_held(&space_info->lock);
|
||||
|
||||
/* If we're just plain full then async reclaim just slows us down. */
|
||||
if ((space_info->bytes_used + space_info->bytes_reserved +
|
||||
global_rsv_size) >= thresh)
|
||||
@ -1061,7 +1063,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
|
||||
trans_rsv->reserved;
|
||||
if (block_rsv_size < space_info->bytes_may_use)
|
||||
delalloc_size = space_info->bytes_may_use - block_rsv_size;
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
/*
|
||||
* We don't want to include the global_rsv in our calculation,
|
||||
@ -1092,6 +1093,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
|
||||
flush = FLUSH_DELAYED_REFS_NR;
|
||||
}
|
||||
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
/*
|
||||
* We don't want to reclaim everything, just a portion, so scale
|
||||
* down the to_reclaim by 1/4. If it takes us down to 0,
|
||||
|
@ -66,6 +66,52 @@ static struct file_system_type btrfs_root_fs_type;
|
||||
|
||||
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
|
||||
/* Text that precedes the state characters in the string built below. */
#define STATE_STRING_PREFACE ": state "
|
||||
/*
 * Buffer size for the state string: the preface including its terminating
 * NUL (counted by sizeof()) plus one character per possible state bit.
 */
#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
|
||||
|
||||
/*
|
||||
* Characters to print to indicate error conditions or uncommon filesystem sate.
|
||||
* RO is not an error.
|
||||
*/
|
||||
static const char fs_state_chars[] = {
|
||||
[BTRFS_FS_STATE_ERROR] = 'E',
|
||||
[BTRFS_FS_STATE_REMOUNTING] = 'M',
|
||||
[BTRFS_FS_STATE_RO] = 0,
|
||||
[BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
|
||||
[BTRFS_FS_STATE_DEV_REPLACING] = 'R',
|
||||
[BTRFS_FS_STATE_DUMMY_FS_INFO] = 0,
|
||||
[BTRFS_FS_STATE_NO_CSUMS] = 'C',
|
||||
[BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L',
|
||||
};
|
||||
|
||||
static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
|
||||
{
|
||||
unsigned int bit;
|
||||
bool states_printed = false;
|
||||
unsigned long fs_state = READ_ONCE(info->fs_state);
|
||||
char *curr = buf;
|
||||
|
||||
memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
|
||||
curr += sizeof(STATE_STRING_PREFACE) - 1;
|
||||
|
||||
for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
|
||||
WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
|
||||
if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
|
||||
*curr++ = fs_state_chars[bit];
|
||||
states_printed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* If no states were printed, reset the buffer */
|
||||
if (!states_printed)
|
||||
curr = buf;
|
||||
|
||||
*curr++ = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Generally the error codes correspond to their respective errors, but there
|
||||
* are a few special cases.
|
||||
@ -128,6 +174,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
|
||||
{
|
||||
struct super_block *sb = fs_info->sb;
|
||||
#ifdef CONFIG_PRINTK
|
||||
char statestr[STATE_STRING_BUF_LEN];
|
||||
const char *errstr;
|
||||
#endif
|
||||
|
||||
@ -140,6 +187,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
errstr = btrfs_decode_error(errno);
|
||||
btrfs_state_to_string(fs_info, statestr);
|
||||
if (fmt) {
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
@ -148,12 +196,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
|
||||
vaf.fmt = fmt;
|
||||
vaf.va = &args;
|
||||
|
||||
pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
|
||||
sb->s_id, function, line, errno, errstr, &vaf);
|
||||
pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
|
||||
sb->s_id, statestr, function, line, errno, errstr, &vaf);
|
||||
va_end(args);
|
||||
} else {
|
||||
pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
|
||||
sb->s_id, function, line, errno, errstr);
|
||||
pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
|
||||
sb->s_id, statestr, function, line, errno, errstr);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -240,11 +288,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
|
||||
vaf.va = &args;
|
||||
|
||||
if (__ratelimit(ratelimit)) {
|
||||
if (fs_info)
|
||||
printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
|
||||
fs_info->sb->s_id, &vaf);
|
||||
else
|
||||
if (fs_info) {
|
||||
char statestr[STATE_STRING_BUF_LEN];
|
||||
|
||||
btrfs_state_to_string(fs_info, statestr);
|
||||
printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
|
||||
fs_info->sb->s_id, statestr, &vaf);
|
||||
} else {
|
||||
printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
|
||||
}
|
||||
}
|
||||
|
||||
va_end(args);
|
||||
@ -861,6 +913,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
break;
|
||||
case Opt_space_cache:
|
||||
case Opt_space_cache_version:
|
||||
/*
|
||||
* We already set FREE_SPACE_TREE above because we have
|
||||
* compat_ro(FREE_SPACE_TREE) set, and we aren't going
|
||||
* to allow v1 to be set for extent tree v2, simply
|
||||
* ignore this setting if we're extent tree v2.
|
||||
*/
|
||||
if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
|
||||
break;
|
||||
if (token == Opt_space_cache ||
|
||||
strcmp(args[0].from, "v1") == 0) {
|
||||
btrfs_clear_opt(info->mount_opt,
|
||||
@ -881,6 +941,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
|
||||
break;
|
||||
case Opt_no_space_cache:
|
||||
/*
|
||||
* We cannot operate without the free space tree with
|
||||
* extent tree v2, ignore this option.
|
||||
*/
|
||||
if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
|
||||
break;
|
||||
if (btrfs_test_opt(info, SPACE_CACHE)) {
|
||||
btrfs_clear_and_info(info, SPACE_CACHE,
|
||||
"disabling disk space caching");
|
||||
@ -896,6 +962,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
"the 'inode_cache' option is deprecated and has no effect since 5.11");
|
||||
break;
|
||||
case Opt_clear_cache:
|
||||
/*
|
||||
* We cannot clear the free space tree with extent tree
|
||||
* v2, ignore this option.
|
||||
*/
|
||||
if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
|
||||
break;
|
||||
btrfs_set_and_info(info, CLEAR_CACHE,
|
||||
"force clearing of disk cache");
|
||||
break;
|
||||
@ -2383,6 +2455,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
|
||||
{
|
||||
struct btrfs_ioctl_vol_args *vol;
|
||||
struct btrfs_device *device = NULL;
|
||||
dev_t devt = 0;
|
||||
int ret = -ENOTTY;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
@ -2402,7 +2475,12 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
|
||||
mutex_unlock(&uuid_mutex);
|
||||
break;
|
||||
case BTRFS_IOC_FORGET_DEV:
|
||||
ret = btrfs_forget_devices(vol->name);
|
||||
if (vol->name[0] != 0) {
|
||||
ret = lookup_bdev(vol->name, &devt);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
ret = btrfs_forget_devices(devt);
|
||||
break;
|
||||
case BTRFS_IOC_DEVICES_READY:
|
||||
mutex_lock(&uuid_mutex);
|
||||
|
@ -283,9 +283,11 @@ BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
|
||||
BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
|
||||
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
|
||||
BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
|
||||
/* Remove once support for zoned allocation is feature complete */
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
/* Remove once support for zoned allocation is feature complete */
|
||||
BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
|
||||
/* Remove once support for extent tree v2 is feature complete */
|
||||
BTRFS_FEAT_ATTR_INCOMPAT(extent_tree_v2, EXTENT_TREE_V2);
|
||||
#endif
|
||||
#ifdef CONFIG_FS_VERITY
|
||||
BTRFS_FEAT_ATTR_COMPAT_RO(verity, VERITY);
|
||||
@ -314,6 +316,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
|
||||
BTRFS_FEAT_ATTR_PTR(raid1c34),
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
BTRFS_FEAT_ATTR_PTR(zoned),
|
||||
BTRFS_FEAT_ATTR_PTR(extent_tree_v2),
|
||||
#endif
|
||||
#ifdef CONFIG_FS_VERITY
|
||||
BTRFS_FEAT_ATTR_PTR(verity),
|
||||
@ -1104,6 +1107,11 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
|
||||
static char btrfs_unknown_feature_names[FEAT_MAX][NUM_FEATURE_BITS][BTRFS_FEATURE_NAME_MAX];
|
||||
static struct btrfs_feature_attr btrfs_feature_attrs[FEAT_MAX][NUM_FEATURE_BITS];
|
||||
|
||||
static_assert(ARRAY_SIZE(btrfs_unknown_feature_names) ==
|
||||
ARRAY_SIZE(btrfs_feature_attrs));
|
||||
static_assert(ARRAY_SIZE(btrfs_unknown_feature_names[0]) ==
|
||||
ARRAY_SIZE(btrfs_feature_attrs[0]));
|
||||
|
||||
static const u64 supported_feature_masks[FEAT_MAX] = {
|
||||
[FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
|
||||
[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
|
||||
@ -1272,11 +1280,6 @@ static void init_feature_attrs(void)
|
||||
struct btrfs_feature_attr *fa;
|
||||
int set, i;
|
||||
|
||||
BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names) !=
|
||||
ARRAY_SIZE(btrfs_feature_attrs));
|
||||
BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names[0]) !=
|
||||
ARRAY_SIZE(btrfs_feature_attrs[0]));
|
||||
|
||||
memset(btrfs_feature_attrs, 0, sizeof(btrfs_feature_attrs));
|
||||
memset(btrfs_unknown_feature_names, 0,
|
||||
sizeof(btrfs_unknown_feature_names));
|
||||
|
@ -15,6 +15,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
|
||||
struct extent_map *em;
|
||||
struct rb_node *node;
|
||||
|
||||
write_lock(&em_tree->lock);
|
||||
while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
|
||||
node = rb_first_cached(&em_tree->map);
|
||||
em = rb_entry(node, struct extent_map, rb_node);
|
||||
@ -32,6 +33,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
|
||||
#endif
|
||||
free_extent_map(em);
|
||||
}
|
||||
write_unlock(&em_tree->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1911,6 +1911,14 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
|
||||
super->cache_generation = 0;
|
||||
if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
|
||||
super->uuid_tree_generation = root_item->generation;
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
root_item = &fs_info->block_group_root->root_item;
|
||||
|
||||
super->block_group_root = root_item->bytenr;
|
||||
super->block_group_root_generation = root_item->generation;
|
||||
super->block_group_root_level = root_item->level;
|
||||
}
|
||||
}
|
||||
|
||||
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
|
||||
@ -2362,6 +2370,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
list_add_tail(&fs_info->chunk_root->dirty_list,
|
||||
&cur_trans->switch_commits);
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
btrfs_set_root_node(&fs_info->block_group_root->root_item,
|
||||
fs_info->block_group_root->node);
|
||||
list_add_tail(&fs_info->block_group_root->dirty_list,
|
||||
&cur_trans->switch_commits);
|
||||
}
|
||||
|
||||
switch_commit_roots(trans);
|
||||
|
||||
ASSERT(list_empty(&cur_trans->dirty_bgs));
|
||||
@ -2490,10 +2505,10 @@ cleanup_transaction:
|
||||
* because btrfs_commit_super will poke cleaner thread and it will process it a
|
||||
* few seconds later.
|
||||
*/
|
||||
int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
|
||||
int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
int ret;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
if (list_empty(&fs_info->dead_roots)) {
|
||||
|
@ -217,7 +217,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);
|
||||
void btrfs_add_dead_root(struct btrfs_root *root);
|
||||
int btrfs_defrag_root(struct btrfs_root *root);
|
||||
void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
|
||||
int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
|
||||
void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
|
||||
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
|
||||
|
@ -639,8 +639,10 @@ static void block_group_err(const struct extent_buffer *eb, int slot,
|
||||
static int check_block_group_item(struct extent_buffer *leaf,
|
||||
struct btrfs_key *key, int slot)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = leaf->fs_info;
|
||||
struct btrfs_block_group_item bgi;
|
||||
u32 item_size = btrfs_item_size(leaf, slot);
|
||||
u64 chunk_objectid;
|
||||
u64 flags;
|
||||
u64 type;
|
||||
|
||||
@ -663,8 +665,23 @@ static int check_block_group_item(struct extent_buffer *leaf,
|
||||
|
||||
read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
|
||||
sizeof(bgi));
|
||||
if (unlikely(btrfs_stack_block_group_chunk_objectid(&bgi) !=
|
||||
BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
|
||||
chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
/*
|
||||
* We don't init the nr_global_roots until we load the global
|
||||
* roots, so this could be 0 at mount time. If it's 0 we'll
|
||||
* just assume we're fine, and later we'll check against our
|
||||
* actual value.
|
||||
*/
|
||||
if (unlikely(fs_info->nr_global_roots &&
|
||||
chunk_objectid >= fs_info->nr_global_roots)) {
|
||||
block_group_err(leaf, slot,
|
||||
"invalid block group global root id, have %llu, needs to be <= %llu",
|
||||
chunk_objectid,
|
||||
fs_info->nr_global_roots);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
} else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
|
||||
block_group_err(leaf, slot,
|
||||
"invalid block group chunk objectid, have %llu expect %llu",
|
||||
btrfs_stack_block_group_chunk_objectid(&bgi),
|
||||
@ -1648,7 +1665,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
|
||||
/* These trees must never be empty */
|
||||
if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID ||
|
||||
owner == BTRFS_CHUNK_TREE_OBJECTID ||
|
||||
owner == BTRFS_EXTENT_TREE_OBJECTID ||
|
||||
owner == BTRFS_DEV_TREE_OBJECTID ||
|
||||
owner == BTRFS_FS_TREE_OBJECTID ||
|
||||
owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
|
||||
@ -1657,12 +1673,25 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
|
||||
owner);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/* Unknown tree */
|
||||
if (unlikely(owner == 0)) {
|
||||
generic_err(leaf, 0,
|
||||
"invalid owner, root 0 is not defined");
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/* EXTENT_TREE_V2 can have empty extent trees. */
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
|
||||
return 0;
|
||||
|
||||
if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
|
||||
generic_err(leaf, 0,
|
||||
"invalid root, root %llu must never be empty",
|
||||
owner);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -17,6 +17,8 @@ struct btrfs_log_ctx {
|
||||
int log_transid;
|
||||
bool log_new_dentries;
|
||||
bool logging_new_name;
|
||||
/* Indicate if the inode being logged was logged before. */
|
||||
bool logged_before;
|
||||
/* Tracks the last logged dir item/index key offset. */
|
||||
u64 last_dir_item_offset;
|
||||
struct inode *inode;
|
||||
@ -32,6 +34,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
|
||||
ctx->log_transid = 0;
|
||||
ctx->log_new_dentries = false;
|
||||
ctx->logging_new_name = false;
|
||||
ctx->logged_before = false;
|
||||
ctx->inode = inode;
|
||||
INIT_LIST_HEAD(&ctx->list);
|
||||
INIT_LIST_HEAD(&ctx->ordered_extents);
|
||||
@ -86,7 +89,7 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
|
||||
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *dir);
|
||||
void btrfs_log_new_name(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *inode, struct btrfs_inode *old_dir,
|
||||
struct dentry *parent);
|
||||
struct dentry *old_dentry, struct btrfs_inode *old_dir,
|
||||
u64 old_dir_index, struct dentry *parent);
|
||||
|
||||
#endif
|
||||
|
@ -534,30 +534,20 @@ error:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool device_path_matched(const char *path, struct btrfs_device *device)
|
||||
{
|
||||
int found;
|
||||
|
||||
rcu_read_lock();
|
||||
found = strcmp(rcu_str_deref(device->name), path);
|
||||
rcu_read_unlock();
|
||||
|
||||
return found == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search and remove all stale (devices which are not mounted) devices.
|
||||
/**
|
||||
* Search and remove all stale devices (which are not mounted).
|
||||
* When both inputs are NULL, it will search and release all stale devices.
|
||||
* path: Optional. When provided will it release all unmounted devices
|
||||
* matching this path only.
|
||||
* skip_dev: Optional. Will skip this device when searching for the stale
|
||||
*
|
||||
* @devt: Optional. When provided will it release all unmounted devices
|
||||
* matching this devt only.
|
||||
* @skip_device: Optional. Will skip this device when searching for the stale
|
||||
* devices.
|
||||
* Return: 0 for success or if @path is NULL.
|
||||
* -EBUSY if @path is a mounted device.
|
||||
* -ENOENT if @path does not match any device in the list.
|
||||
*
|
||||
* Return: 0 for success or if @devt is 0.
|
||||
* -EBUSY if @devt is a mounted device.
|
||||
* -ENOENT if @devt does not match any device in the list.
|
||||
*/
|
||||
static int btrfs_free_stale_devices(const char *path,
|
||||
struct btrfs_device *skip_device)
|
||||
static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
|
||||
struct btrfs_device *device, *tmp_device;
|
||||
@ -565,7 +555,7 @@ static int btrfs_free_stale_devices(const char *path,
|
||||
|
||||
lockdep_assert_held(&uuid_mutex);
|
||||
|
||||
if (path)
|
||||
if (devt)
|
||||
ret = -ENOENT;
|
||||
|
||||
list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
|
||||
@ -575,13 +565,11 @@ static int btrfs_free_stale_devices(const char *path,
|
||||
&fs_devices->devices, dev_list) {
|
||||
if (skip_device && skip_device == device)
|
||||
continue;
|
||||
if (path && !device->name)
|
||||
continue;
|
||||
if (path && !device_path_matched(path, device))
|
||||
if (devt && devt != device->devt)
|
||||
continue;
|
||||
if (fs_devices->opened) {
|
||||
/* for an already deleted device return 0 */
|
||||
if (path && ret != 0)
|
||||
if (devt && ret != 0)
|
||||
ret = -EBUSY;
|
||||
break;
|
||||
}
|
||||
@ -614,7 +602,6 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *device, fmode_t flags,
|
||||
void *holder)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct block_device *bdev;
|
||||
struct btrfs_super_block *disk_super;
|
||||
u64 devid;
|
||||
@ -656,8 +643,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
|
||||
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
|
||||
}
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
if (!blk_queue_nonrot(q))
|
||||
if (!blk_queue_nonrot(bdev_get_queue(bdev)))
|
||||
fs_devices->rotating = true;
|
||||
|
||||
device->bdev = bdev;
|
||||
@ -781,11 +767,17 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||
struct rcu_string *name;
|
||||
u64 found_transid = btrfs_super_generation(disk_super);
|
||||
u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
|
||||
dev_t path_devt;
|
||||
int error;
|
||||
bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
|
||||
BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
|
||||
bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
|
||||
BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
|
||||
|
||||
error = lookup_bdev(path, &path_devt);
|
||||
if (error)
|
||||
return ERR_PTR(error);
|
||||
|
||||
if (fsid_change_in_progress) {
|
||||
if (!has_metadata_uuid)
|
||||
fs_devices = find_fsid_inprogress(disk_super);
|
||||
@ -868,6 +860,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
rcu_assign_pointer(device->name, name);
|
||||
device->devt = path_devt;
|
||||
|
||||
list_add_rcu(&device->dev_list, &fs_devices->devices);
|
||||
fs_devices->num_devices++;
|
||||
@ -928,25 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||
/*
|
||||
* We are going to replace the device path for a given devid,
|
||||
* make sure it's the same device if the device is mounted
|
||||
*
|
||||
* NOTE: the device->fs_info may not be reliable here so pass
|
||||
* in a NULL to message helpers instead. This avoids a possible
|
||||
* use-after-free when the fs_info and fs_info->sb are already
|
||||
* torn down.
|
||||
*/
|
||||
if (device->bdev) {
|
||||
int error;
|
||||
dev_t path_dev;
|
||||
|
||||
error = lookup_bdev(path, &path_dev);
|
||||
if (error) {
|
||||
if (device->devt != path_devt) {
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
if (device->bdev->bd_dev != path_dev) {
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
/*
|
||||
* device->fs_info may not be reliable here, so
|
||||
* pass in a NULL instead. This avoids a
|
||||
* possible use-after-free when the fs_info and
|
||||
* fs_info->sb are already torn down.
|
||||
*/
|
||||
btrfs_warn_in_rcu(NULL,
|
||||
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
|
||||
path, devid, found_transid,
|
||||
@ -954,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||
task_pid_nr(current));
|
||||
return ERR_PTR(-EEXIST);
|
||||
}
|
||||
btrfs_info_in_rcu(device->fs_info,
|
||||
btrfs_info_in_rcu(NULL,
|
||||
"devid %llu device path %s changed to %s scanned by %s (%d)",
|
||||
devid, rcu_str_deref(device->name),
|
||||
path, current->comm,
|
||||
@ -972,6 +955,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||
fs_devices->missing_devices--;
|
||||
clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
|
||||
}
|
||||
device->devt = path_devt;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1331,12 +1315,12 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
|
||||
return disk_super;
|
||||
}
|
||||
|
||||
int btrfs_forget_devices(const char *path)
|
||||
int btrfs_forget_devices(dev_t devt)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&uuid_mutex);
|
||||
ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
|
||||
ret = btrfs_free_stale_devices(devt, NULL);
|
||||
mutex_unlock(&uuid_mutex);
|
||||
|
||||
return ret;
|
||||
@ -1385,10 +1369,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
|
||||
}
|
||||
|
||||
device = device_list_add(path, disk_super, &new_device_added);
|
||||
if (!IS_ERR(device)) {
|
||||
if (new_device_added)
|
||||
btrfs_free_stale_devices(path, device);
|
||||
}
|
||||
if (!IS_ERR(device) && new_device_added)
|
||||
btrfs_free_stale_devices(device->devt, device);
|
||||
|
||||
btrfs_release_disk_super(disk_super);
|
||||
|
||||
@ -2102,6 +2084,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
|
||||
u64 num_devices;
|
||||
int ret = 0;
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
btrfs_err(fs_info, "device remove not supported on extent tree v2 yet");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* The device list in fs_devices is accessed without locks (neither
|
||||
* uuid_mutex nor device_list_mutex) as it won't change on a mounted
|
||||
@ -2606,7 +2593,6 @@ error:
|
||||
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
|
||||
{
|
||||
struct btrfs_root *root = fs_info->dev_root;
|
||||
struct request_queue *q;
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_device *device;
|
||||
struct block_device *bdev;
|
||||
@ -2668,6 +2654,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
|
||||
device->fs_info = fs_info;
|
||||
device->bdev = bdev;
|
||||
ret = lookup_bdev(device_path, &device->devt);
|
||||
if (ret)
|
||||
goto error_free_device;
|
||||
|
||||
ret = btrfs_get_dev_zone_info(device, false);
|
||||
if (ret)
|
||||
@ -2679,7 +2668,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
goto error_free_zone;
|
||||
}
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
|
||||
device->generation = trans->transid;
|
||||
device->io_width = fs_info->sectorsize;
|
||||
@ -2727,7 +2715,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
|
||||
atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
|
||||
|
||||
if (!blk_queue_nonrot(q))
|
||||
if (!blk_queue_nonrot(bdev_get_queue(bdev)))
|
||||
fs_devices->rotating = true;
|
||||
|
||||
orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
|
||||
@ -2814,7 +2802,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
* We can ignore the return value as it typically returns -EINVAL and
|
||||
* only succeeds if the device was an alien.
|
||||
*/
|
||||
btrfs_forget_devices(device_path);
|
||||
btrfs_forget_devices(device->devt);
|
||||
|
||||
/* Update ctime/mtime for blkid or udev */
|
||||
update_dev_time(device_path);
|
||||
@ -3251,6 +3239,12 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
|
||||
u64 length;
|
||||
int ret;
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
btrfs_err(fs_info,
|
||||
"relocate: not supported on extent tree v2 yet");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prevent races with automatic removal of unused block groups.
|
||||
* After we relocate and before we remove the chunk with offset
|
||||
@ -7060,6 +7054,27 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct btrfs_device *handle_missing_device(struct btrfs_fs_info *fs_info,
|
||||
u64 devid, u8 *uuid)
|
||||
{
|
||||
struct btrfs_device *dev;
|
||||
|
||||
if (!btrfs_test_opt(fs_info, DEGRADED)) {
|
||||
btrfs_report_missing_device(fs_info, devid, uuid, true);
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
|
||||
dev = add_missing_dev(fs_info->fs_devices, devid, uuid);
|
||||
if (IS_ERR(dev)) {
|
||||
btrfs_err(fs_info, "failed to init missing device %llu: %ld",
|
||||
devid, PTR_ERR(dev));
|
||||
return dev;
|
||||
}
|
||||
btrfs_report_missing_device(fs_info, devid, uuid, false);
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
|
||||
struct btrfs_chunk *chunk)
|
||||
{
|
||||
@ -7147,28 +7162,17 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
|
||||
BTRFS_UUID_SIZE);
|
||||
args.uuid = uuid;
|
||||
map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, &args);
|
||||
if (!map->stripes[i].dev &&
|
||||
!btrfs_test_opt(fs_info, DEGRADED)) {
|
||||
free_extent_map(em);
|
||||
btrfs_report_missing_device(fs_info, devid, uuid, true);
|
||||
return -ENOENT;
|
||||
}
|
||||
if (!map->stripes[i].dev) {
|
||||
map->stripes[i].dev =
|
||||
add_missing_dev(fs_info->fs_devices, devid,
|
||||
uuid);
|
||||
map->stripes[i].dev = handle_missing_device(fs_info,
|
||||
devid, uuid);
|
||||
if (IS_ERR(map->stripes[i].dev)) {
|
||||
free_extent_map(em);
|
||||
btrfs_err(fs_info,
|
||||
"failed to init missing dev %llu: %ld",
|
||||
devid, PTR_ERR(map->stripes[i].dev));
|
||||
return PTR_ERR(map->stripes[i].dev);
|
||||
}
|
||||
btrfs_report_missing_device(fs_info, devid, uuid, false);
|
||||
}
|
||||
|
||||
set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
|
||||
&(map->stripes[i].dev->dev_state));
|
||||
|
||||
}
|
||||
|
||||
write_lock(&map_tree->lock);
|
||||
@ -8299,10 +8303,12 @@ static int relocating_repair_kthread(void *data)
|
||||
target = cache->start;
|
||||
btrfs_put_block_group(cache);
|
||||
|
||||
sb_start_write(fs_info->sb);
|
||||
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
|
||||
btrfs_info(fs_info,
|
||||
"zoned: skip relocating block group %llu to repair: EBUSY",
|
||||
target);
|
||||
sb_end_write(fs_info->sb);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
@ -8330,6 +8336,7 @@ out:
|
||||
btrfs_put_block_group(cache);
|
||||
mutex_unlock(&fs_info->reclaim_bgs_lock);
|
||||
btrfs_exclop_finish(fs_info);
|
||||
sb_end_write(fs_info->sb);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -72,6 +72,11 @@ struct btrfs_device {
|
||||
/* the mode sent to blkdev_get */
|
||||
fmode_t mode;
|
||||
|
||||
/*
|
||||
* Device's major-minor number. Must be set even if the device is not
|
||||
* opened (bdev == NULL), unless the device is missing.
|
||||
*/
|
||||
dev_t devt;
|
||||
unsigned long dev_state;
|
||||
blk_status_t last_flush_error;
|
||||
|
||||
@ -505,7 +510,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
|
||||
fmode_t flags, void *holder);
|
||||
struct btrfs_device *btrfs_scan_one_device(const char *path,
|
||||
fmode_t flags, void *holder);
|
||||
int btrfs_forget_devices(const char *path);
|
||||
int btrfs_forget_devices(dev_t devt);
|
||||
void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
|
||||
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
|
||||
void btrfs_assign_next_active_device(struct btrfs_device *device,
|
||||
|
173
fs/btrfs/zoned.c
173
fs/btrfs/zoned.c
@ -652,8 +652,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
|
||||
if (model == BLK_ZONED_HM ||
|
||||
(model == BLK_ZONED_HA && incompat_zoned) ||
|
||||
(model == BLK_ZONED_NONE && incompat_zoned)) {
|
||||
struct btrfs_zoned_device_info *zone_info =
|
||||
device->zone_info;
|
||||
struct btrfs_zoned_device_info *zone_info;
|
||||
|
||||
zone_info = device->zone_info;
|
||||
zoned_devices++;
|
||||
@ -1215,12 +1214,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
struct btrfs_device *device;
|
||||
u64 logical = cache->start;
|
||||
u64 length = cache->length;
|
||||
u64 physical = 0;
|
||||
int ret;
|
||||
int i;
|
||||
unsigned int nofs_flag;
|
||||
u64 *alloc_offsets = NULL;
|
||||
u64 *caps = NULL;
|
||||
u64 *physical = NULL;
|
||||
unsigned long *active = NULL;
|
||||
u64 last_alloc = 0;
|
||||
u32 num_sequential = 0, num_conventional = 0;
|
||||
@ -1264,6 +1263,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
goto out;
|
||||
}
|
||||
|
||||
physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS);
|
||||
if (!physical) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
active = bitmap_zalloc(map->num_stripes, GFP_NOFS);
|
||||
if (!active) {
|
||||
ret = -ENOMEM;
|
||||
@ -1277,14 +1282,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
int dev_replace_is_ongoing = 0;
|
||||
|
||||
device = map->stripes[i].dev;
|
||||
physical = map->stripes[i].physical;
|
||||
physical[i] = map->stripes[i].physical;
|
||||
|
||||
if (device->bdev == NULL) {
|
||||
alloc_offsets[i] = WP_MISSING_DEV;
|
||||
continue;
|
||||
}
|
||||
|
||||
is_sequential = btrfs_dev_is_sequential(device, physical);
|
||||
is_sequential = btrfs_dev_is_sequential(device, physical[i]);
|
||||
if (is_sequential)
|
||||
num_sequential++;
|
||||
else
|
||||
@ -1299,21 +1304,21 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
* This zone will be used for allocation, so mark this zone
|
||||
* non-empty.
|
||||
*/
|
||||
btrfs_dev_clear_zone_empty(device, physical);
|
||||
btrfs_dev_clear_zone_empty(device, physical[i]);
|
||||
|
||||
down_read(&dev_replace->rwsem);
|
||||
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
|
||||
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
|
||||
btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical);
|
||||
btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]);
|
||||
up_read(&dev_replace->rwsem);
|
||||
|
||||
/*
|
||||
* The group is mapped to a sequential zone. Get the zone write
|
||||
* pointer to determine the allocation offset within the zone.
|
||||
*/
|
||||
WARN_ON(!IS_ALIGNED(physical, fs_info->zone_size));
|
||||
WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size));
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
ret = btrfs_get_dev_zone(device, physical, &zone);
|
||||
ret = btrfs_get_dev_zone(device, physical[i], &zone);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
if (ret == -EIO || ret == -EOPNOTSUPP) {
|
||||
ret = 0;
|
||||
@ -1339,7 +1344,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
case BLK_ZONE_COND_READONLY:
|
||||
btrfs_err(fs_info,
|
||||
"zoned: offline/readonly zone %llu on device %s (devid %llu)",
|
||||
physical >> device->zone_info->zone_size_shift,
|
||||
physical[i] >> device->zone_info->zone_size_shift,
|
||||
rcu_str_deref(device->name), device->devid);
|
||||
alloc_offsets[i] = WP_MISSING_DEV;
|
||||
break;
|
||||
@ -1404,7 +1409,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
if (alloc_offsets[0] == WP_MISSING_DEV) {
|
||||
btrfs_err(fs_info,
|
||||
"zoned: cannot recover write pointer for zone %llu",
|
||||
physical);
|
||||
physical[0]);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
@ -1413,6 +1418,42 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
cache->zone_is_active = test_bit(0, active);
|
||||
break;
|
||||
case BTRFS_BLOCK_GROUP_DUP:
|
||||
if (map->type & BTRFS_BLOCK_GROUP_DATA) {
|
||||
btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (alloc_offsets[0] == WP_MISSING_DEV) {
|
||||
btrfs_err(fs_info,
|
||||
"zoned: cannot recover write pointer for zone %llu",
|
||||
physical[0]);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
if (alloc_offsets[1] == WP_MISSING_DEV) {
|
||||
btrfs_err(fs_info,
|
||||
"zoned: cannot recover write pointer for zone %llu",
|
||||
physical[1]);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
if (alloc_offsets[0] != alloc_offsets[1]) {
|
||||
btrfs_err(fs_info,
|
||||
"zoned: write pointer offset mismatch of zones in DUP profile");
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
if (test_bit(0, active) != test_bit(1, active)) {
|
||||
if (!btrfs_zone_activate(cache)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
cache->zone_is_active = test_bit(0, active);
|
||||
}
|
||||
cache->alloc_offset = alloc_offsets[0];
|
||||
cache->zone_capacity = min(caps[0], caps[1]);
|
||||
break;
|
||||
case BTRFS_BLOCK_GROUP_RAID1:
|
||||
case BTRFS_BLOCK_GROUP_RAID0:
|
||||
case BTRFS_BLOCK_GROUP_RAID10:
|
||||
@ -1465,6 +1506,7 @@ out:
|
||||
cache->physical_map = NULL;
|
||||
}
|
||||
bitmap_free(active);
|
||||
kfree(physical);
|
||||
kfree(caps);
|
||||
kfree(alloc_offsets);
|
||||
free_extent_map(em);
|
||||
@ -1781,50 +1823,55 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
|
||||
struct btrfs_device *device;
|
||||
u64 physical;
|
||||
bool ret;
|
||||
int i;
|
||||
|
||||
if (!btrfs_is_zoned(block_group->fs_info))
|
||||
return true;
|
||||
|
||||
map = block_group->physical_map;
|
||||
/* Currently support SINGLE profile only */
|
||||
ASSERT(map->num_stripes == 1);
|
||||
device = map->stripes[0].dev;
|
||||
physical = map->stripes[0].physical;
|
||||
|
||||
if (device->zone_info->max_active_zones == 0)
|
||||
return true;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
|
||||
if (block_group->zone_is_active) {
|
||||
ret = true;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* No space left */
|
||||
if (block_group->alloc_offset == block_group->zone_capacity) {
|
||||
ret = false;
|
||||
goto out_unlock;
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
device = map->stripes[i].dev;
|
||||
physical = map->stripes[i].physical;
|
||||
|
||||
if (device->zone_info->max_active_zones == 0)
|
||||
continue;
|
||||
|
||||
/* No space left */
|
||||
if (block_group->alloc_offset == block_group->zone_capacity) {
|
||||
ret = false;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (!btrfs_dev_set_active_zone(device, physical)) {
|
||||
/* Cannot activate the zone */
|
||||
ret = false;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* Successfully activated all the zones */
|
||||
if (i == map->num_stripes - 1)
|
||||
block_group->zone_is_active = 1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
if (!btrfs_dev_set_active_zone(device, physical)) {
|
||||
/* Cannot activate the zone */
|
||||
ret = false;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* Successfully activated all the zones */
|
||||
block_group->zone_is_active = 1;
|
||||
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
/* For the active block group list */
|
||||
btrfs_get_block_group(block_group);
|
||||
if (block_group->zone_is_active) {
|
||||
/* For the active block group list */
|
||||
btrfs_get_block_group(block_group);
|
||||
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
ASSERT(list_empty(&block_group->active_bg_list));
|
||||
list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
list_add_tail(&block_group->active_bg_list,
|
||||
&fs_info->zone_active_bgs);
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
@ -1840,19 +1887,12 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
|
||||
struct btrfs_device *device;
|
||||
u64 physical;
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
if (!btrfs_is_zoned(fs_info))
|
||||
return 0;
|
||||
|
||||
map = block_group->physical_map;
|
||||
/* Currently support SINGLE profile only */
|
||||
ASSERT(map->num_stripes == 1);
|
||||
|
||||
device = map->stripes[0].dev;
|
||||
physical = map->stripes[0].physical;
|
||||
|
||||
if (device->zone_info->max_active_zones == 0)
|
||||
return 0;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
if (!block_group->zone_is_active) {
|
||||
@ -1904,25 +1944,34 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
|
||||
btrfs_clear_data_reloc_bg(block_group);
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
|
||||
physical >> SECTOR_SHIFT,
|
||||
device->zone_info->zone_size >> SECTOR_SHIFT,
|
||||
GFP_NOFS);
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
device = map->stripes[i].dev;
|
||||
physical = map->stripes[i].physical;
|
||||
|
||||
if (device->zone_info->max_active_zones == 0)
|
||||
continue;
|
||||
|
||||
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
|
||||
physical >> SECTOR_SHIFT,
|
||||
device->zone_info->zone_size >> SECTOR_SHIFT,
|
||||
GFP_NOFS);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
btrfs_dev_clear_active_zone(device, physical);
|
||||
}
|
||||
btrfs_dec_block_group_ro(block_group);
|
||||
|
||||
if (!ret) {
|
||||
btrfs_dev_clear_active_zone(device, physical);
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
ASSERT(!list_empty(&block_group->active_bg_list));
|
||||
list_del_init(&block_group->active_bg_list);
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
ASSERT(!list_empty(&block_group->active_bg_list));
|
||||
list_del_init(&block_group->active_bg_list);
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
/* For active_bg_list */
|
||||
btrfs_put_block_group(block_group);
|
||||
|
||||
/* For active_bg_list */
|
||||
btrfs_put_block_group(block_group);
|
||||
}
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
|
||||
|
@ -157,11 +157,6 @@ extern char *simple_dname(struct dentry *, char *, int);
|
||||
extern void dput_to_list(struct dentry *, struct list_head *);
|
||||
extern void shrink_dentry_list(struct list_head *);
|
||||
|
||||
/*
|
||||
* read_write.c
|
||||
*/
|
||||
extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
|
||||
|
||||
/*
|
||||
* pipe.c
|
||||
*/
|
||||
|
@ -236,9 +236,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
|
||||
|
||||
if (!src_file.file)
|
||||
return -EBADF;
|
||||
ret = -EXDEV;
|
||||
if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
|
||||
goto fdput;
|
||||
cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
|
||||
olen, 0);
|
||||
if (cloned < 0)
|
||||
@ -247,7 +244,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
|
||||
ret = -EINVAL;
|
||||
else
|
||||
ret = 0;
|
||||
fdput:
|
||||
fdput(src_file);
|
||||
return ret;
|
||||
}
|
||||
|
@ -385,6 +385,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
|
||||
return security_file_permission(file,
|
||||
read_write == READ ? MAY_READ : MAY_WRITE);
|
||||
}
|
||||
EXPORT_SYMBOL(rw_verify_area);
|
||||
|
||||
static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
|
||||
{
|
||||
@ -1617,24 +1618,16 @@ int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Performs necessary checks before doing a write
|
||||
*
|
||||
* Can adjust writing position or amount of bytes to write.
|
||||
* Returns appropriate error code that caller should return or
|
||||
* zero in case that write should be allowed.
|
||||
*/
|
||||
ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
|
||||
/* Like generic_write_checks(), but takes size of write instead of iter. */
|
||||
int generic_write_checks_count(struct kiocb *iocb, loff_t *count)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
loff_t count;
|
||||
int ret;
|
||||
|
||||
if (IS_SWAPFILE(inode))
|
||||
return -ETXTBSY;
|
||||
|
||||
if (!iov_iter_count(from))
|
||||
if (!*count)
|
||||
return 0;
|
||||
|
||||
/* FIXME: this is for backwards compatibility with 2.4 */
|
||||
@ -1644,8 +1637,23 @@ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
|
||||
if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
|
||||
return -EINVAL;
|
||||
|
||||
count = iov_iter_count(from);
|
||||
ret = generic_write_check_limits(file, iocb->ki_pos, &count);
|
||||
return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);
|
||||
}
|
||||
EXPORT_SYMBOL(generic_write_checks_count);
|
||||
|
||||
/*
|
||||
* Performs necessary checks before doing a write
|
||||
*
|
||||
* Can adjust writing position or amount of bytes to write.
|
||||
* Returns appropriate error code that caller should return or
|
||||
* zero in case that write should be allowed.
|
||||
*/
|
||||
ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
loff_t count = iov_iter_count(from);
|
||||
int ret;
|
||||
|
||||
ret = generic_write_checks_count(iocb, &count);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -362,11 +362,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
|
||||
|
||||
WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
|
||||
|
||||
/*
|
||||
* FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
|
||||
* the same mount. Practically, they only need to be on the same file
|
||||
* system.
|
||||
*/
|
||||
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
|
||||
return -EXDEV;
|
||||
|
||||
@ -458,7 +453,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
|
||||
goto out_drop_write;
|
||||
|
||||
ret = -EXDEV;
|
||||
if (src_file->f_path.mnt != dst_file->f_path.mnt)
|
||||
if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb)
|
||||
goto out_drop_write;
|
||||
|
||||
ret = -EISDIR;
|
||||
|
@ -3130,6 +3130,7 @@ extern int sb_min_blocksize(struct super_block *, int);
|
||||
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
|
||||
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
|
||||
extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
|
||||
int generic_write_checks_count(struct kiocb *iocb, loff_t *count);
|
||||
extern int generic_write_check_limits(struct file *file, loff_t pos,
|
||||
loff_t *count);
|
||||
extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
|
||||
@ -3173,6 +3174,7 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
|
||||
int whence, loff_t size);
|
||||
extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
|
||||
extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
|
||||
int rw_verify_area(int, struct file *, const loff_t *, size_t);
|
||||
extern int generic_file_open(struct inode * inode, struct file * filp);
|
||||
extern int nonseekable_open(struct inode * inode, struct file * filp);
|
||||
extern int stream_open(struct inode * inode, struct file * filp);
|
||||
|
@ -53,6 +53,7 @@ struct btrfs_space_info;
|
||||
{ BTRFS_TREE_RELOC_OBJECTID, "TREE_RELOC" }, \
|
||||
{ BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" }, \
|
||||
{ BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" }, \
|
||||
{ BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },\
|
||||
{ BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" })
|
||||
|
||||
#define show_root_type(obj) \
|
||||
|
@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args {
|
||||
#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10)
|
||||
#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11)
|
||||
#define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12)
|
||||
#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13)
|
||||
|
||||
struct btrfs_ioctl_feature_flags {
|
||||
__u64 compat_flags;
|
||||
@ -868,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args {
|
||||
__u8 align[7];
|
||||
};
|
||||
|
||||
/*
|
||||
* Data and metadata for an encoded read or write.
|
||||
*
|
||||
* Encoded I/O bypasses any encoding automatically done by the filesystem (e.g.,
|
||||
* compression). This can be used to read the compressed contents of a file or
|
||||
* write pre-compressed data directly to a file.
|
||||
*
|
||||
* BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially
|
||||
* preadv/pwritev with additional metadata about how the data is encoded and the
|
||||
* size of the unencoded data.
|
||||
*
|
||||
* BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills
|
||||
* the metadata fields, and returns the size of the encoded data. It reads one
|
||||
* extent per call. It can also read data which is not encoded.
|
||||
*
|
||||
* BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data
|
||||
* from the iovecs, and returns the size of the encoded data. Note that the
|
||||
* encoded data is not validated when it is written; if it is not valid (e.g.,
|
||||
* it cannot be decompressed), then a subsequent read may return an error.
|
||||
*
|
||||
* Since the filesystem page cache contains decoded data, encoded I/O bypasses
|
||||
* the page cache. Encoded I/O requires CAP_SYS_ADMIN.
|
||||
*/
|
||||
struct btrfs_ioctl_encoded_io_args {
|
||||
/* Input parameters for both reads and writes. */
|
||||
|
||||
/*
|
||||
* iovecs containing encoded data.
|
||||
*
|
||||
* For reads, if the size of the encoded data is larger than the sum of
|
||||
* iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with
|
||||
* ENOBUFS.
|
||||
*
|
||||
* For writes, the size of the encoded data is the sum of iov[n].iov_len
|
||||
* for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may
|
||||
* increase in the future). This must also be less than or equal to
|
||||
* unencoded_len.
|
||||
*/
|
||||
const struct iovec __user *iov;
|
||||
/* Number of iovecs. */
|
||||
unsigned long iovcnt;
|
||||
/*
|
||||
* Offset in file.
|
||||
*
|
||||
* For writes, must be aligned to the sector size of the filesystem.
|
||||
*/
|
||||
__s64 offset;
|
||||
/* Currently must be zero. */
|
||||
__u64 flags;
|
||||
|
||||
/*
|
||||
* For reads, the following members are output parameters that will
|
||||
* contain the returned metadata for the encoded data.
|
||||
* For writes, the following members must be set to the metadata for the
|
||||
* encoded data.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Length of the data in the file.
|
||||
*
|
||||
* Must be less than or equal to unencoded_len - unencoded_offset. For
|
||||
* writes, must be aligned to the sector size of the filesystem unless
|
||||
* the data ends at or beyond the current end of the file.
|
||||
*/
|
||||
__u64 len;
|
||||
/*
|
||||
* Length of the unencoded (i.e., decrypted and decompressed) data.
|
||||
*
|
||||
* For writes, must be no more than 128 KiB (this limit may increase in
|
||||
* the future). If the unencoded data is actually longer than
|
||||
* unencoded_len, then it is truncated; if it is shorter, then it is
|
||||
* extended with zeroes.
|
||||
*/
|
||||
__u64 unencoded_len;
|
||||
/*
|
||||
* Offset from the first byte of the unencoded data to the first byte of
|
||||
* logical data in the file.
|
||||
*
|
||||
* Must be less than unencoded_len.
|
||||
*/
|
||||
__u64 unencoded_offset;
|
||||
/*
|
||||
* BTRFS_ENCODED_IO_COMPRESSION_* type.
|
||||
*
|
||||
* For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE.
|
||||
*/
|
||||
__u32 compression;
|
||||
/* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */
|
||||
__u32 encryption;
|
||||
/*
|
||||
* Reserved for future expansion.
|
||||
*
|
||||
* For reads, always returned as zero. Users should check for non-zero
|
||||
* bytes. If there are any, then the kernel has a newer version of this
|
||||
* structure with additional information that the user definition is
|
||||
* missing.
|
||||
*
|
||||
* For writes, must be zeroed.
|
||||
*/
|
||||
__u8 reserved[64];
|
||||
};
|
||||
|
||||
/* Data is not compressed. */
|
||||
#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0
|
||||
/* Data is compressed as a single zlib stream. */
|
||||
#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1
|
||||
/*
|
||||
* Data is compressed as a single zstd frame with the windowLog compression
|
||||
* parameter set to no more than 17.
|
||||
*/
|
||||
#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2
|
||||
/*
|
||||
* Data is compressed sector by sector (using the sector size indicated by the
|
||||
* name of the constant) with LZO1X and wrapped in the format documented in
|
||||
* fs/btrfs/lzo.c. For writes, the compression sector size must match the
|
||||
* filesystem sector size.
|
||||
*/
|
||||
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3
|
||||
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4
|
||||
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5
|
||||
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6
|
||||
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7
|
||||
#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8
|
||||
|
||||
/* Data is not encrypted. */
|
||||
#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
|
||||
#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
|
||||
|
||||
/* Error codes as returned by the kernel */
|
||||
enum btrfs_err_code {
|
||||
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
|
||||
@ -996,5 +1125,9 @@ enum btrfs_err_code {
|
||||
struct btrfs_ioctl_ino_lookup_user_args)
|
||||
#define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
|
||||
struct btrfs_ioctl_vol_args_v2)
|
||||
#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \
|
||||
struct btrfs_ioctl_encoded_io_args)
|
||||
#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
|
||||
struct btrfs_ioctl_encoded_io_args)
|
||||
|
||||
#endif /* _UAPI_LINUX_BTRFS_H */
|
||||
|
@ -53,6 +53,9 @@
|
||||
/* tracks free space in block groups. */
|
||||
#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
|
||||
|
||||
/* Holds the block group items for extent tree v2. */
|
||||
#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL
|
||||
|
||||
/* device stats in the device tree */
|
||||
#define BTRFS_DEV_STATS_OBJECTID 0ULL
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user