for-5.18-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmI44SgACgkQxWXV+ddt
 WDtzyg//YgMKr05jRsU3I/pIQ9znuKZmmllThwF63ZRG4PvKz2QfzvKdrMuzNjru
 5kHbG59iJqtLmU/aVsdp8mL6mmg5U3Ym2bIRsrW5m4HTtTowKdirvL/lQ3/tWm8j
 CSDJhUdCL2SwFjpru+4cxOeHLXNSfsk4BoCu8nsLitL+oXv/EPo/dkmu6nPjiMY3
 RjsIDBeDEf7J20KOuP/qJuN2YOAT7TeISPD3Ow4aDsmndWQ8n6KehEmAZb7QuqZQ
 SYubZ2wTb9HuPH/qpiTIA7innBIr+JkYtUYlz2xxixM2BUWNfqD6oKHw9RgOY5Sg
 CULFssw0i7cgGKsvuPJw1zdM002uG4wwXKigGiyljTVWvxneyr4mNDWiGad+LyFJ
 XWhnABPidkLs/1zbUkJ23DVub5VlfZsypkFDJAUXI0nGu3VrhjDfTYMa8eCe2L/F
 YuGG6CrAC+5K/arKAWTVj7hOb+52UzBTEBJz60LJJ6dS9eQoBy857V6pfo7w7ukZ
 t/tqA6q75O4tk/G3Ix3V1CjuAH3kJE6qXrvBxhpu8aZNjofopneLyGqS5oahpcE8
 8edtT+ZZhNuU9sLSEJCJATVxXRDdNzpQ8CHgOR5HOUbmM/vwKNzHPfRQzDnImznw
 UaUlFaaHwK17M6Y/6CnMecz26U2nVSJ7pyh39mb784XYe2a1efE=
 =YARd
 -----END PGP SIGNATURE-----

Merge tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "This contains feature updates, performance improvements, preparatory
  and core work and some related VFS updates:

  Features:

   - encoded read/write ioctls, allows user space to read or write raw
     data directly to extents (now compressed, encrypted in the future),
     will be used by send/receive v2 where it saves processing time

   - zoned mode now works with metadata DUP (the mkfs.btrfs default)

   - error message header updates:
      - print error state: transaction abort, other error, log tree
        errors
      - print transient filesystem state: remount, device replace,
        ignored checksum verifications

   - tree-checker: verify the transaction id of the to-be-written dirty
     extent buffer

  Performance improvements for fsync:

   - directory logging speedups (up to -90% run time)

   - avoid logging all directory changes during renames (up to -60% run
     time)

   - avoid inode logging during rename and link when possible (up to
     -60% run time)

   - prepare extents to be logged before locking a log tree path
     (throughput +7%)

   - stop copying old file extents when doing a full fsync()

   - improved logging of old extents after truncate

  Core, fixes:

   - improved stale device identification by dev_t and not just path
     (for devices that are behind other layers like device mapper)

   - continued extent tree v2 preparatory work
      - disable features that won't work yet
      - add wrappers and abstractions for new tree roots

   - improved error handling

   - add super block write annotations around background block group
     reclaim

   - fix device scanning messages potentially accessing stale pointer

   - cleanups and refactoring

  VFS:

   - allow reflinks/deduplication from two different mounts of the same
     filesystem

   - export and add helpers for read/write range verification, for the
     encoded ioctls"

* tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (98 commits)
  btrfs: zoned: put block group after final usage
  btrfs: don't access possibly stale fs_info data in device_list_add
  btrfs: add lockdep_assert_held to need_preemptive_reclaim
  btrfs: verify the tranisd of the to-be-written dirty extent buffer
  btrfs: unify the error handling of btrfs_read_buffer()
  btrfs: unify the error handling pattern for read_tree_block()
  btrfs: factor out do_free_extent_accounting helper
  btrfs: remove last_ref from the extent freeing code
  btrfs: add a alloc_reserved_extent helper
  btrfs: remove BUG_ON(ret) in alloc_reserved_tree_block
  btrfs: add and use helper for unlinking inode during log replay
  btrfs: extend locking to all space_info members accesses
  btrfs: zoned: mark relocation as writing
  fs: allow cross-vfsmount reflink/dedupe
  btrfs: remove the cross file system checks from remap
  btrfs: pass btrfs_fs_info to btrfs_recover_relocation
  btrfs: pass btrfs_fs_info for deleting snapshots and cleaner
  btrfs: add filesystems state details to error messages
  btrfs: deal with unexpected extent type during reflinking
  btrfs: fix unexpected error path when reflinking an inline extent
  ...
This commit is contained in:
Linus Torvalds 2022-03-22 10:51:40 -07:00
commit 5191290407
50 changed files with 3113 additions and 1335 deletions

View File

@ -789,11 +789,13 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
if (IS_ERR(eb)) {
free_pref(ref);
return PTR_ERR(eb);
} else if (!extent_buffer_uptodate(eb)) {
}
if (!extent_buffer_uptodate(eb)) {
free_pref(ref);
free_extent_buffer(eb);
return -EIO;
}
if (lock)
btrfs_tree_read_lock(eb);
if (btrfs_header_level(eb) == 0)
@ -1335,7 +1337,8 @@ again:
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
} else if (!extent_buffer_uptodate(eb)) {
}
if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
ret = -EIO;
goto out;

View File

@ -1522,8 +1522,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
return;
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
sb_start_write(fs_info->sb);
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
sb_end_write(fs_info->sb);
return;
}
/*
* Long running balances can keep us blocked here for eternity, so
@ -1531,6 +1535,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
*/
if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
btrfs_exclop_finish(fs_info);
sb_end_write(fs_info->sb);
return;
}
@ -1605,6 +1610,7 @@ next:
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_exclop_finish(fs_info);
sb_end_write(fs_info->sb);
}
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
@ -2006,6 +2012,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
cache->length = key->offset;
cache->used = btrfs_stack_block_group_used(bgi);
cache->flags = btrfs_stack_block_group_flags(bgi);
cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
set_free_space_tree_thresholds(cache);
@ -2288,7 +2295,7 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
spin_lock(&block_group->lock);
btrfs_set_stack_block_group_used(&bgi, block_group->used);
btrfs_set_stack_block_group_chunk_objectid(&bgi,
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
block_group->global_root_id);
btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
key.objectid = block_group->start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@ -2444,6 +2451,27 @@ next:
btrfs_trans_release_chunk_metadata(trans);
}
/*
* For extent tree v2 we use the block_group_item->chunk_offset to point at our
* global root id. For v1 it's always set to BTRFS_FIRST_CHUNK_TREE_OBJECTID.
*/
static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
{
u64 div = SZ_1G;
u64 index;
if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
return BTRFS_FIRST_CHUNK_TREE_OBJECTID;
/* If we have a smaller fs index based on 128MiB. */
if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL))
div = SZ_128M;
offset = div64_u64(offset, div);
div64_u64_rem(offset, fs_info->nr_global_roots, &index);
return index;
}
struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
u64 bytes_used, u64 type,
u64 chunk_offset, u64 size)
@ -2464,6 +2492,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
cache->needs_free_space = 1;
@ -2693,7 +2723,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
btrfs_set_stack_block_group_used(&bgi, cache->used);
btrfs_set_stack_block_group_chunk_objectid(&bgi,
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
cache->global_root_id);
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
btrfs_mark_buffer_dirty(leaf);

View File

@ -68,6 +68,7 @@ struct btrfs_block_group {
u64 bytes_super;
u64 flags;
u64 cache_generation;
u64 global_root_id;
/*
* If the free space extent count exceeds this number, convert the block

View File

@ -13,6 +13,13 @@
#include "ordered-data.h"
#include "delayed-inode.h"
/*
* Since we search a directory based on f_pos (struct dir_context::pos) we have
* to start at 2 since '.' and '..' have f_pos of 0 and 1 respectively, so
* everybody else has to start at 2 (see btrfs_real_readdir() and dir_emit_dots()).
*/
#define BTRFS_DIR_START_INDEX 2
/*
* ordered_data_close is set by truncate when a file that used
* to have good data has been truncated to zero. When it is set
@ -173,8 +180,9 @@ struct btrfs_inode {
u64 disk_i_size;
/*
* if this is a directory then index_cnt is the counter for the index
* number for new files that are created
* If this is a directory then index_cnt is the counter for the index
* number for new files that are created. For an empty directory, this
* must be initialized to BTRFS_DIR_START_INDEX.
*/
u64 index_cnt;
@ -333,6 +341,36 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
spin_unlock(&inode->lock);
}
/*
* Should be called while holding the inode's VFS lock in exclusive mode or in a
* context where no one else can access the inode concurrently (during inode
* creation or when loading an inode from disk).
*/
static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
{
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
/*
* The inode may have been part of a reflink operation in the last
* transaction that modified it, and then a fsync has reset the
* last_reflink_trans to avoid subsequent fsyncs in the same
* transaction to do unnecessary work. So update last_reflink_trans
* to the last_trans value (we have to be pessimistic and assume a
* reflink happened).
*
* The ->last_trans is protected by the inode's spinlock and we can
* have a concurrent ordered extent completion update it. Also set
* last_reflink_trans to ->last_trans only if the former is less than
* the later, because we can be called in a context where
* last_reflink_trans was set to the current transaction generation
* while ->last_trans was not yet updated in the current transaction,
* and therefore has a lower value.
*/
spin_lock(&inode->lock);
if (inode->last_reflink_trans < inode->last_trans)
inode->last_reflink_trans = inode->last_trans;
spin_unlock(&inode->lock);
}
static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
bool ret = false;

View File

@ -219,7 +219,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
bi_size += bvec->bv_len;
if (bio->bi_status)
cb->errors = 1;
cb->status = bio->bi_status;
ASSERT(bi_size && bi_size <= cb->compressed_len);
last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
@ -234,7 +234,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
return last_io;
}
static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
static void finish_compressed_bio_read(struct compressed_bio *cb)
{
unsigned int index;
struct page *page;
@ -247,19 +247,18 @@ static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bi
}
/* Do io completion on the original bio */
if (cb->errors) {
bio_io_error(cb->orig_bio);
if (cb->status != BLK_STS_OK) {
cb->orig_bio->bi_status = cb->status;
bio_endio(cb->orig_bio);
} else {
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
ASSERT(bio);
ASSERT(!bio->bi_status);
/*
* We have verified the checksum already, set page checked so
* the end_io handlers know about it
*/
ASSERT(!bio_flagged(bio, BIO_CLONED));
ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
u64 bvec_start = page_offset(bvec->bv_page) +
bvec->bv_offset;
@ -308,7 +307,7 @@ static void end_compressed_bio_read(struct bio *bio)
* Some IO in this cb have failed, just skip checksum as there
* is no way it could be correct.
*/
if (cb->errors == 1)
if (cb->status != BLK_STS_OK)
goto csum_failed;
inode = cb->inode;
@ -324,8 +323,8 @@ static void end_compressed_bio_read(struct bio *bio)
csum_failed:
if (ret)
cb->errors = 1;
finish_compressed_bio_read(cb, bio);
cb->status = errno_to_blk_status(ret);
finish_compressed_bio_read(cb);
out:
bio_put(bio);
}
@ -342,11 +341,12 @@ static noinline void end_compressed_writeback(struct inode *inode,
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct page *pages[16];
unsigned long nr_pages = end_index - index + 1;
const int errno = blk_status_to_errno(cb->status);
int i;
int ret;
if (cb->errors)
mapping_set_error(inode->i_mapping, -EIO);
if (errno)
mapping_set_error(inode->i_mapping, errno);
while (nr_pages > 0) {
ret = find_get_pages_contig(inode->i_mapping, index,
@ -358,7 +358,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
continue;
}
for (i = 0; i < ret; i++) {
if (cb->errors)
if (errno)
SetPageError(pages[i]);
btrfs_page_clamp_clear_writeback(fs_info, pages[i],
cb->start, cb->len);
@ -381,9 +381,10 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
*/
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
cb->start, cb->start + cb->len - 1,
!cb->errors);
cb->status == BLK_STS_OK);
end_compressed_writeback(inode, cb);
if (cb->writeback)
end_compressed_writeback(inode, cb);
/* Note, our inode could be gone now */
/*
@ -506,7 +507,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct page **compressed_pages,
unsigned int nr_pages,
unsigned int write_flags,
struct cgroup_subsys_state *blkcg_css)
struct cgroup_subsys_state *blkcg_css,
bool writeback)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio = NULL;
@ -524,13 +526,14 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
if (!cb)
return BLK_STS_RESOURCE;
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
cb->errors = 0;
cb->status = BLK_STS_OK;
cb->inode = &inode->vfs_inode;
cb->start = start;
cb->len = len;
cb->mirror_num = 0;
cb->compressed_pages = compressed_pages;
cb->compressed_len = compressed_len;
cb->writeback = writeback;
cb->orig_bio = NULL;
cb->nr_pages = nr_pages;
@ -591,7 +594,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
if (submit) {
if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, start, 1);
ret = btrfs_csum_one_bio(inode, bio, start, true);
if (ret)
goto finish_cb;
}
@ -808,7 +811,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
blk_status_t ret = BLK_STS_RESOURCE;
blk_status_t ret;
int faili = 0;
u8 *sums;
@ -821,17 +824,21 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
read_unlock(&em_tree->lock);
if (!em)
return BLK_STS_IOERR;
if (!em) {
ret = BLK_STS_IOERR;
goto out;
}
ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
if (!cb) {
ret = BLK_STS_RESOURCE;
goto out;
}
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
cb->errors = 0;
cb->status = BLK_STS_OK;
cb->inode = inode;
cb->mirror_num = mirror_num;
sums = cb->sums;
@ -851,8 +858,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
GFP_NOFS);
if (!cb->compressed_pages)
if (!cb->compressed_pages) {
ret = BLK_STS_RESOURCE;
goto fail1;
}
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS);
@ -938,7 +947,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio = NULL;
}
}
return 0;
return BLK_STS_OK;
fail2:
while (faili >= 0) {
@ -951,6 +960,8 @@ fail1:
kfree(cb);
out:
free_extent_map(em);
bio->bi_status = ret;
bio_endio(bio);
return ret;
finish_cb:
if (comp_bio) {
@ -970,7 +981,7 @@ finish_cb:
*/
ASSERT(refcount_read(&cb->pending_sectors));
/* Now we are the only one referring @cb, can finish it safely. */
finish_compressed_bio_read(cb, NULL);
finish_compressed_bio_read(cb);
return ret;
}

View File

@ -22,6 +22,8 @@ struct btrfs_inode;
/* Maximum length of compressed data stored on disk */
#define BTRFS_MAX_COMPRESSED (SZ_128K)
static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
/* Maximum size of data before compression */
#define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
@ -52,8 +54,11 @@ struct compressed_bio {
/* The compression algorithm for this bio */
u8 compress_type;
/* Whether this is a write for writeback. */
bool writeback;
/* IO errors */
u8 errors;
blk_status_t status;
int mirror_num;
/* for reads, this is the bio we are copying the data into */
@ -95,7 +100,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct page **compressed_pages,
unsigned int nr_pages,
unsigned int write_flags,
struct cgroup_subsys_state *blkcg_css);
struct cgroup_subsys_state *blkcg_css,
bool writeback);
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);

View File

@ -846,9 +846,11 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
btrfs_header_owner(parent),
btrfs_node_ptr_generation(parent, slot),
level - 1, &first_key);
if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
if (IS_ERR(eb))
return eb;
if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
eb = ERR_PTR(-EIO);
return ERR_PTR(-EIO);
}
return eb;
@ -1436,13 +1438,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
/* now we're allowed to do a blocking uptodate check */
ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
if (!ret) {
*eb_ret = tmp;
return 0;
if (ret) {
free_extent_buffer(tmp);
btrfs_release_path(p);
return -EIO;
}
free_extent_buffer(tmp);
btrfs_release_path(p);
return -EIO;
*eb_ret = tmp;
return 0;
}
/*
@ -1460,19 +1462,19 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
ret = -EAGAIN;
tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
gen, parent_level - 1, &first_key);
if (!IS_ERR(tmp)) {
/*
* If the read above didn't mark this buffer up to date,
* it will never end up being up to date. Set ret to EIO now
* and give up so that our caller doesn't loop forever
* on our EAGAINs.
*/
if (!extent_buffer_uptodate(tmp))
ret = -EIO;
free_extent_buffer(tmp);
} else {
ret = PTR_ERR(tmp);
if (IS_ERR(tmp)) {
btrfs_release_path(p);
return PTR_ERR(tmp);
}
/*
* If the read above didn't mark this buffer up to date,
* it will never end up being up to date. Set ret to EIO now
* and give up so that our caller doesn't loop forever
* on our EAGAINs.
*/
if (!extent_buffer_uptodate(tmp))
ret = -EIO;
free_extent_buffer(tmp);
btrfs_release_path(p);
return ret;
@ -2990,16 +2992,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (free_space < data_size)
goto out_unlock;
/* cow and double check */
ret = btrfs_cow_block(trans, root, right, upper,
slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
if (ret)
goto out_unlock;
free_space = btrfs_leaf_free_space(right);
if (free_space < data_size)
goto out_unlock;
left_nritems = btrfs_header_nritems(left);
if (left_nritems == 0)
goto out_unlock;
@ -3224,7 +3221,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
/* cow and double check */
ret = btrfs_cow_block(trans, root, left,
path->nodes[1], slot - 1, &left,
BTRFS_NESTING_LEFT_COW);
@ -3235,12 +3231,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
free_space = btrfs_leaf_free_space(left);
if (free_space < data_size) {
ret = 1;
goto out;
}
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
goto out;
@ -4170,24 +4160,22 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
u32 last_off;
u32 dsize = 0;
int ret = 0;
int wret;
int i;
u32 nritems;
leaf = path->nodes[0];
last_off = btrfs_item_offset(leaf, slot + nr - 1);
for (i = 0; i < nr; i++)
dsize += btrfs_item_size(leaf, slot + i);
nritems = btrfs_header_nritems(leaf);
if (slot + nr != nritems) {
int data_end = leaf_data_end(leaf);
const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
const int data_end = leaf_data_end(leaf);
struct btrfs_map_token token;
u32 dsize = 0;
int i;
for (i = 0; i < nr; i++)
dsize += btrfs_item_size(leaf, slot + i);
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
data_end + dsize,
@ -4227,24 +4215,50 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
fixup_low_keys(path, &disk_key, 1);
}
/* delete the leaf if it is mostly empty */
/*
* Try to delete the leaf if it is mostly empty. We do this by
* trying to move all its items into its left and right neighbours.
* If we can't move all the items, then we don't delete it - it's
* not ideal, but future insertions might fill the leaf with more
* items, or items from other leaves might be moved later into our
* leaf due to deletions on those leaves.
*/
if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
u32 min_push_space;
/* push_leaf_left fixes the path.
* make sure the path still points to our leaf
* for possible call to del_ptr below
*/
slot = path->slots[1];
atomic_inc(&leaf->refs);
wret = push_leaf_left(trans, root, path, 1, 1,
1, (u32)-1);
/*
* We want to be able to at least push one item to the
* left neighbour leaf, and that's the first item.
*/
min_push_space = sizeof(struct btrfs_item) +
btrfs_item_size(leaf, 0);
wret = push_leaf_left(trans, root, path, 0,
min_push_space, 1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (path->nodes[0] == leaf &&
btrfs_header_nritems(leaf)) {
wret = push_leaf_right(trans, root, path, 1,
1, 1, 0);
/*
* If we were not able to push all items from our
* leaf to its left neighbour, then attempt to
* either push all the remaining items to the
* right neighbour or none. There's no advantage
* in pushing only some items, instead of all, as
* it's pointless to end up with a leaf having
* too few items while the neighbours can be full
* or nearly full.
*/
nritems = btrfs_header_nritems(leaf);
min_push_space = leaf_space_used(leaf, 0, nritems);
wret = push_leaf_right(trans, root, path, 0,
min_push_space, 1, 0);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
}

View File

@ -49,6 +49,7 @@ extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
struct btrfs_ordered_sum;
struct btrfs_ref;
struct btrfs_bio;
struct btrfs_ioctl_encoded_io_args;
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
@ -148,6 +149,8 @@ enum {
/* Indicates there was an error cleaning up a log tree. */
BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
BTRFS_FS_STATE_COUNT
};
#define BTRFS_BACKREF_REV_MAX 256
@ -274,8 +277,14 @@ struct btrfs_super_block {
/* the UUID written into btree blocks */
u8 metadata_uuid[BTRFS_FSID_SIZE];
/* Extent tree v2 */
__le64 block_group_root;
__le64 block_group_root_generation;
u8 block_group_root_level;
/* future expansion */
__le64 reserved[28];
u8 reserved8[7];
__le64 reserved[25];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
@ -300,6 +309,26 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
#ifdef CONFIG_BTRFS_DEBUG
/*
* Extent tree v2 supported only with CONFIG_BTRFS_DEBUG
*/
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \
BTRFS_FEATURE_INCOMPAT_RAID56 | \
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
BTRFS_FEATURE_INCOMPAT_NO_HOLES | \
BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
BTRFS_FEATURE_INCOMPAT_RAID1C34 | \
BTRFS_FEATURE_INCOMPAT_ZONED | \
BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
#else
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
@ -314,6 +343,7 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
BTRFS_FEATURE_INCOMPAT_RAID1C34 | \
BTRFS_FEATURE_INCOMPAT_ZONED)
#endif
#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
@ -636,6 +666,7 @@ struct btrfs_fs_info {
struct btrfs_root *quota_root;
struct btrfs_root *uuid_root;
struct btrfs_root *data_reloc_root;
struct btrfs_root *block_group_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
@ -1030,6 +1061,8 @@ struct btrfs_fs_info {
spinlock_t relocation_bg_lock;
u64 data_reloc_bg;
u64 nr_global_roots;
spinlock_t zone_active_bgs_lock;
struct list_head zone_active_bgs;
@ -1609,25 +1642,25 @@ DECLARE_BTRFS_SETGET_BITS(64)
static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
const type *s) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
} \
static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
u##bits val) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
} \
static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \
const type *s) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
return btrfs_get_token_##bits(token, s, offsetof(type, member));\
} \
static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
type *s, u##bits val) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
btrfs_set_token_##bits(token, s, offsetof(type, member), val); \
}
@ -1658,8 +1691,8 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
struct btrfs_dev_item *s)
{
BUILD_BUG_ON(sizeof(u64) !=
sizeof(((struct btrfs_dev_item *)0))->total_bytes);
static_assert(sizeof(u64) ==
sizeof(((struct btrfs_dev_item *)0))->total_bytes);
return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
total_bytes));
}
@ -1667,8 +1700,8 @@ static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
struct btrfs_dev_item *s,
u64 val)
{
BUILD_BUG_ON(sizeof(u64) !=
sizeof(((struct btrfs_dev_item *)0))->total_bytes);
static_assert(sizeof(u64) ==
sizeof(((struct btrfs_dev_item *)0))->total_bytes);
WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
}
@ -2328,6 +2361,17 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
num_devices, 64);
/*
* For extent tree v2 we overload the extent root with the block group root, as
* we will have multiple extent roots.
*/
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root, struct btrfs_root_backup,
extent_root, 64);
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_gen, struct btrfs_root_backup,
extent_root_gen, 64);
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_level,
struct btrfs_root_backup, extent_root_level, 8);
/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
@ -2462,6 +2506,13 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
uuid_tree_generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_block_group_root, struct btrfs_super_block,
block_group_root, 64);
BTRFS_SETGET_STACK_FUNCS(super_block_group_root_generation,
struct btrfs_super_block,
block_group_root_generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_block_group_root_level, struct btrfs_super_block,
block_group_root_level, 8);
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
@ -2839,7 +2890,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
u64 disk_num_bytes);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 start, u64 end);
@ -3155,7 +3207,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
u64 file_start, int contig);
u64 offset, bool one_ordered);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
@ -3256,6 +3308,11 @@ int btrfs_writepage_cow_fixup(struct page *page);
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
u64 end, bool uptodate);
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
struct btrfs_ioctl_encoded_io_args *encoded);
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
const struct btrfs_ioctl_encoded_io_args *encoded);
extern const struct dentry_operations btrfs_dentry_operations;
extern const struct iomap_ops btrfs_dio_iomap_ops;
extern const struct iomap_dio_ops btrfs_dio_ops;
@ -3318,6 +3375,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
struct btrfs_trans_handle **trans_out);
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 start, u64 end);
ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
const struct btrfs_ioctl_encoded_io_args *encoded);
int btrfs_release_file(struct inode *inode, struct file *file);
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
@ -3774,7 +3833,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,

View File

@ -270,11 +270,11 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
}
static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
u64 num_bytes, u64 *meta_reserve,
u64 *qgroup_reserve)
u64 num_bytes, u64 disk_num_bytes,
u64 *meta_reserve, u64 *qgroup_reserve)
{
u64 nr_extents = count_max_extents(num_bytes);
u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
*meta_reserve = btrfs_calc_insert_metadata_size(fs_info,
@ -288,7 +288,8 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
*qgroup_reserve = nr_extents * fs_info->nodesize;
}
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
u64 disk_num_bytes)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@ -318,6 +319,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
}
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
disk_num_bytes = ALIGN(disk_num_bytes, fs_info->sectorsize);
/*
* We always want to do it this way, every other way is wrong and ends
@ -329,8 +331,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
* everything out and try again, which is bad. This way we just
* over-reserve slightly, and clean up the mess when we are done.
*/
calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
&qgroup_reserve);
calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
&meta_reserve, &qgroup_reserve);
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
if (ret)
return ret;
@ -349,7 +351,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
spin_lock(&inode->lock);
nr_extents = count_max_extents(num_bytes);
btrfs_mod_outstanding_extents(inode, nr_extents);
inode->csum_bytes += num_bytes;
inode->csum_bytes += disk_num_bytes;
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
@ -454,7 +456,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
ret = btrfs_check_data_free_space(inode, reserved, start, len);
if (ret < 0)
return ret;
ret = btrfs_delalloc_reserve_metadata(inode, len);
ret = btrfs_delalloc_reserve_metadata(inode, len, len);
if (ret < 0) {
btrfs_free_reserved_data_space(inode, *reserved, start, len);
extent_changeset_free(*reserved);

View File

@ -243,6 +243,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev,
struct btrfs_device **device_out)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
struct block_device *bdev;
struct rcu_string *name;
@ -271,7 +272,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
sync_blockdev(bdev);
list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (device->bdev == bdev) {
btrfs_err(fs_info,
"target device is in the filesystem!");
@ -302,6 +303,9 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
goto error;
}
rcu_assign_pointer(device->name, name);
ret = lookup_bdev(device_path, &device->devt);
if (ret)
goto error;
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
device->generation = 0;
@ -320,17 +324,17 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
device->mode = FMODE_EXCL;
device->dev_stats_valid = 1;
set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
device->fs_devices = fs_info->fs_devices;
device->fs_devices = fs_devices;
ret = btrfs_get_dev_zone_info(device, false);
if (ret)
goto error;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
list_add(&device->dev_list, &fs_info->fs_devices->devices);
fs_info->fs_devices->num_devices++;
fs_info->fs_devices->open_devices++;
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
mutex_lock(&fs_devices->device_list_mutex);
list_add(&device->dev_list, &fs_devices->devices);
fs_devices->num_devices++;
fs_devices->open_devices++;
mutex_unlock(&fs_devices->device_list_mutex);
*device_out = device;
return 0;

View File

@ -441,17 +441,31 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
else
ret = btrfs_check_leaf_full(eb);
if (ret < 0) {
btrfs_print_tree(eb, 0);
if (ret < 0)
goto error;
/*
* Also check the generation, the eb reached here must be newer than
* last committed. Or something seriously wrong happened.
*/
if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) {
ret = -EUCLEAN;
btrfs_err(fs_info,
"block=%llu write time tree block corruption detected",
eb->start);
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
return ret;
"block=%llu bad generation, have %llu expect > %llu",
eb->start, btrfs_header_generation(eb),
fs_info->last_trans_committed);
goto error;
}
write_extent_buffer(eb, result, 0, fs_info->csum_size);
return 0;
error:
btrfs_print_tree(eb, 0);
btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
eb->start);
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
return ret;
}
/* Checksum all dirty extent buffers in one bio_vec */
@ -1289,12 +1303,33 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
return root;
}
static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct btrfs_block_group *block_group;
u64 ret;
if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
return 0;
if (bytenr)
block_group = btrfs_lookup_block_group(fs_info, bytenr);
else
block_group = btrfs_lookup_first_block_group(fs_info, bytenr);
ASSERT(block_group);
if (!block_group)
return 0;
ret = block_group->global_root_id;
btrfs_put_block_group(block_group);
return ret;
}
struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct btrfs_key key = {
.objectid = BTRFS_CSUM_TREE_OBJECTID,
.type = BTRFS_ROOT_ITEM_KEY,
.offset = 0,
.offset = btrfs_global_root_id(fs_info, bytenr),
};
return btrfs_global_root(fs_info, &key);
@ -1305,7 +1340,7 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
struct btrfs_key key = {
.objectid = BTRFS_EXTENT_TREE_OBJECTID,
.type = BTRFS_ROOT_ITEM_KEY,
.offset = 0,
.offset = btrfs_global_root_id(fs_info, bytenr),
};
return btrfs_global_root(fs_info, &key);
@ -1522,7 +1557,8 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
ret = PTR_ERR(root->node);
root->node = NULL;
goto fail;
} else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
}
if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
ret = -EIO;
goto fail;
}
@ -1727,6 +1763,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_put_root(fs_info->uuid_root);
btrfs_put_root(fs_info->fs_root);
btrfs_put_root(fs_info->data_reloc_root);
btrfs_put_root(fs_info->block_group_root);
btrfs_check_leaked_roots(fs_info);
btrfs_extent_buffer_leak_debug_check(fs_info);
kfree(fs_info->super_copy);
@ -1925,8 +1962,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
static int cleaner_kthread(void *arg)
{
struct btrfs_root *root = arg;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)arg;
int again;
while (1) {
@ -1959,7 +1995,7 @@ static int cleaner_kthread(void *arg)
btrfs_run_delayed_iputs(fs_info);
again = btrfs_clean_one_deleted_snapshot(root);
again = btrfs_clean_one_deleted_snapshot(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
/*
@ -2095,8 +2131,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
{
const int next_backup = info->backup_root_index;
struct btrfs_root_backup *root_backup;
struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
root_backup = info->super_for_commit->super_roots + next_backup;
@ -2121,11 +2155,30 @@ static void backup_super_roots(struct btrfs_fs_info *info)
btrfs_set_backup_chunk_root_level(root_backup,
btrfs_header_level(info->chunk_root->node));
btrfs_set_backup_extent_root(root_backup, extent_root->node->start);
btrfs_set_backup_extent_root_gen(root_backup,
btrfs_header_generation(extent_root->node));
btrfs_set_backup_extent_root_level(root_backup,
btrfs_header_level(extent_root->node));
if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
btrfs_set_backup_block_group_root(root_backup,
info->block_group_root->node->start);
btrfs_set_backup_block_group_root_gen(root_backup,
btrfs_header_generation(info->block_group_root->node));
btrfs_set_backup_block_group_root_level(root_backup,
btrfs_header_level(info->block_group_root->node));
} else {
struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
btrfs_set_backup_extent_root(root_backup,
extent_root->node->start);
btrfs_set_backup_extent_root_gen(root_backup,
btrfs_header_generation(extent_root->node));
btrfs_set_backup_extent_root_level(root_backup,
btrfs_header_level(extent_root->node));
btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
btrfs_set_backup_csum_root_gen(root_backup,
btrfs_header_generation(csum_root->node));
btrfs_set_backup_csum_root_level(root_backup,
btrfs_header_level(csum_root->node));
}
/*
* we might commit during log recovery, which happens before we set
@ -2146,12 +2199,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
btrfs_set_backup_dev_root_level(root_backup,
btrfs_header_level(info->dev_root->node));
btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
btrfs_set_backup_csum_root_gen(root_backup,
btrfs_header_generation(csum_root->node));
btrfs_set_backup_csum_root_level(root_backup,
btrfs_header_level(csum_root->node));
btrfs_set_backup_total_bytes(root_backup,
btrfs_super_total_bytes(info->super_copy));
btrfs_set_backup_bytes_used(root_backup,
@ -2269,6 +2316,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
free_root_extent_buffers(info->uuid_root);
free_root_extent_buffers(info->fs_root);
free_root_extent_buffers(info->data_reloc_root);
free_root_extent_buffers(info->block_group_root);
if (free_chunk_root)
free_root_extent_buffers(info->chunk_root);
}
@ -2504,11 +2552,13 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
log_tree_root->node = NULL;
btrfs_put_root(log_tree_root);
return ret;
} else if (!extent_buffer_uptodate(log_tree_root->node)) {
}
if (!extent_buffer_uptodate(log_tree_root->node)) {
btrfs_err(fs_info, "failed to read log tree");
btrfs_put_root(log_tree_root);
return -EIO;
}
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
if (ret) {
@ -2533,6 +2583,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
{
struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_root *root;
u64 max_global_id = 0;
int ret;
struct btrfs_key key = {
.objectid = objectid,
@ -2568,6 +2619,13 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
break;
btrfs_release_path(path);
/*
* Just worry about this for extent tree, it'll be the same for
* everybody.
*/
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
max_global_id = max(max_global_id, key.offset);
found = true;
root = read_tree_root_path(tree_root, path, &key);
if (IS_ERR(root)) {
@ -2585,6 +2643,9 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
}
btrfs_release_path(path);
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
fs_info->nr_global_roots = max_global_id + 1;
if (!found || ret) {
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
@ -2930,6 +2991,56 @@ out:
return ret;
}
static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
{
int ret = 0;
root->node = read_tree_block(root->fs_info, bytenr,
root->root_key.objectid, gen, level, NULL);
if (IS_ERR(root->node)) {
ret = PTR_ERR(root->node);
root->node = NULL;
return ret;
}
if (!extent_buffer_uptodate(root->node)) {
free_extent_buffer(root->node);
root->node = NULL;
return -EIO;
}
btrfs_set_root_node(&root->root_item, root->node);
root->commit_root = btrfs_root_node(root);
btrfs_set_root_refs(&root->root_item, 1);
return ret;
}
static int load_important_roots(struct btrfs_fs_info *fs_info)
{
struct btrfs_super_block *sb = fs_info->super_copy;
u64 gen, bytenr;
int level, ret;
bytenr = btrfs_super_root(sb);
gen = btrfs_super_generation(sb);
level = btrfs_super_root_level(sb);
ret = load_super_root(fs_info->tree_root, bytenr, gen, level);
if (ret) {
btrfs_warn(fs_info, "couldn't read tree root");
return ret;
}
if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
return 0;
bytenr = btrfs_super_block_group_root(sb);
gen = btrfs_super_block_group_root_generation(sb);
level = btrfs_super_block_group_root_level(sb);
ret = load_super_root(fs_info->block_group_root, bytenr, gen, level);
if (ret)
btrfs_warn(fs_info, "couldn't read block group root");
return ret;
}
static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
{
int backup_index = find_newest_super_backup(fs_info);
@ -2939,10 +3050,17 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
int ret = 0;
int i;
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
u64 generation;
int level;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
struct btrfs_root *root;
root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID,
GFP_KERNEL);
if (!root)
return -ENOMEM;
fs_info->block_group_root = root;
}
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
if (handle_error) {
if (!IS_ERR(tree_root->node))
free_extent_buffer(tree_root->node);
@ -2967,29 +3085,13 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
if (ret < 0)
return ret;
}
generation = btrfs_super_generation(sb);
level = btrfs_super_root_level(sb);
tree_root->node = read_tree_block(fs_info, btrfs_super_root(sb),
BTRFS_ROOT_TREE_OBJECTID,
generation, level, NULL);
if (IS_ERR(tree_root->node)) {
handle_error = true;
ret = PTR_ERR(tree_root->node);
tree_root->node = NULL;
btrfs_warn(fs_info, "couldn't read tree root");
continue;
} else if (!extent_buffer_uptodate(tree_root->node)) {
ret = load_important_roots(fs_info);
if (ret) {
handle_error = true;
ret = -EIO;
btrfs_warn(fs_info, "error while reading tree root");
continue;
}
btrfs_set_root_node(&tree_root->root_item, tree_root->node);
tree_root->commit_root = btrfs_root_node(tree_root);
btrfs_set_root_refs(&tree_root->root_item, 1);
/*
* No need to hold btrfs_root::objectid_mutex since the fs
* hasn't been fully initialised and we are the only user
@ -3009,8 +3111,8 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
}
/* All successful */
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
fs_info->generation = btrfs_header_generation(tree_root->node);
fs_info->last_trans_committed = fs_info->generation;
fs_info->last_reloc_trans = 0;
/* Always begin writing backup roots after the one being used */
@ -3293,7 +3395,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
up_read(&fs_info->cleanup_work_sem);
mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(fs_info->tree_root);
ret = btrfs_recover_relocation(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
btrfs_warn(fs_info, "failed to recover relocation: %d", ret);
@ -3594,21 +3696,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
generation = btrfs_super_chunk_root_generation(disk_super);
level = btrfs_super_chunk_root_level(disk_super);
chunk_root->node = read_tree_block(fs_info,
btrfs_super_chunk_root(disk_super),
BTRFS_CHUNK_TREE_OBJECTID,
generation, level, NULL);
if (IS_ERR(chunk_root->node) ||
!extent_buffer_uptodate(chunk_root->node)) {
ret = load_super_root(chunk_root, btrfs_super_chunk_root(disk_super),
generation, level);
if (ret) {
btrfs_err(fs_info, "failed to read chunk root");
if (!IS_ERR(chunk_root->node))
free_extent_buffer(chunk_root->node);
chunk_root->node = NULL;
goto fail_tree_roots;
}
btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
chunk_root->commit_root = btrfs_root_node(chunk_root);
read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
offsetof(struct btrfs_header, chunk_tree_uuid),
@ -3728,7 +3821,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_sysfs;
}
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info,
"btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread))
goto fail_sysfs;

View File

@ -111,6 +111,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
{
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
return fs_info->block_group_root;
return btrfs_extent_root(fs_info, 0);
}

View File

@ -598,7 +598,7 @@ fail:
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
int refs_to_drop, int *last_ref)
int refs_to_drop)
{
struct btrfs_key key;
struct btrfs_extent_data_ref *ref1 = NULL;
@ -631,7 +631,6 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
if (num_refs == 0) {
ret = btrfs_del_item(trans, root, path);
*last_ref = 1;
} else {
if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
@ -1072,8 +1071,7 @@ static noinline_for_stack
void update_inline_extent_backref(struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_mod,
struct btrfs_delayed_extent_op *extent_op,
int *last_ref)
struct btrfs_delayed_extent_op *extent_op)
{
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_extent_item *ei;
@ -1121,7 +1119,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
else
btrfs_set_shared_data_ref_count(leaf, sref, refs);
} else {
*last_ref = 1;
size = btrfs_extent_inline_ref_size(type);
item_size = btrfs_item_size(leaf, path->slots[0]);
ptr = (unsigned long)iref;
@ -1166,8 +1163,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
}
return -EUCLEAN;
}
update_inline_extent_backref(path, iref, refs_to_add,
extent_op, NULL);
update_inline_extent_backref(path, iref, refs_to_add, extent_op);
} else if (ret == -ENOENT) {
setup_inline_extent_backref(trans->fs_info, path, iref, parent,
root_objectid, owner, offset,
@ -1181,21 +1177,17 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_drop, int is_data, int *last_ref)
int refs_to_drop, int is_data)
{
int ret = 0;
BUG_ON(!is_data && refs_to_drop != 1);
if (iref) {
update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
last_ref);
} else if (is_data) {
ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
last_ref);
} else {
*last_ref = 1;
if (iref)
update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
else if (is_data)
ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
else
ret = btrfs_del_item(trans, root, path);
}
return ret;
}
@ -2766,12 +2758,11 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
spin_unlock(&cache->lock);
if (!readonly && return_free_space &&
global_rsv->space_info == space_info) {
u64 to_add = len;
spin_lock(&global_rsv->lock);
if (!global_rsv->full) {
to_add = min(len, global_rsv->size -
global_rsv->reserved);
u64 to_add = min(len, global_rsv->size -
global_rsv->reserved);
global_rsv->reserved += to_add;
btrfs_space_info_update_bytes_may_use(fs_info,
space_info, to_add);
@ -2862,6 +2853,35 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
return 0;
}
static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, bool is_data)
{
int ret;
if (is_data) {
struct btrfs_root *csum_root;
csum_root = btrfs_csum_root(trans->fs_info, bytenr);
ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
if (ret) {
btrfs_abort_transaction(trans, ret);
return ret;
}
}
ret = add_to_free_space_tree(trans, bytenr, num_bytes);
if (ret) {
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
if (ret)
btrfs_abort_transaction(trans, ret);
return ret;
}
/*
* Drop one or more refs of @node.
*
@ -2943,7 +2963,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 refs;
u64 bytenr = node->bytenr;
u64 num_bytes = node->num_bytes;
int last_ref = 0;
bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
extent_root = btrfs_extent_root(info, bytenr);
@ -3010,8 +3029,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
/* Must be SHARED_* item, remove the backref first */
ret = remove_extent_backref(trans, extent_root, path,
NULL, refs_to_drop, is_data,
&last_ref);
NULL, refs_to_drop, is_data);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@ -3136,8 +3154,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
if (found_extent) {
ret = remove_extent_backref(trans, extent_root, path,
iref, refs_to_drop, is_data,
&last_ref);
iref, refs_to_drop, is_data);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@ -3182,7 +3199,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
last_ref = 1;
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
if (ret) {
@ -3191,28 +3207,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
if (is_data) {
struct btrfs_root *csum_root;
csum_root = btrfs_csum_root(info, bytenr);
ret = btrfs_del_csums(trans, csum_root, bytenr,
num_bytes);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = add_to_free_space_tree(trans, bytenr, num_bytes);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
}
btrfs_release_path(path);
@ -4605,6 +4600,28 @@ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
return ret;
}
static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
u64 num_bytes)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
if (ret)
return ret;
ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
if (ret) {
ASSERT(!ret);
btrfs_err(fs_info, "update block group failed for %llu %llu",
bytenr, num_bytes);
return ret;
}
trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
return 0;
}
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
u64 flags, u64 owner, u64 offset,
@ -4665,18 +4682,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
if (ret)
return ret;
ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, true);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
ins->objectid, ins->offset);
BUG();
}
trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
return ret;
return alloc_reserved_extent(trans, ins->objectid, ins->offset);
}
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
@ -4694,7 +4700,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_delayed_tree_ref *ref;
u32 size = sizeof(*extent_item) + sizeof(*iref);
u64 num_bytes;
u64 flags = extent_op->flags_to_set;
bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
@ -4704,12 +4709,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
if (skinny_metadata) {
extent_key.offset = ref->level;
extent_key.type = BTRFS_METADATA_ITEM_KEY;
num_bytes = fs_info->nodesize;
} else {
extent_key.offset = node->num_bytes;
extent_key.type = BTRFS_EXTENT_ITEM_KEY;
size += sizeof(*block_info);
num_bytes = node->num_bytes;
}
path = btrfs_alloc_path();
@ -4754,22 +4757,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
ret = remove_from_free_space_tree(trans, extent_key.objectid,
num_bytes);
if (ret)
return ret;
ret = btrfs_update_block_group(trans, extent_key.objectid,
fs_info->nodesize, true);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
extent_key.objectid, extent_key.offset);
BUG();
}
trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
fs_info->nodesize);
return ret;
return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
}
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,

View File

@ -2610,6 +2610,7 @@ static bool btrfs_check_repairable(struct inode *inode,
* a good copy of the failed sector and if we succeed, we have setup
* everything for repair_io_failure to do the rest for us.
*/
ASSERT(failed_mirror);
failrec->failed_mirror = failed_mirror;
failrec->this_mirror++;
if (failrec->this_mirror == failed_mirror)
@ -2639,7 +2640,6 @@ int btrfs_repair_one_sector(struct inode *inode,
const int icsum = bio_offset >> fs_info->sectorsize_bits;
struct bio *repair_bio;
struct btrfs_bio *repair_bbio;
blk_status_t status;
btrfs_debug(fs_info,
"repair read error: read error at %llu", start);
@ -2678,13 +2678,13 @@ int btrfs_repair_one_sector(struct inode *inode,
"repair read error: submitting new read to mirror %d",
failrec->this_mirror);
status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
failrec->bio_flags);
if (status) {
free_io_failure(failure_tree, tree, failrec);
bio_put(repair_bio);
}
return blk_status_to_errno(status);
/*
* At this point we have a bio, so any errors from submit_bio_hook()
* will be handled by the endio on the repair_bio, so we can't return an
* error here.
*/
submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->bio_flags);
return BLK_STS_OK;
}
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
@ -3067,6 +3067,14 @@ static void end_bio_extent_readpage(struct bio *bio)
goto readpage_ok;
if (is_data_inode(inode)) {
/*
* If we failed to submit the IO at all we'll have a
* mirror_num == 0, in which case we need to just mark
* the page with an error and unlock it and carry on.
*/
if (mirror == 0)
goto readpage_ok;
/*
* btrfs_submit_read_repair() will handle all the good
* and bad sectors, we just continue to the next bvec.
@ -3534,7 +3542,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
}
em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
if (em_cached && !IS_ERR_OR_NULL(em)) {
if (em_cached && !IS_ERR(em)) {
BUG_ON(*em_cached);
refcount_inc(&em->refs);
*em_cached = em;
@ -3563,7 +3571,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
u64 cur_end;
struct extent_map *em;
int ret = 0;
int nr = 0;
size_t pg_offset = 0;
size_t iosize;
size_t blocksize = inode->i_sb->s_blocksize;
@ -3608,9 +3615,10 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
}
em = __get_extent_map(inode, page, pg_offset, cur,
end - cur + 1, em_cached);
if (IS_ERR_OR_NULL(em)) {
if (IS_ERR(em)) {
unlock_extent(tree, cur, end);
end_page_read(page, false, cur, end + 1 - cur);
ret = PTR_ERR(em);
break;
}
extent_offset = cur - em->start;
@ -3721,9 +3729,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
end_bio_extent_readpage, 0,
this_bio_flag,
force_bio_submit);
if (!ret) {
nr++;
} else {
if (ret) {
unlock_extent(tree, cur, cur + iosize - 1);
end_page_read(page, false, cur, iosize);
goto out;
@ -3951,7 +3957,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
}
em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
if (IS_ERR_OR_NULL(em)) {
if (IS_ERR(em)) {
btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
ret = PTR_ERR_OR_ZERO(em);
break;
@ -4780,11 +4786,12 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
return ret;
}
if (cache) {
/* Impiles write in zoned mode */
btrfs_put_block_group(cache);
/* Mark the last eb in a block group */
/*
* Implies write in zoned mode. Mark the last eb in a block group.
*/
if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
btrfs_put_block_group(cache);
}
ret = write_one_eb(eb, wbc, epd);
free_extent_buffer(eb);
@ -5390,7 +5397,7 @@ static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
break;
len = ALIGN(len, sectorsize);
em = btrfs_get_extent_fiemap(inode, offset, len);
if (IS_ERR_OR_NULL(em))
if (IS_ERR(em))
return em;
/* if this isn't a hole return it */

View File

@ -492,6 +492,8 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
*/
void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
lockdep_assert_held_write(&tree->lock);
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
rb_erase_cached(&em->rb_node, &tree->map);
if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
@ -506,6 +508,8 @@ void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *new,
int modified)
{
lockdep_assert_held_write(&tree->lock);
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
ASSERT(extent_map_in_tree(cur));
if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))

View File

@ -305,7 +305,7 @@ found:
read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
ret * csum_size);
out:
if (ret == -ENOENT)
if (ret == -ENOENT || ret == -EFBIG)
ret = 0;
return ret;
}
@ -368,6 +368,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_bio *bbio = NULL;
struct btrfs_path *path;
const u32 sectorsize = fs_info->sectorsize;
const u32 csum_size = fs_info->csum_size;
@ -377,6 +378,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
u8 *csum;
const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
int count = 0;
blk_status_t ret = BLK_STS_OK;
if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
@ -400,7 +402,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
return BLK_STS_RESOURCE;
if (!dst) {
struct btrfs_bio *bbio = btrfs_bio(bio);
bbio = btrfs_bio(bio);
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
@ -456,21 +458,27 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
count = search_csum_tree(fs_info, path, cur_disk_bytenr,
search_len, csum_dst);
if (count <= 0) {
/*
* Either we hit a critical error or we didn't find
* the csum.
* Either way, we put zero into the csums dst, and skip
* to the next sector.
*/
if (count < 0) {
ret = errno_to_blk_status(count);
if (bbio)
btrfs_bio_free_csum(bbio);
break;
}
/*
* We didn't find a csum for this range. We need to make sure
* we complain loudly about this, because we are not NODATASUM.
*
* However for the DATA_RELOC inode we could potentially be
* relocating data extents for a NODATASUM inode, so the inode
* itself won't be marked with NODATASUM, but the extent we're
* copying is in fact NODATASUM. If we don't find a csum we
* assume this is the case.
*/
if (count == 0) {
memset(csum_dst, 0, csum_size);
count = 1;
/*
* For data reloc inode, we need to mark the range
* NODATASUM so that balance won't report false csum
* error.
*/
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 file_offset;
@ -491,7 +499,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
}
btrfs_free_path(path);
return BLK_STS_OK;
return ret;
}
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
@ -612,32 +620,33 @@ fail:
return ret;
}
/*
* btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
/**
* Calculate checksums of the data contained inside a bio
*
* @inode: Owner of the data inside the bio
* @bio: Contains the data to be checksummed
* @file_start: offset in file this bio begins to describe
* @contig: Boolean. If true/1 means all bio vecs in this bio are
* contiguous and they begin at @file_start in the file. False/0
* means this bio can contain potentially discontiguous bio vecs
* so the logical offset of each should be calculated separately.
* @offset: If (u64)-1, @bio may contain discontiguous bio vecs, so the
* file offsets are determined from the page offsets in the bio.
* Otherwise, this is the starting file offset of the bio vecs in
* @bio, which must be contiguous.
* @one_ordered: If true, @bio only refers to one ordered extent.
*/
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
u64 file_start, int contig)
u64 offset, bool one_ordered)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered = NULL;
const bool use_page_offsets = (offset == (u64)-1);
char *data;
struct bvec_iter iter;
struct bio_vec bvec;
int index;
int nr_sectors;
unsigned int blockcount;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
int i;
u64 offset;
unsigned nofs_flag;
nofs_flag = memalloc_nofs_save();
@ -651,18 +660,13 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
if (contig)
offset = file_start;
else
offset = 0; /* shut up gcc */
sums->bytenr = bio->bi_iter.bi_sector << 9;
index = 0;
shash->tfm = fs_info->csum_shash;
bio_for_each_segment(bvec, bio, iter) {
if (!contig)
if (use_page_offsets)
offset = page_offset(bvec.bv_page) + bvec.bv_offset;
if (!ordered) {
@ -681,13 +685,14 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
}
}
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
bvec.bv_len + fs_info->sectorsize
- 1);
for (i = 0; i < nr_sectors; i++) {
if (offset >= ordered->file_offset + ordered->num_bytes ||
offset < ordered->file_offset) {
for (i = 0; i < blockcount; i++) {
if (!one_ordered &&
!in_range(offset, ordered->file_offset,
ordered->num_bytes)) {
unsigned long bytes_left;
sums->len = this_sum_bytes;
@ -1211,6 +1216,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
extent_start = key.offset;
extent_end = btrfs_file_extent_end(path);
em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
em->generation = btrfs_file_extent_generation(leaf, fi);
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
em->start = extent_start;

View File

@ -691,7 +691,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
int modify_tree = -1;
int update_refs;
int found = 0;
int leafs_visited = 0;
struct btrfs_path *path = args->path;
args->bytes_found = 0;
@ -729,7 +728,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
path->slots[0]--;
}
ret = 0;
leafs_visited++;
next_slot:
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@ -741,7 +739,6 @@ next_slot:
ret = 0;
break;
}
leafs_visited++;
leaf = path->nodes[0];
recow = 1;
}
@ -987,7 +984,7 @@ delete_extent_item:
* which case it unlocked our path, so check path->locks[0] matches a
* write lock.
*/
if (!ret && args->replace_extent && leafs_visited == 1 &&
if (!ret && args->replace_extent &&
path->locks[0] == BTRFS_WRITE_LOCK &&
btrfs_leaf_free_space(leaf) >=
sizeof(struct btrfs_item) + args->extent_item_size) {
@ -1722,7 +1719,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
fs_info->sectorsize);
WARN_ON(reserve_bytes == 0);
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
reserve_bytes);
reserve_bytes,
reserve_bytes);
if (ret) {
if (!only_release_metadata)
btrfs_free_reserved_data_space(BTRFS_I(inode),
@ -2039,12 +2037,43 @@ out:
return err < 0 ? err : written;
}
static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
struct iov_iter *from)
static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
const struct btrfs_ioctl_encoded_io_args *encoded)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
loff_t count;
ssize_t ret;
btrfs_inode_lock(inode, 0);
count = encoded->len;
ret = generic_write_checks_count(iocb, &count);
if (ret == 0 && count != encoded->len) {
/*
* The write got truncated by generic_write_checks_count(). We
* can't do a partial encoded write.
*/
ret = -EFBIG;
}
if (ret || encoded->len == 0)
goto out;
ret = btrfs_write_check(iocb, from, encoded->len);
if (ret < 0)
goto out;
ret = btrfs_do_encoded_write(iocb, from, encoded);
out:
btrfs_inode_unlock(inode, 0);
return ret;
}
ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
const struct btrfs_ioctl_encoded_io_args *encoded)
{
struct file *file = iocb->ki_filp;
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
ssize_t num_written = 0;
ssize_t num_written, num_sync;
const bool sync = iocb->ki_flags & IOCB_DSYNC;
/*
@ -2055,22 +2084,28 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
if (BTRFS_FS_ERROR(inode->root->fs_info))
return -EROFS;
if (!(iocb->ki_flags & IOCB_DIRECT) &&
(iocb->ki_flags & IOCB_NOWAIT))
if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
return -EOPNOTSUPP;
if (sync)
atomic_inc(&inode->sync_writers);
if (iocb->ki_flags & IOCB_DIRECT)
num_written = btrfs_direct_write(iocb, from);
else
num_written = btrfs_buffered_write(iocb, from);
if (encoded) {
num_written = btrfs_encoded_write(iocb, from, encoded);
num_sync = encoded->len;
} else if (iocb->ki_flags & IOCB_DIRECT) {
num_written = num_sync = btrfs_direct_write(iocb, from);
} else {
num_written = num_sync = btrfs_buffered_write(iocb, from);
}
btrfs_set_inode_last_sub_trans(inode);
if (num_written > 0)
num_written = generic_write_sync(iocb, num_written);
if (num_sync > 0) {
num_sync = generic_write_sync(iocb, num_sync);
if (num_sync < 0)
num_written = num_sync;
}
if (sync)
atomic_dec(&inode->sync_writers);
@ -2079,6 +2114,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
return num_written;
}
static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
return btrfs_do_write_iter(iocb, from, NULL);
}
int btrfs_release_file(struct inode *inode, struct file *filp)
{
struct btrfs_file_private *private = filp->private_data;
@ -2474,7 +2514,7 @@ out:
hole_em = alloc_extent_map();
if (!hole_em) {
btrfs_drop_extent_cache(inode, offset, end - 1, 0);
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
btrfs_set_inode_full_sync(inode);
} else {
hole_em->start = offset;
hole_em->len = end - offset;
@ -2495,8 +2535,7 @@ out:
} while (ret == -EEXIST);
free_extent_map(hole_em);
if (ret)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&inode->runtime_flags);
btrfs_set_inode_full_sync(inode);
}
return 0;
@ -2850,7 +2889,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
* maps for the replacement extents (or holes).
*/
if (extent_info && !extent_info->is_new_extent)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
btrfs_set_inode_full_sync(inode);
if (ret)
goto out_trans;

View File

@ -25,6 +25,8 @@ static struct btrfs_root *btrfs_free_space_root(
.offset = 0,
};
if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
key.offset = block_group->global_root_id;
return btrfs_global_root(block_group->fs_info, &key);
}

File diff suppressed because it is too large Load Diff

View File

@ -28,6 +28,7 @@
#include <linux/iversion.h>
#include <linux/fileattr.h>
#include <linux/fsverity.h>
#include <linux/sched/xacct.h>
#include "ctree.h"
#include "disk-io.h"
#include "export.h"
@ -88,6 +89,24 @@ struct btrfs_ioctl_send_args_32 {
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
struct btrfs_ioctl_send_args_32)
struct btrfs_ioctl_encoded_io_args_32 {
compat_uptr_t iov;
compat_ulong_t iovcnt;
__s64 offset;
__u64 flags;
__u64 len;
__u64 unencoded_len;
__u64 unencoded_offset;
__u32 compression;
__u32 encryption;
__u8 reserved[64];
};
#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
struct btrfs_ioctl_encoded_io_args_32)
#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \
struct btrfs_ioctl_encoded_io_args_32)
#endif
/* Mask out flags that are inappropriate for the given type of inode. */
@ -440,10 +459,8 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
}
}
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
static int btrfs_ioctl_getversion(struct inode *inode, int __user *arg)
{
struct inode *inode = file_inode(file);
return put_user(inode->i_generation, arg);
}
@ -753,6 +770,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_trans_handle *trans;
int ret;
/* We do not support snapshotting right now. */
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
btrfs_warn(fs_info,
"extent tree v2 doesn't support snapshotting yet");
return -EOPNOTSUPP;
}
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return -EINVAL;
@ -1522,6 +1546,7 @@ next:
}
#define CLUSTER_SIZE (SZ_256K)
static_assert(IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
/*
* Defrag one contiguous target range.
@ -1667,7 +1692,6 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
LIST_HEAD(target_list);
int ret;
BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
ret = defrag_collect_targets(inode, start, len, extent_thresh,
newer_than, do_compress, false,
&target_list, NULL);
@ -1810,9 +1834,6 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
u64 last_scanned = cur;
u64 cluster_end;
/* The cluster size 256K should always be page aligned */
BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
if (btrfs_defrag_cancelled(fs_info)) {
ret = -EAGAIN;
break;
@ -2229,10 +2250,9 @@ free_args:
return ret;
}
static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
void __user *arg)
{
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
@ -2562,12 +2582,11 @@ err:
return ret;
}
static noinline int btrfs_ioctl_tree_search(struct file *file,
void __user *argp)
static noinline int btrfs_ioctl_tree_search(struct inode *inode,
void __user *argp)
{
struct btrfs_ioctl_search_args __user *uargs;
struct btrfs_ioctl_search_key sk;
struct inode *inode;
int ret;
size_t buf_size;
@ -2581,7 +2600,6 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
buf_size = sizeof(uargs->buf);
inode = file_inode(file);
ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
/*
@ -2596,12 +2614,11 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
return ret;
}
static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
void __user *argp)
{
struct btrfs_ioctl_search_args_v2 __user *uarg;
struct btrfs_ioctl_search_args_v2 args;
struct inode *inode;
int ret;
size_t buf_size;
const size_t buf_limit = SZ_16M;
@ -2620,7 +2637,6 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
if (buf_size > buf_limit)
buf_size = buf_limit;
inode = file_inode(file);
ret = search_ioctl(inode, &args.key, &buf_size,
(char __user *)(&uarg->buf[0]));
if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
@ -2871,25 +2887,22 @@ out:
return ret;
}
static noinline int btrfs_ioctl_ino_lookup(struct file *file,
static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root,
void __user *argp)
{
struct btrfs_ioctl_ino_lookup_args *args;
struct inode *inode;
int ret = 0;
args = memdup_user(argp, sizeof(*args));
if (IS_ERR(args))
return PTR_ERR(args);
inode = file_inode(file);
/*
* Unprivileged query to obtain the containing subvolume root id. The
* path is reset so it's consistent with btrfs_search_path_in_tree.
*/
if (args->treeid == 0)
args->treeid = BTRFS_I(inode)->root->root_key.objectid;
args->treeid = root->root_key.objectid;
if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
args->name[0] = 0;
@ -2901,7 +2914,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
goto out;
}
ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
ret = btrfs_search_path_in_tree(root->fs_info,
args->treeid, args->objectid,
args->name);
@ -2957,7 +2970,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
}
/* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
{
struct btrfs_ioctl_get_subvol_info_args *subvol_info;
struct btrfs_fs_info *fs_info;
@ -2969,7 +2982,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
struct extent_buffer *leaf;
unsigned long item_off;
unsigned long item_len;
struct inode *inode;
int slot;
int ret = 0;
@ -2983,7 +2995,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
return -ENOMEM;
}
inode = file_inode(file);
fs_info = BTRFS_I(inode)->root->fs_info;
/* Get root_item of inode's subvolume */
@ -3077,15 +3088,14 @@ out_free:
* Return ROOT_REF information of the subvolume containing this inode
* except the subvolume name.
*/
static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
void __user *argp)
{
struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
struct btrfs_root_ref *rref;
struct btrfs_root *root;
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *leaf;
struct inode *inode;
u64 objectid;
int slot;
int ret;
@ -3101,15 +3111,13 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
return PTR_ERR(rootrefs);
}
inode = file_inode(file);
root = BTRFS_I(inode)->root->fs_info->tree_root;
objectid = BTRFS_I(inode)->root->root_key.objectid;
objectid = root->root_key.objectid;
key.objectid = objectid;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = rootrefs->min_treeid;
found = 0;
root = root->fs_info->tree_root;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
goto out;
@ -3189,6 +3197,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
int err = 0;
bool destroy_parent = false;
/* We don't support snapshots with extent tree v2 yet. */
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
btrfs_err(fs_info,
"extent tree v2 doesn't support snapshot deletion yet");
return -EOPNOTSUPP;
}
if (destroy_v2) {
vol_args2 = memdup_user(arg, sizeof(*vol_args2));
if (IS_ERR(vol_args2))
@ -3464,6 +3479,11 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
btrfs_err(fs_info, "device add not supported on extent tree v2 yet");
return -EINVAL;
}
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD)) {
if (!btrfs_exclop_start_try_lock(fs_info, BTRFS_EXCLOP_DEV_ADD))
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
@ -3989,6 +4009,11 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
return -EINVAL;
}
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa))
return PTR_ERR(sa);
@ -4088,6 +4113,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
btrfs_err(fs_info, "device replace not supported on extent tree v2 yet");
return -EINVAL;
}
p = memdup_user(arg, sizeof(*p));
if (IS_ERR(p))
return PTR_ERR(p);
@ -5149,7 +5179,7 @@ out_drop_write:
return ret;
}
static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat)
{
struct btrfs_ioctl_send_args *arg;
int ret;
@ -5179,11 +5209,194 @@ static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
if (IS_ERR(arg))
return PTR_ERR(arg);
}
ret = btrfs_ioctl_send(file, arg);
ret = btrfs_ioctl_send(inode, arg);
kfree(arg);
return ret;
}
static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
bool compat)
{
struct btrfs_ioctl_encoded_io_args args = { 0 };
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
flags);
size_t copy_end;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
loff_t pos;
struct kiocb kiocb;
ssize_t ret;
if (!capable(CAP_SYS_ADMIN)) {
ret = -EPERM;
goto out_acct;
}
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_encoded_io_args_32 args32;
copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
flags);
if (copy_from_user(&args32, argp, copy_end)) {
ret = -EFAULT;
goto out_acct;
}
args.iov = compat_ptr(args32.iov);
args.iovcnt = args32.iovcnt;
args.offset = args32.offset;
args.flags = args32.flags;
#else
return -ENOTTY;
#endif
} else {
copy_end = copy_end_kernel;
if (copy_from_user(&args, argp, copy_end)) {
ret = -EFAULT;
goto out_acct;
}
}
if (args.flags != 0) {
ret = -EINVAL;
goto out_acct;
}
ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
&iov, &iter);
if (ret < 0)
goto out_acct;
if (iov_iter_count(&iter) == 0) {
ret = 0;
goto out_iov;
}
pos = args.offset;
ret = rw_verify_area(READ, file, &pos, args.len);
if (ret < 0)
goto out_iov;
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = pos;
ret = btrfs_encoded_read(&kiocb, &iter, &args);
if (ret >= 0) {
fsnotify_access(file);
if (copy_to_user(argp + copy_end,
(char *)&args + copy_end_kernel,
sizeof(args) - copy_end_kernel))
ret = -EFAULT;
}
out_iov:
kfree(iov);
out_acct:
if (ret > 0)
add_rchar(current, ret);
inc_syscr(current);
return ret;
}
static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat)
{
struct btrfs_ioctl_encoded_io_args args;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
loff_t pos;
struct kiocb kiocb;
ssize_t ret;
if (!capable(CAP_SYS_ADMIN)) {
ret = -EPERM;
goto out_acct;
}
if (!(file->f_mode & FMODE_WRITE)) {
ret = -EBADF;
goto out_acct;
}
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_encoded_io_args_32 args32;
if (copy_from_user(&args32, argp, sizeof(args32))) {
ret = -EFAULT;
goto out_acct;
}
args.iov = compat_ptr(args32.iov);
args.iovcnt = args32.iovcnt;
args.offset = args32.offset;
args.flags = args32.flags;
args.len = args32.len;
args.unencoded_len = args32.unencoded_len;
args.unencoded_offset = args32.unencoded_offset;
args.compression = args32.compression;
args.encryption = args32.encryption;
memcpy(args.reserved, args32.reserved, sizeof(args.reserved));
#else
return -ENOTTY;
#endif
} else {
if (copy_from_user(&args, argp, sizeof(args))) {
ret = -EFAULT;
goto out_acct;
}
}
ret = -EINVAL;
if (args.flags != 0)
goto out_acct;
if (memchr_inv(args.reserved, 0, sizeof(args.reserved)))
goto out_acct;
if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
goto out_acct;
if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
goto out_acct;
if (args.unencoded_offset > args.unencoded_len)
goto out_acct;
if (args.len > args.unencoded_len - args.unencoded_offset)
goto out_acct;
ret = import_iovec(WRITE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
&iov, &iter);
if (ret < 0)
goto out_acct;
file_start_write(file);
if (iov_iter_count(&iter) == 0) {
ret = 0;
goto out_end_write;
}
pos = args.offset;
ret = rw_verify_area(WRITE, file, &pos, args.len);
if (ret < 0)
goto out_end_write;
init_sync_kiocb(&kiocb, file);
ret = kiocb_set_rw_flags(&kiocb, 0);
if (ret)
goto out_end_write;
kiocb.ki_pos = pos;
ret = btrfs_do_write_iter(&kiocb, &iter, &args);
if (ret > 0)
fsnotify_modify(file);
out_end_write:
file_end_write(file);
kfree(iov);
out_acct:
if (ret > 0)
add_wchar(current, ret);
inc_syscw(current);
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@ -5194,7 +5407,7 @@ long btrfs_ioctl(struct file *file, unsigned int
switch (cmd) {
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
return btrfs_ioctl_getversion(inode, argp);
case FS_IOC_GETFSLABEL:
return btrfs_ioctl_get_fslabel(fs_info, argp);
case FS_IOC_SETFSLABEL:
@ -5214,7 +5427,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SNAP_DESTROY_V2:
return btrfs_ioctl_snap_destroy(file, argp, true);
case BTRFS_IOC_SUBVOL_GETFLAGS:
return btrfs_ioctl_subvol_getflags(file, argp);
return btrfs_ioctl_subvol_getflags(inode, argp);
case BTRFS_IOC_SUBVOL_SETFLAGS:
return btrfs_ioctl_subvol_setflags(file, argp);
case BTRFS_IOC_DEFAULT_SUBVOL:
@ -5238,11 +5451,11 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_BALANCE:
return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_TREE_SEARCH:
return btrfs_ioctl_tree_search(file, argp);
return btrfs_ioctl_tree_search(inode, argp);
case BTRFS_IOC_TREE_SEARCH_V2:
return btrfs_ioctl_tree_search_v2(file, argp);
return btrfs_ioctl_tree_search_v2(inode, argp);
case BTRFS_IOC_INO_LOOKUP:
return btrfs_ioctl_ino_lookup(file, argp);
return btrfs_ioctl_ino_lookup(root, argp);
case BTRFS_IOC_INO_PATHS:
return btrfs_ioctl_ino_to_path(root, argp);
case BTRFS_IOC_LOGICAL_INO:
@ -5289,10 +5502,10 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_set_received_subvol_32(file, argp);
#endif
case BTRFS_IOC_SEND:
return _btrfs_ioctl_send(file, argp, false);
return _btrfs_ioctl_send(inode, argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
case BTRFS_IOC_SEND_32:
return _btrfs_ioctl_send(file, argp, true);
return _btrfs_ioctl_send(inode, argp, true);
#endif
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(fs_info, argp);
@ -5319,15 +5532,25 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SET_FEATURES:
return btrfs_ioctl_set_features(file, argp);
case BTRFS_IOC_GET_SUBVOL_INFO:
return btrfs_ioctl_get_subvol_info(file, argp);
return btrfs_ioctl_get_subvol_info(inode, argp);
case BTRFS_IOC_GET_SUBVOL_ROOTREF:
return btrfs_ioctl_get_subvol_rootref(file, argp);
return btrfs_ioctl_get_subvol_rootref(root, argp);
case BTRFS_IOC_INO_LOOKUP_USER:
return btrfs_ioctl_ino_lookup_user(file, argp);
case FS_IOC_ENABLE_VERITY:
return fsverity_ioctl_enable(file, (const void __user *)argp);
case FS_IOC_MEASURE_VERITY:
return fsverity_ioctl_measure(file, argp);
case BTRFS_IOC_ENCODED_READ:
return btrfs_ioctl_encoded_read(file, argp, false);
case BTRFS_IOC_ENCODED_WRITE:
return btrfs_ioctl_encoded_write(file, argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
case BTRFS_IOC_ENCODED_READ_32:
return btrfs_ioctl_encoded_read(file, argp, true);
case BTRFS_IOC_ENCODED_WRITE_32:
return btrfs_ioctl_encoded_write(file, argp, true);
#endif
}
return -ENOTTY;

View File

@ -55,6 +55,9 @@
* 0x1000 | SegHdr N+1| Data payload N+1 ... |
*/
#define WORKSPACE_BUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE))
#define WORKSPACE_CBUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE))
struct workspace {
void *mem;
void *buf; /* where decompressed data goes */
@ -83,8 +86,8 @@ struct list_head *lzo_alloc_workspace(unsigned int level)
return ERR_PTR(-ENOMEM);
workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
workspace->buf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
workspace->cbuf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL);
workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL);
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
goto fail;
@ -380,7 +383,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
kunmap(cur_page);
cur_in += LZO_LEN;
if (seg_len > lzo1x_worst_compress(PAGE_SIZE)) {
if (seg_len > WORKSPACE_CBUF_LENGTH) {
/*
* seg_len shouldn't be larger than we have allocated
* for workspace->cbuf
@ -433,7 +436,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
struct workspace *workspace = list_entry(ws, struct workspace, list);
size_t in_len;
size_t out_len;
size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
size_t max_segment_len = WORKSPACE_BUF_LENGTH;
int ret = 0;
char *kaddr;
unsigned long bytes;

View File

@ -143,16 +143,28 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
return ret;
}
/*
* Allocate and add a new ordered_extent into the per-inode tree.
/**
* Add an ordered extent to the per-inode tree.
*
* The tree is given a single reference on the ordered extent that was
* inserted.
* @inode: Inode that this extent is for.
* @file_offset: Logical offset in file where the extent starts.
* @num_bytes: Logical length of extent in file.
* @ram_bytes: Full length of unencoded data.
* @disk_bytenr: Offset of extent on disk.
* @disk_num_bytes: Size of extent on disk.
* @offset: Offset into unencoded data where file data starts.
* @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
* @compress_type: Compression algorithm used for data.
*
* Most of these parameters correspond to &struct btrfs_file_extent_item. The
* tree is given a single reference on the ordered extent that was inserted.
*
* Return: 0 or -ENOMEM.
*/
static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type, int dio,
int compress_type)
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
u64 disk_num_bytes, u64 offset, unsigned flags,
int compress_type)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@ -161,7 +173,8 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
struct btrfs_ordered_extent *entry;
int ret;
if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) {
if (flags &
((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
/* For nocow write, we can release the qgroup rsv right now */
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
if (ret < 0)
@ -181,9 +194,11 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
return -ENOMEM;
entry->file_offset = file_offset;
entry->disk_bytenr = disk_bytenr;
entry->num_bytes = num_bytes;
entry->ram_bytes = ram_bytes;
entry->disk_bytenr = disk_bytenr;
entry->disk_num_bytes = disk_num_bytes;
entry->offset = offset;
entry->bytes_left = num_bytes;
entry->inode = igrab(&inode->vfs_inode);
entry->compress_type = compress_type;
@ -191,18 +206,12 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
entry->qgroup_rsv = ret;
entry->physical = (u64)-1;
ASSERT(type == BTRFS_ORDERED_REGULAR ||
type == BTRFS_ORDERED_NOCOW ||
type == BTRFS_ORDERED_PREALLOC ||
type == BTRFS_ORDERED_COMPRESSED);
set_bit(type, &entry->flags);
ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
entry->flags = flags;
percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
fs_info->delalloc_batch);
if (dio)
set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
/* one ref for the tree */
refcount_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
@ -247,41 +256,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
return 0;
}
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
int type)
{
ASSERT(type == BTRFS_ORDERED_REGULAR ||
type == BTRFS_ORDERED_NOCOW ||
type == BTRFS_ORDERED_PREALLOC);
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
num_bytes, disk_num_bytes, type, 0,
BTRFS_COMPRESS_NONE);
}
int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type)
{
ASSERT(type == BTRFS_ORDERED_REGULAR ||
type == BTRFS_ORDERED_NOCOW ||
type == BTRFS_ORDERED_PREALLOC);
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
num_bytes, disk_num_bytes, type, 1,
BTRFS_COMPRESS_NONE);
}
int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int compress_type)
{
ASSERT(compress_type != BTRFS_COMPRESS_NONE);
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
num_bytes, disk_num_bytes,
BTRFS_ORDERED_COMPRESSED, 0,
compress_type);
}
/*
* Add a struct btrfs_ordered_sum into the list of checksums to be inserted
* when an ordered extent is finished. If the list covers more than one
@ -548,9 +522,15 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
if (root != fs_info->tree_root)
btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes,
false);
if (root != fs_info->tree_root) {
u64 release;
if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags))
release = entry->disk_num_bytes;
else
release = entry->num_bytes;
btrfs_delalloc_release_metadata(btrfs_inode, release, false);
}
percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
fs_info->delalloc_batch);
@ -1052,42 +1032,18 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
u64 file_offset = ordered->file_offset + pos;
u64 disk_bytenr = ordered->disk_bytenr + pos;
u64 num_bytes = len;
u64 disk_num_bytes = len;
int type;
unsigned long flags_masked = ordered->flags & ~(1 << BTRFS_ORDERED_DIRECT);
int compress_type = ordered->compress_type;
unsigned long weight;
int ret;
weight = hweight_long(flags_masked);
WARN_ON_ONCE(weight > 1);
if (!weight)
type = 0;
else
type = __ffs(flags_masked);
unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;
/*
* The splitting extent is already counted and will be added again
* in btrfs_add_ordered_extent_*(). Subtract num_bytes to avoid
* double counting.
* The splitting extent is already counted and will be added again in
* btrfs_add_ordered_extent_*(). Subtract len to avoid double counting.
*/
percpu_counter_add_batch(&fs_info->ordered_bytes, -num_bytes,
percpu_counter_add_batch(&fs_info->ordered_bytes, -len,
fs_info->delalloc_batch);
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) {
WARN_ON_ONCE(1);
ret = btrfs_add_ordered_extent_compress(BTRFS_I(inode),
file_offset, disk_bytenr, num_bytes,
disk_num_bytes, compress_type);
} else if (test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
ret = btrfs_add_ordered_extent_dio(BTRFS_I(inode), file_offset,
disk_bytenr, num_bytes, disk_num_bytes, type);
} else {
ret = btrfs_add_ordered_extent(BTRFS_I(inode), file_offset,
disk_bytenr, num_bytes, disk_num_bytes, type);
}
return ret;
WARN_ON_ONCE(flags & (1 << BTRFS_ORDERED_COMPRESSED));
return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len,
disk_bytenr, len, 0, flags,
ordered->compress_type);
}
int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,

View File

@ -74,8 +74,18 @@ enum {
BTRFS_ORDERED_LOGGED_CSUM,
/* We wait for this extent to complete in the current transaction */
BTRFS_ORDERED_PENDING,
/* BTRFS_IOC_ENCODED_WRITE */
BTRFS_ORDERED_ENCODED,
};
/* BTRFS_ORDERED_* flags that specify the type of the extent. */
#define BTRFS_ORDERED_TYPE_FLAGS ((1UL << BTRFS_ORDERED_REGULAR) | \
(1UL << BTRFS_ORDERED_NOCOW) | \
(1UL << BTRFS_ORDERED_PREALLOC) | \
(1UL << BTRFS_ORDERED_COMPRESSED) | \
(1UL << BTRFS_ORDERED_DIRECT) | \
(1UL << BTRFS_ORDERED_ENCODED))
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
@ -84,9 +94,11 @@ struct btrfs_ordered_extent {
* These fields directly correspond to the same fields in
* btrfs_file_extent_item.
*/
u64 disk_bytenr;
u64 num_bytes;
u64 ram_bytes;
u64 disk_bytenr;
u64 disk_num_bytes;
u64 offset;
/* number of bytes that still need writing */
u64 bytes_left;
@ -179,14 +191,9 @@ bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
int type);
int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type);
int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int compress_type);
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
u64 disk_num_bytes, u64 offset, unsigned flags,
int compress_type);
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,

View File

@ -23,6 +23,7 @@ static const struct root_name_map root_map[] = {
{ BTRFS_QUOTA_TREE_OBJECTID, "QUOTA_TREE" },
{ BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" },
{ BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" },
{ BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },
{ BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" },
};
@ -391,9 +392,9 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
btrfs_header_owner(c),
btrfs_node_ptr_generation(c, i),
level - 1, &first_key);
if (IS_ERR(next)) {
if (IS_ERR(next))
continue;
} else if (!extent_buffer_uptodate(next)) {
if (!extent_buffer_uptodate(next)) {
free_extent_buffer(next);
continue;
}

View File

@ -25,18 +25,6 @@
#include "sysfs.h"
#include "tree-mod-log.h"
/* TODO XXX FIXME
* - subvol delete -> delete when ref goes to 0? delete limits also?
* - reorganize keys
* - compressed
* - sync
* - copy also limits on subvol creation
* - limit
* - caches for ulists
* - performance benchmarks
* - check all ioctl parameters
*/
/*
* Helpers to access qgroup reservation
*
@ -258,16 +246,19 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
return 0;
}
/* must be called with qgroup_lock held */
static int add_relation_rb(struct btrfs_fs_info *fs_info,
u64 memberid, u64 parentid)
/*
* Add relation specified by two qgroups.
*
* Must be called with qgroup_lock held.
*
* Return: 0 on success
* -ENOENT if one of the qgroups is NULL
* <0 other errors
*/
static int __add_relation_rb(struct btrfs_qgroup *member, struct btrfs_qgroup *parent)
{
struct btrfs_qgroup *member;
struct btrfs_qgroup *parent;
struct btrfs_qgroup_list *list;
member = find_qgroup_rb(fs_info, memberid);
parent = find_qgroup_rb(fs_info, parentid);
if (!member || !parent)
return -ENOENT;
@ -283,7 +274,27 @@ static int add_relation_rb(struct btrfs_fs_info *fs_info,
return 0;
}
/* must be called with qgroup_lock held */
/*
* Add relation specified by two qgoup ids.
*
* Must be called with qgroup_lock held.
*
* Return: 0 on success
* -ENOENT if one of the ids does not exist
* <0 other errors
*/
static int add_relation_rb(struct btrfs_fs_info *fs_info, u64 memberid, u64 parentid)
{
struct btrfs_qgroup *member;
struct btrfs_qgroup *parent;
member = find_qgroup_rb(fs_info, memberid);
parent = find_qgroup_rb(fs_info, parentid);
return __add_relation_rb(member, parent);
}
/* Must be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
u64 memberid, u64 parentid)
{
@ -948,6 +959,12 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
*/
lockdep_assert_held_write(&fs_info->subvol_sem);
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
btrfs_err(fs_info,
"qgroups are currently unsupported in extent tree v2");
return -EINVAL;
}
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (fs_info->quota_root)
goto out;
@ -1451,7 +1468,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
}
spin_lock(&fs_info->qgroup_lock);
ret = add_relation_rb(fs_info, src, dst);
ret = __add_relation_rb(member, parent);
if (ret < 0) {
spin_unlock(&fs_info->qgroup_lock);
goto out;
@ -3268,7 +3285,8 @@ out:
static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
{
return btrfs_fs_closing(fs_info) ||
test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
}
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
@ -3298,11 +3316,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
err = PTR_ERR(trans);
break;
}
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
err = -EINTR;
} else {
err = qgroup_rescan_leaf(trans, path);
}
err = qgroup_rescan_leaf(trans, path);
if (err > 0)
btrfs_commit_transaction(trans);
else
@ -3316,7 +3332,7 @@ out:
if (err > 0 &&
fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
} else if (err < 0) {
} else if (err < 0 || stopped) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
}
mutex_unlock(&fs_info->qgroup_rescan_lock);

View File

@ -277,7 +277,7 @@ copy_inline_extent:
path->slots[0]),
size);
btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
btrfs_set_inode_full_sync(BTRFS_I(dst));
ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
out:
if (!ret && !trans) {
@ -494,7 +494,8 @@ process_slot:
&clone_info, &trans);
if (ret)
goto out;
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
} else {
ASSERT(type == BTRFS_FILE_EXTENT_INLINE);
/*
* Inline extents always have to start at file offset 0
* and can never be bigger then the sector size. We can
@ -505,8 +506,12 @@ process_slot:
*/
ASSERT(key.offset == 0);
ASSERT(datal <= fs_info->sectorsize);
if (key.offset != 0 || datal > fs_info->sectorsize)
return -EUCLEAN;
if (WARN_ON(type != BTRFS_FILE_EXTENT_INLINE) ||
WARN_ON(key.offset != 0) ||
WARN_ON(datal > fs_info->sectorsize)) {
ret = -EUCLEAN;
goto out;
}
ret = clone_copy_inline_extent(inode, path, &new_key,
drop_start, datal, size,
@ -518,17 +523,22 @@ process_slot:
btrfs_release_path(path);
/*
* If this is a new extent update the last_reflink_trans of both
* inodes. This is used by fsync to make sure it does not log
* multiple checksum items with overlapping ranges. For older
* extents we don't need to do it since inode logging skips the
* checksums for older extents. Also ignore holes and inline
* extents because they don't have checksums in the csum tree.
* Whenever we share an extent we update the last_reflink_trans
* of each inode to the current transaction. This is needed to
* make sure fsync does not log multiple checksum items with
* overlapping ranges (because some extent items might refer
* only to sections of the original extent). For the destination
* inode we do this regardless of the generation of the extents
* or even if they are inline extents or explicit holes, to make
* sure a full fsync does not skip them. For the source inode,
* we only need to update last_reflink_trans in case it's a new
* extent that is not a hole or an inline extent, to deal with
* the checksums problem on fsync.
*/
if (extent_gen == trans->transid && disko > 0) {
if (extent_gen == trans->transid && disko > 0)
BTRFS_I(src)->last_reflink_trans = trans->transid;
BTRFS_I(inode)->last_reflink_trans = trans->transid;
}
BTRFS_I(inode)->last_reflink_trans = trans->transid;
last_dest_end = ALIGN(new_key.offset + datal,
fs_info->sectorsize);
@ -575,8 +585,7 @@ process_slot:
* replaced file extent items.
*/
if (last_dest_end >= i_size_read(inode))
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
btrfs_set_inode_full_sync(BTRFS_I(inode));
ret = btrfs_replace_file_extents(BTRFS_I(inode), path,
last_dest_end, destoff + len - 1, NULL, &trans);
@ -772,9 +781,7 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
if (btrfs_root_readonly(root_out))
return -EROFS;
if (file_in->f_path.mnt != file_out->f_path.mnt ||
inode_in->i_sb != inode_out->i_sb)
return -EXDEV;
ASSERT(inode_in->i_sb == inode_out->i_sb);
}
/* Don't make the dst file partly checksummed */

View File

@ -2599,9 +2599,9 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
eb = read_tree_block(fs_info, block->bytenr, block->owner,
block->key.offset, block->level, NULL);
if (IS_ERR(eb)) {
if (IS_ERR(eb))
return PTR_ERR(eb);
} else if (!extent_buffer_uptodate(eb)) {
if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
@ -2997,7 +2997,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
/* Reserve metadata for this range */
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
clamped_len);
clamped_len, clamped_len);
if (ret)
goto release_page;
@ -4123,9 +4123,8 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
* this function resumes merging reloc trees with corresponding fs trees.
* this is important for keeping the sharing of tree blocks
*/
int btrfs_recover_relocation(struct btrfs_root *root)
int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_info *fs_info = root->fs_info;
LIST_HEAD(reloc_roots);
struct btrfs_key key;
struct btrfs_root *fs_root;
@ -4166,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
key.type != BTRFS_ROOT_ITEM_KEY)
break;
reloc_root = btrfs_read_tree_root(root, &key);
reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
if (IS_ERR(reloc_root)) {
err = PTR_ERR(reloc_root);
goto out;

View File

@ -3190,7 +3190,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 generation;
int mirror_num;
struct btrfs_key key;
u64 increment = map->stripe_len;
u64 increment;
u64 offset;
u64 extent_logical;
u64 extent_physical;

View File

@ -528,17 +528,12 @@ out:
static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
int ret;
p->reversed = from->reversed;
fs_path_reset(p);
ret = fs_path_add_path(p, from);
return ret;
return fs_path_add_path(p, from);
}
static void fs_path_unreverse(struct fs_path *p)
{
char *tmp;
@ -7477,10 +7472,10 @@ static void dedupe_in_progress_warn(const struct btrfs_root *root)
root->root_key.objectid, root->dedupe_in_progress);
}
long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
{
int ret = 0;
struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
struct btrfs_root *send_root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = send_root->fs_info;
struct btrfs_root *clone_root;
struct send_ctx *sctx = NULL;

View File

@ -126,7 +126,7 @@ enum {
#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
#ifdef __KERNEL__
long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg);
long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg);
#endif
#endif

View File

@ -737,6 +737,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
u64 thresh = div_factor_fine(space_info->total_bytes, 90);
u64 used;
lockdep_assert_held(&space_info->lock);
/* If we're just plain full then async reclaim just slows us down. */
if ((space_info->bytes_used + space_info->bytes_reserved +
global_rsv_size) >= thresh)
@ -1061,7 +1063,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
trans_rsv->reserved;
if (block_rsv_size < space_info->bytes_may_use)
delalloc_size = space_info->bytes_may_use - block_rsv_size;
spin_unlock(&space_info->lock);
/*
* We don't want to include the global_rsv in our calculation,
@ -1092,6 +1093,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
flush = FLUSH_DELAYED_REFS_NR;
}
spin_unlock(&space_info->lock);
/*
* We don't want to reclaim everything, just a portion, so scale
* down the to_reclaim by 1/4. If it takes us down to 0,

View File

@ -66,6 +66,52 @@ static struct file_system_type btrfs_root_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
#ifdef CONFIG_PRINTK
#define STATE_STRING_PREFACE ": state "
#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
/*
* Characters to print to indicate error conditions or uncommon filesystem sate.
* RO is not an error.
*/
static const char fs_state_chars[] = {
[BTRFS_FS_STATE_ERROR] = 'E',
[BTRFS_FS_STATE_REMOUNTING] = 'M',
[BTRFS_FS_STATE_RO] = 0,
[BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
[BTRFS_FS_STATE_DEV_REPLACING] = 'R',
[BTRFS_FS_STATE_DUMMY_FS_INFO] = 0,
[BTRFS_FS_STATE_NO_CSUMS] = 'C',
[BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L',
};
static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
{
unsigned int bit;
bool states_printed = false;
unsigned long fs_state = READ_ONCE(info->fs_state);
char *curr = buf;
memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
curr += sizeof(STATE_STRING_PREFACE) - 1;
for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
*curr++ = fs_state_chars[bit];
states_printed = true;
}
}
/* If no states were printed, reset the buffer */
if (!states_printed)
curr = buf;
*curr++ = 0;
}
#endif
/*
* Generally the error codes correspond to their respective errors, but there
* are a few special cases.
@ -128,6 +174,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
{
struct super_block *sb = fs_info->sb;
#ifdef CONFIG_PRINTK
char statestr[STATE_STRING_BUF_LEN];
const char *errstr;
#endif
@ -140,6 +187,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
#ifdef CONFIG_PRINTK
errstr = btrfs_decode_error(errno);
btrfs_state_to_string(fs_info, statestr);
if (fmt) {
struct va_format vaf;
va_list args;
@ -148,12 +196,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
vaf.fmt = fmt;
vaf.va = &args;
pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
sb->s_id, function, line, errno, errstr, &vaf);
pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
sb->s_id, statestr, function, line, errno, errstr, &vaf);
va_end(args);
} else {
pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
sb->s_id, function, line, errno, errstr);
pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
sb->s_id, statestr, function, line, errno, errstr);
}
#endif
@ -240,11 +288,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
vaf.va = &args;
if (__ratelimit(ratelimit)) {
if (fs_info)
printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
fs_info->sb->s_id, &vaf);
else
if (fs_info) {
char statestr[STATE_STRING_BUF_LEN];
btrfs_state_to_string(fs_info, statestr);
printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
fs_info->sb->s_id, statestr, &vaf);
} else {
printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
}
}
va_end(args);
@ -861,6 +913,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_space_cache:
case Opt_space_cache_version:
/*
* We already set FREE_SPACE_TREE above because we have
* compat_ro(FREE_SPACE_TREE) set, and we aren't going
* to allow v1 to be set for extent tree v2, simply
* ignore this setting if we're extent tree v2.
*/
if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
break;
if (token == Opt_space_cache ||
strcmp(args[0].from, "v1") == 0) {
btrfs_clear_opt(info->mount_opt,
@ -881,6 +941,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
break;
case Opt_no_space_cache:
/*
* We cannot operate without the free space tree with
* extent tree v2, ignore this option.
*/
if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
break;
if (btrfs_test_opt(info, SPACE_CACHE)) {
btrfs_clear_and_info(info, SPACE_CACHE,
"disabling disk space caching");
@ -896,6 +962,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
"the 'inode_cache' option is deprecated and has no effect since 5.11");
break;
case Opt_clear_cache:
/*
* We cannot clear the free space tree with extent tree
* v2, ignore this option.
*/
if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
break;
btrfs_set_and_info(info, CLEAR_CACHE,
"force clearing of disk cache");
break;
@ -2383,6 +2455,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
{
struct btrfs_ioctl_vol_args *vol;
struct btrfs_device *device = NULL;
dev_t devt = 0;
int ret = -ENOTTY;
if (!capable(CAP_SYS_ADMIN))
@ -2402,7 +2475,12 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
mutex_unlock(&uuid_mutex);
break;
case BTRFS_IOC_FORGET_DEV:
ret = btrfs_forget_devices(vol->name);
if (vol->name[0] != 0) {
ret = lookup_bdev(vol->name, &devt);
if (ret)
break;
}
ret = btrfs_forget_devices(devt);
break;
case BTRFS_IOC_DEVICES_READY:
mutex_lock(&uuid_mutex);

View File

@ -283,9 +283,11 @@ BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
/* Remove once support for zoned allocation is feature complete */
#ifdef CONFIG_BTRFS_DEBUG
/* Remove once support for zoned allocation is feature complete */
BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
/* Remove once support for extent tree v2 is feature complete */
BTRFS_FEAT_ATTR_INCOMPAT(extent_tree_v2, EXTENT_TREE_V2);
#endif
#ifdef CONFIG_FS_VERITY
BTRFS_FEAT_ATTR_COMPAT_RO(verity, VERITY);
@ -314,6 +316,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(raid1c34),
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_FEAT_ATTR_PTR(zoned),
BTRFS_FEAT_ATTR_PTR(extent_tree_v2),
#endif
#ifdef CONFIG_FS_VERITY
BTRFS_FEAT_ATTR_PTR(verity),
@ -1104,6 +1107,11 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
static char btrfs_unknown_feature_names[FEAT_MAX][NUM_FEATURE_BITS][BTRFS_FEATURE_NAME_MAX];
static struct btrfs_feature_attr btrfs_feature_attrs[FEAT_MAX][NUM_FEATURE_BITS];
static_assert(ARRAY_SIZE(btrfs_unknown_feature_names) ==
ARRAY_SIZE(btrfs_feature_attrs));
static_assert(ARRAY_SIZE(btrfs_unknown_feature_names[0]) ==
ARRAY_SIZE(btrfs_feature_attrs[0]));
static const u64 supported_feature_masks[FEAT_MAX] = {
[FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
@ -1272,11 +1280,6 @@ static void init_feature_attrs(void)
struct btrfs_feature_attr *fa;
int set, i;
BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names) !=
ARRAY_SIZE(btrfs_feature_attrs));
BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names[0]) !=
ARRAY_SIZE(btrfs_feature_attrs[0]));
memset(btrfs_feature_attrs, 0, sizeof(btrfs_feature_attrs));
memset(btrfs_unknown_feature_names, 0,
sizeof(btrfs_unknown_feature_names));

View File

@ -15,6 +15,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
struct extent_map *em;
struct rb_node *node;
write_lock(&em_tree->lock);
while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
node = rb_first_cached(&em_tree->map);
em = rb_entry(node, struct extent_map, rb_node);
@ -32,6 +33,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
#endif
free_extent_map(em);
}
write_unlock(&em_tree->lock);
}
/*

View File

@ -1911,6 +1911,14 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
super->cache_generation = 0;
if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
super->uuid_tree_generation = root_item->generation;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
root_item = &fs_info->block_group_root->root_item;
super->block_group_root = root_item->bytenr;
super->block_group_root_generation = root_item->generation;
super->block_group_root_level = root_item->level;
}
}
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@ -2362,6 +2370,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
list_add_tail(&fs_info->chunk_root->dirty_list,
&cur_trans->switch_commits);
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
btrfs_set_root_node(&fs_info->block_group_root->root_item,
fs_info->block_group_root->node);
list_add_tail(&fs_info->block_group_root->dirty_list,
&cur_trans->switch_commits);
}
switch_commit_roots(trans);
ASSERT(list_empty(&cur_trans->dirty_bgs));
@ -2490,10 +2505,10 @@ cleanup_transaction:
* because btrfs_commit_super will poke cleaner thread and it will process it a
* few seconds later.
*/
int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
spin_lock(&fs_info->trans_lock);
if (list_empty(&fs_info->dead_roots)) {

View File

@ -217,7 +217,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);
void btrfs_add_dead_root(struct btrfs_root *root);
int btrfs_defrag_root(struct btrfs_root *root);
void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);

View File

@ -639,8 +639,10 @@ static void block_group_err(const struct extent_buffer *eb, int slot,
static int check_block_group_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_block_group_item bgi;
u32 item_size = btrfs_item_size(leaf, slot);
u64 chunk_objectid;
u64 flags;
u64 type;
@ -663,8 +665,23 @@ static int check_block_group_item(struct extent_buffer *leaf,
read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
sizeof(bgi));
if (unlikely(btrfs_stack_block_group_chunk_objectid(&bgi) !=
BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
/*
* We don't init the nr_global_roots until we load the global
* roots, so this could be 0 at mount time. If it's 0 we'll
* just assume we're fine, and later we'll check against our
* actual value.
*/
if (unlikely(fs_info->nr_global_roots &&
chunk_objectid >= fs_info->nr_global_roots)) {
block_group_err(leaf, slot,
"invalid block group global root id, have %llu, needs to be <= %llu",
chunk_objectid,
fs_info->nr_global_roots);
return -EUCLEAN;
}
} else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
block_group_err(leaf, slot,
"invalid block group chunk objectid, have %llu expect %llu",
btrfs_stack_block_group_chunk_objectid(&bgi),
@ -1648,7 +1665,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
/* These trees must never be empty */
if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID ||
owner == BTRFS_CHUNK_TREE_OBJECTID ||
owner == BTRFS_EXTENT_TREE_OBJECTID ||
owner == BTRFS_DEV_TREE_OBJECTID ||
owner == BTRFS_FS_TREE_OBJECTID ||
owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
@ -1657,12 +1673,25 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
owner);
return -EUCLEAN;
}
/* Unknown tree */
if (unlikely(owner == 0)) {
generic_err(leaf, 0,
"invalid owner, root 0 is not defined");
return -EUCLEAN;
}
/* EXTENT_TREE_V2 can have empty extent trees. */
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
return 0;
if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
generic_err(leaf, 0,
"invalid root, root %llu must never be empty",
owner);
return -EUCLEAN;
}
return 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -17,6 +17,8 @@ struct btrfs_log_ctx {
int log_transid;
bool log_new_dentries;
bool logging_new_name;
/* Indicate if the inode being logged was logged before. */
bool logged_before;
/* Tracks the last logged dir item/index key offset. */
u64 last_dir_item_offset;
struct inode *inode;
@ -32,6 +34,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
ctx->log_transid = 0;
ctx->log_new_dentries = false;
ctx->logging_new_name = false;
ctx->logged_before = false;
ctx->inode = inode;
INIT_LIST_HEAD(&ctx->list);
INIT_LIST_HEAD(&ctx->ordered_extents);
@ -86,7 +89,7 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir);
void btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, struct btrfs_inode *old_dir,
struct dentry *parent);
struct dentry *old_dentry, struct btrfs_inode *old_dir,
u64 old_dir_index, struct dentry *parent);
#endif

View File

@ -534,30 +534,20 @@ error:
return ret;
}
static bool device_path_matched(const char *path, struct btrfs_device *device)
{
int found;
rcu_read_lock();
found = strcmp(rcu_str_deref(device->name), path);
rcu_read_unlock();
return found == 0;
}
/*
* Search and remove all stale (devices which are not mounted) devices.
/**
* Search and remove all stale devices (which are not mounted).
* When both inputs are NULL, it will search and release all stale devices.
* path: Optional. When provided will it release all unmounted devices
* matching this path only.
* skip_dev: Optional. Will skip this device when searching for the stale
*
* @devt: Optional. When provided will it release all unmounted devices
* matching this devt only.
* @skip_device: Optional. Will skip this device when searching for the stale
* devices.
* Return: 0 for success or if @path is NULL.
* -EBUSY if @path is a mounted device.
* -ENOENT if @path does not match any device in the list.
*
* Return: 0 for success or if @devt is 0.
* -EBUSY if @devt is a mounted device.
* -ENOENT if @devt does not match any device in the list.
*/
static int btrfs_free_stale_devices(const char *path,
struct btrfs_device *skip_device)
static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device)
{
struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
struct btrfs_device *device, *tmp_device;
@ -565,7 +555,7 @@ static int btrfs_free_stale_devices(const char *path,
lockdep_assert_held(&uuid_mutex);
if (path)
if (devt)
ret = -ENOENT;
list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
@ -575,13 +565,11 @@ static int btrfs_free_stale_devices(const char *path,
&fs_devices->devices, dev_list) {
if (skip_device && skip_device == device)
continue;
if (path && !device->name)
continue;
if (path && !device_path_matched(path, device))
if (devt && devt != device->devt)
continue;
if (fs_devices->opened) {
/* for an already deleted device return 0 */
if (path && ret != 0)
if (devt && ret != 0)
ret = -EBUSY;
break;
}
@ -614,7 +602,6 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *device, fmode_t flags,
void *holder)
{
struct request_queue *q;
struct block_device *bdev;
struct btrfs_super_block *disk_super;
u64 devid;
@ -656,8 +643,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
}
q = bdev_get_queue(bdev);
if (!blk_queue_nonrot(q))
if (!blk_queue_nonrot(bdev_get_queue(bdev)))
fs_devices->rotating = true;
device->bdev = bdev;
@ -781,11 +767,17 @@ static noinline struct btrfs_device *device_list_add(const char *path,
struct rcu_string *name;
u64 found_transid = btrfs_super_generation(disk_super);
u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
dev_t path_devt;
int error;
bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
error = lookup_bdev(path, &path_devt);
if (error)
return ERR_PTR(error);
if (fsid_change_in_progress) {
if (!has_metadata_uuid)
fs_devices = find_fsid_inprogress(disk_super);
@ -868,6 +860,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
return ERR_PTR(-ENOMEM);
}
rcu_assign_pointer(device->name, name);
device->devt = path_devt;
list_add_rcu(&device->dev_list, &fs_devices->devices);
fs_devices->num_devices++;
@ -928,25 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
/*
* We are going to replace the device path for a given devid,
* make sure it's the same device if the device is mounted
*
* NOTE: the device->fs_info may not be reliable here so pass
* in a NULL to message helpers instead. This avoids a possible
* use-after-free when the fs_info and fs_info->sb are already
* torn down.
*/
if (device->bdev) {
int error;
dev_t path_dev;
error = lookup_bdev(path, &path_dev);
if (error) {
if (device->devt != path_devt) {
mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(error);
}
if (device->bdev->bd_dev != path_dev) {
mutex_unlock(&fs_devices->device_list_mutex);
/*
* device->fs_info may not be reliable here, so
* pass in a NULL instead. This avoids a
* possible use-after-free when the fs_info and
* fs_info->sb are already torn down.
*/
btrfs_warn_in_rcu(NULL,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
path, devid, found_transid,
@ -954,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
btrfs_info_in_rcu(device->fs_info,
btrfs_info_in_rcu(NULL,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, rcu_str_deref(device->name),
path, current->comm,
@ -972,6 +955,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
fs_devices->missing_devices--;
clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
}
device->devt = path_devt;
}
/*
@ -1331,12 +1315,12 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
return disk_super;
}
int btrfs_forget_devices(const char *path)
int btrfs_forget_devices(dev_t devt)
{
int ret;
mutex_lock(&uuid_mutex);
ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
ret = btrfs_free_stale_devices(devt, NULL);
mutex_unlock(&uuid_mutex);
return ret;
@ -1385,10 +1369,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
}
device = device_list_add(path, disk_super, &new_device_added);
if (!IS_ERR(device)) {
if (new_device_added)
btrfs_free_stale_devices(path, device);
}
if (!IS_ERR(device) && new_device_added)
btrfs_free_stale_devices(device->devt, device);
btrfs_release_disk_super(disk_super);
@ -2102,6 +2084,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
u64 num_devices;
int ret = 0;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
btrfs_err(fs_info, "device remove not supported on extent tree v2 yet");
return -EINVAL;
}
/*
* The device list in fs_devices is accessed without locks (neither
* uuid_mutex nor device_list_mutex) as it won't change on a mounted
@ -2606,7 +2593,6 @@ error:
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
{
struct btrfs_root *root = fs_info->dev_root;
struct request_queue *q;
struct btrfs_trans_handle *trans;
struct btrfs_device *device;
struct block_device *bdev;
@ -2668,6 +2654,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
device->fs_info = fs_info;
device->bdev = bdev;
ret = lookup_bdev(device_path, &device->devt);
if (ret)
goto error_free_device;
ret = btrfs_get_dev_zone_info(device, false);
if (ret)
@ -2679,7 +2668,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
goto error_free_zone;
}
q = bdev_get_queue(bdev);
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
device->generation = trans->transid;
device->io_width = fs_info->sectorsize;
@ -2727,7 +2715,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
if (!blk_queue_nonrot(q))
if (!blk_queue_nonrot(bdev_get_queue(bdev)))
fs_devices->rotating = true;
orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
@ -2814,7 +2802,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
* We can ignore the return value as it typically returns -EINVAL and
* only succeeds if the device was an alien.
*/
btrfs_forget_devices(device_path);
btrfs_forget_devices(device->devt);
/* Update ctime/mtime for blkid or udev */
update_dev_time(device_path);
@ -3251,6 +3239,12 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
u64 length;
int ret;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
btrfs_err(fs_info,
"relocate: not supported on extent tree v2 yet");
return -EINVAL;
}
/*
* Prevent races with automatic removal of unused block groups.
* After we relocate and before we remove the chunk with offset
@ -7060,6 +7054,27 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
}
#endif
static struct btrfs_device *handle_missing_device(struct btrfs_fs_info *fs_info,
u64 devid, u8 *uuid)
{
struct btrfs_device *dev;
if (!btrfs_test_opt(fs_info, DEGRADED)) {
btrfs_report_missing_device(fs_info, devid, uuid, true);
return ERR_PTR(-ENOENT);
}
dev = add_missing_dev(fs_info->fs_devices, devid, uuid);
if (IS_ERR(dev)) {
btrfs_err(fs_info, "failed to init missing device %llu: %ld",
devid, PTR_ERR(dev));
return dev;
}
btrfs_report_missing_device(fs_info, devid, uuid, false);
return dev;
}
static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
struct btrfs_chunk *chunk)
{
@ -7147,28 +7162,17 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
BTRFS_UUID_SIZE);
args.uuid = uuid;
map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, &args);
if (!map->stripes[i].dev &&
!btrfs_test_opt(fs_info, DEGRADED)) {
free_extent_map(em);
btrfs_report_missing_device(fs_info, devid, uuid, true);
return -ENOENT;
}
if (!map->stripes[i].dev) {
map->stripes[i].dev =
add_missing_dev(fs_info->fs_devices, devid,
uuid);
map->stripes[i].dev = handle_missing_device(fs_info,
devid, uuid);
if (IS_ERR(map->stripes[i].dev)) {
free_extent_map(em);
btrfs_err(fs_info,
"failed to init missing dev %llu: %ld",
devid, PTR_ERR(map->stripes[i].dev));
return PTR_ERR(map->stripes[i].dev);
}
btrfs_report_missing_device(fs_info, devid, uuid, false);
}
set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
&(map->stripes[i].dev->dev_state));
}
write_lock(&map_tree->lock);
@ -8299,10 +8303,12 @@ static int relocating_repair_kthread(void *data)
target = cache->start;
btrfs_put_block_group(cache);
sb_start_write(fs_info->sb);
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
btrfs_info(fs_info,
"zoned: skip relocating block group %llu to repair: EBUSY",
target);
sb_end_write(fs_info->sb);
return -EBUSY;
}
@ -8330,6 +8336,7 @@ out:
btrfs_put_block_group(cache);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_exclop_finish(fs_info);
sb_end_write(fs_info->sb);
return ret;
}

View File

@ -72,6 +72,11 @@ struct btrfs_device {
/* the mode sent to blkdev_get */
fmode_t mode;
/*
* Device's major-minor number. Must be set even if the device is not
* opened (bdev == NULL), unless the device is missing.
*/
dev_t devt;
unsigned long dev_state;
blk_status_t last_flush_error;
@ -505,7 +510,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder);
struct btrfs_device *btrfs_scan_one_device(const char *path,
fmode_t flags, void *holder);
int btrfs_forget_devices(const char *path);
int btrfs_forget_devices(dev_t devt);
void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
void btrfs_assign_next_active_device(struct btrfs_device *device,

View File

@ -652,8 +652,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
if (model == BLK_ZONED_HM ||
(model == BLK_ZONED_HA && incompat_zoned) ||
(model == BLK_ZONED_NONE && incompat_zoned)) {
struct btrfs_zoned_device_info *zone_info =
device->zone_info;
struct btrfs_zoned_device_info *zone_info;
zone_info = device->zone_info;
zoned_devices++;
@ -1215,12 +1214,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
struct btrfs_device *device;
u64 logical = cache->start;
u64 length = cache->length;
u64 physical = 0;
int ret;
int i;
unsigned int nofs_flag;
u64 *alloc_offsets = NULL;
u64 *caps = NULL;
u64 *physical = NULL;
unsigned long *active = NULL;
u64 last_alloc = 0;
u32 num_sequential = 0, num_conventional = 0;
@ -1264,6 +1263,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS);
if (!physical) {
ret = -ENOMEM;
goto out;
}
active = bitmap_zalloc(map->num_stripes, GFP_NOFS);
if (!active) {
ret = -ENOMEM;
@ -1277,14 +1282,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
int dev_replace_is_ongoing = 0;
device = map->stripes[i].dev;
physical = map->stripes[i].physical;
physical[i] = map->stripes[i].physical;
if (device->bdev == NULL) {
alloc_offsets[i] = WP_MISSING_DEV;
continue;
}
is_sequential = btrfs_dev_is_sequential(device, physical);
is_sequential = btrfs_dev_is_sequential(device, physical[i]);
if (is_sequential)
num_sequential++;
else
@ -1299,21 +1304,21 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
* This zone will be used for allocation, so mark this zone
* non-empty.
*/
btrfs_dev_clear_zone_empty(device, physical);
btrfs_dev_clear_zone_empty(device, physical[i]);
down_read(&dev_replace->rwsem);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical);
btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]);
up_read(&dev_replace->rwsem);
/*
* The group is mapped to a sequential zone. Get the zone write
* pointer to determine the allocation offset within the zone.
*/
WARN_ON(!IS_ALIGNED(physical, fs_info->zone_size));
WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size));
nofs_flag = memalloc_nofs_save();
ret = btrfs_get_dev_zone(device, physical, &zone);
ret = btrfs_get_dev_zone(device, physical[i], &zone);
memalloc_nofs_restore(nofs_flag);
if (ret == -EIO || ret == -EOPNOTSUPP) {
ret = 0;
@ -1339,7 +1344,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
case BLK_ZONE_COND_READONLY:
btrfs_err(fs_info,
"zoned: offline/readonly zone %llu on device %s (devid %llu)",
physical >> device->zone_info->zone_size_shift,
physical[i] >> device->zone_info->zone_size_shift,
rcu_str_deref(device->name), device->devid);
alloc_offsets[i] = WP_MISSING_DEV;
break;
@ -1404,7 +1409,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
if (alloc_offsets[0] == WP_MISSING_DEV) {
btrfs_err(fs_info,
"zoned: cannot recover write pointer for zone %llu",
physical);
physical[0]);
ret = -EIO;
goto out;
}
@ -1413,6 +1418,42 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
cache->zone_is_active = test_bit(0, active);
break;
case BTRFS_BLOCK_GROUP_DUP:
if (map->type & BTRFS_BLOCK_GROUP_DATA) {
btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg");
ret = -EINVAL;
goto out;
}
if (alloc_offsets[0] == WP_MISSING_DEV) {
btrfs_err(fs_info,
"zoned: cannot recover write pointer for zone %llu",
physical[0]);
ret = -EIO;
goto out;
}
if (alloc_offsets[1] == WP_MISSING_DEV) {
btrfs_err(fs_info,
"zoned: cannot recover write pointer for zone %llu",
physical[1]);
ret = -EIO;
goto out;
}
if (alloc_offsets[0] != alloc_offsets[1]) {
btrfs_err(fs_info,
"zoned: write pointer offset mismatch of zones in DUP profile");
ret = -EIO;
goto out;
}
if (test_bit(0, active) != test_bit(1, active)) {
if (!btrfs_zone_activate(cache)) {
ret = -EIO;
goto out;
}
} else {
cache->zone_is_active = test_bit(0, active);
}
cache->alloc_offset = alloc_offsets[0];
cache->zone_capacity = min(caps[0], caps[1]);
break;
case BTRFS_BLOCK_GROUP_RAID1:
case BTRFS_BLOCK_GROUP_RAID0:
case BTRFS_BLOCK_GROUP_RAID10:
@ -1465,6 +1506,7 @@ out:
cache->physical_map = NULL;
}
bitmap_free(active);
kfree(physical);
kfree(caps);
kfree(alloc_offsets);
free_extent_map(em);
@ -1781,50 +1823,55 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
struct btrfs_device *device;
u64 physical;
bool ret;
int i;
if (!btrfs_is_zoned(block_group->fs_info))
return true;
map = block_group->physical_map;
/* Currently support SINGLE profile only */
ASSERT(map->num_stripes == 1);
device = map->stripes[0].dev;
physical = map->stripes[0].physical;
if (device->zone_info->max_active_zones == 0)
return true;
spin_lock(&block_group->lock);
if (block_group->zone_is_active) {
ret = true;
goto out_unlock;
}
/* No space left */
if (block_group->alloc_offset == block_group->zone_capacity) {
ret = false;
goto out_unlock;
for (i = 0; i < map->num_stripes; i++) {
device = map->stripes[i].dev;
physical = map->stripes[i].physical;
if (device->zone_info->max_active_zones == 0)
continue;
/* No space left */
if (block_group->alloc_offset == block_group->zone_capacity) {
ret = false;
goto out_unlock;
}
if (!btrfs_dev_set_active_zone(device, physical)) {
/* Cannot activate the zone */
ret = false;
goto out_unlock;
}
/* Successfully activated all the zones */
if (i == map->num_stripes - 1)
block_group->zone_is_active = 1;
}
if (!btrfs_dev_set_active_zone(device, physical)) {
/* Cannot activate the zone */
ret = false;
goto out_unlock;
}
/* Successfully activated all the zones */
block_group->zone_is_active = 1;
spin_unlock(&block_group->lock);
/* For the active block group list */
btrfs_get_block_group(block_group);
if (block_group->zone_is_active) {
/* For the active block group list */
btrfs_get_block_group(block_group);
spin_lock(&fs_info->zone_active_bgs_lock);
ASSERT(list_empty(&block_group->active_bg_list));
list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
spin_unlock(&fs_info->zone_active_bgs_lock);
spin_lock(&fs_info->zone_active_bgs_lock);
list_add_tail(&block_group->active_bg_list,
&fs_info->zone_active_bgs);
spin_unlock(&fs_info->zone_active_bgs_lock);
}
return true;
@ -1840,19 +1887,12 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
struct btrfs_device *device;
u64 physical;
int ret = 0;
int i;
if (!btrfs_is_zoned(fs_info))
return 0;
map = block_group->physical_map;
/* Currently support SINGLE profile only */
ASSERT(map->num_stripes == 1);
device = map->stripes[0].dev;
physical = map->stripes[0].physical;
if (device->zone_info->max_active_zones == 0)
return 0;
spin_lock(&block_group->lock);
if (!block_group->zone_is_active) {
@ -1904,25 +1944,34 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
btrfs_clear_data_reloc_bg(block_group);
spin_unlock(&block_group->lock);
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
physical >> SECTOR_SHIFT,
device->zone_info->zone_size >> SECTOR_SHIFT,
GFP_NOFS);
for (i = 0; i < map->num_stripes; i++) {
device = map->stripes[i].dev;
physical = map->stripes[i].physical;
if (device->zone_info->max_active_zones == 0)
continue;
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
physical >> SECTOR_SHIFT,
device->zone_info->zone_size >> SECTOR_SHIFT,
GFP_NOFS);
if (ret)
return ret;
btrfs_dev_clear_active_zone(device, physical);
}
btrfs_dec_block_group_ro(block_group);
if (!ret) {
btrfs_dev_clear_active_zone(device, physical);
spin_lock(&fs_info->zone_active_bgs_lock);
ASSERT(!list_empty(&block_group->active_bg_list));
list_del_init(&block_group->active_bg_list);
spin_unlock(&fs_info->zone_active_bgs_lock);
spin_lock(&fs_info->zone_active_bgs_lock);
ASSERT(!list_empty(&block_group->active_bg_list));
list_del_init(&block_group->active_bg_list);
spin_unlock(&fs_info->zone_active_bgs_lock);
/* For active_bg_list */
btrfs_put_block_group(block_group);
/* For active_bg_list */
btrfs_put_block_group(block_group);
}
return ret;
return 0;
}
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)

View File

@ -157,11 +157,6 @@ extern char *simple_dname(struct dentry *, char *, int);
extern void dput_to_list(struct dentry *, struct list_head *);
extern void shrink_dentry_list(struct list_head *);
/*
* read_write.c
*/
extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
/*
* pipe.c
*/

View File

@ -236,9 +236,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
if (!src_file.file)
return -EBADF;
ret = -EXDEV;
if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
goto fdput;
cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
olen, 0);
if (cloned < 0)
@ -247,7 +244,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
ret = -EINVAL;
else
ret = 0;
fdput:
fdput(src_file);
return ret;
}

View File

@ -385,6 +385,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
return security_file_permission(file,
read_write == READ ? MAY_READ : MAY_WRITE);
}
EXPORT_SYMBOL(rw_verify_area);
static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
@ -1617,24 +1618,16 @@ int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count)
return 0;
}
/*
* Performs necessary checks before doing a write
*
* Can adjust writing position or amount of bytes to write.
* Returns appropriate error code that caller should return or
* zero in case that write should be allowed.
*/
ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
/* Like generic_write_checks(), but takes size of write instead of iter. */
int generic_write_checks_count(struct kiocb *iocb, loff_t *count)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
loff_t count;
int ret;
if (IS_SWAPFILE(inode))
return -ETXTBSY;
if (!iov_iter_count(from))
if (!*count)
return 0;
/* FIXME: this is for backwards compatibility with 2.4 */
@ -1644,8 +1637,23 @@ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
return -EINVAL;
count = iov_iter_count(from);
ret = generic_write_check_limits(file, iocb->ki_pos, &count);
return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);
}
EXPORT_SYMBOL(generic_write_checks_count);
/*
* Performs necessary checks before doing a write
*
* Can adjust writing position or amount of bytes to write.
* Returns appropriate error code that caller should return or
* zero in case that write should be allowed.
*/
ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
loff_t count = iov_iter_count(from);
int ret;
ret = generic_write_checks_count(iocb, &count);
if (ret)
return ret;

View File

@ -362,11 +362,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
/*
* FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
* the same mount. Practically, they only need to be on the same file
* system.
*/
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;
@ -458,7 +453,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
goto out_drop_write;
ret = -EXDEV;
if (src_file->f_path.mnt != dst_file->f_path.mnt)
if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb)
goto out_drop_write;
ret = -EISDIR;

View File

@ -3130,6 +3130,7 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
int generic_write_checks_count(struct kiocb *iocb, loff_t *count);
extern int generic_write_check_limits(struct file *file, loff_t pos,
loff_t *count);
extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
@ -3173,6 +3174,7 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
int whence, loff_t size);
extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
int rw_verify_area(int, struct file *, const loff_t *, size_t);
extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);
extern int stream_open(struct inode * inode, struct file * filp);

View File

@ -53,6 +53,7 @@ struct btrfs_space_info;
{ BTRFS_TREE_RELOC_OBJECTID, "TREE_RELOC" }, \
{ BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" }, \
{ BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" }, \
{ BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },\
{ BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" })
#define show_root_type(obj) \

View File

@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args {
#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10)
#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11)
#define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12)
#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13)
struct btrfs_ioctl_feature_flags {
__u64 compat_flags;
@ -868,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args {
__u8 align[7];
};
/*
* Data and metadata for an encoded read or write.
*
* Encoded I/O bypasses any encoding automatically done by the filesystem (e.g.,
* compression). This can be used to read the compressed contents of a file or
* write pre-compressed data directly to a file.
*
* BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially
* preadv/pwritev with additional metadata about how the data is encoded and the
* size of the unencoded data.
*
* BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills
* the metadata fields, and returns the size of the encoded data. It reads one
* extent per call. It can also read data which is not encoded.
*
* BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data
* from the iovecs, and returns the size of the encoded data. Note that the
* encoded data is not validated when it is written; if it is not valid (e.g.,
* it cannot be decompressed), then a subsequent read may return an error.
*
* Since the filesystem page cache contains decoded data, encoded I/O bypasses
* the page cache. Encoded I/O requires CAP_SYS_ADMIN.
*/
struct btrfs_ioctl_encoded_io_args {
/* Input parameters for both reads and writes. */
/*
* iovecs containing encoded data.
*
* For reads, if the size of the encoded data is larger than the sum of
* iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with
* ENOBUFS.
*
* For writes, the size of the encoded data is the sum of iov[n].iov_len
* for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may
* increase in the future). This must also be less than or equal to
* unencoded_len.
*/
const struct iovec __user *iov;
/* Number of iovecs. */
unsigned long iovcnt;
/*
* Offset in file.
*
* For writes, must be aligned to the sector size of the filesystem.
*/
__s64 offset;
/* Currently must be zero. */
__u64 flags;
/*
* For reads, the following members are output parameters that will
* contain the returned metadata for the encoded data.
* For writes, the following members must be set to the metadata for the
* encoded data.
*/
/*
* Length of the data in the file.
*
* Must be less than or equal to unencoded_len - unencoded_offset. For
* writes, must be aligned to the sector size of the filesystem unless
* the data ends at or beyond the current end of the file.
*/
__u64 len;
/*
* Length of the unencoded (i.e., decrypted and decompressed) data.
*
* For writes, must be no more than 128 KiB (this limit may increase in
* the future). If the unencoded data is actually longer than
* unencoded_len, then it is truncated; if it is shorter, then it is
* extended with zeroes.
*/
__u64 unencoded_len;
/*
* Offset from the first byte of the unencoded data to the first byte of
* logical data in the file.
*
* Must be less than unencoded_len.
*/
__u64 unencoded_offset;
/*
* BTRFS_ENCODED_IO_COMPRESSION_* type.
*
* For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE.
*/
__u32 compression;
/* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */
__u32 encryption;
/*
* Reserved for future expansion.
*
* For reads, always returned as zero. Users should check for non-zero
* bytes. If there are any, then the kernel has a newer version of this
* structure with additional information that the user definition is
* missing.
*
* For writes, must be zeroed.
*/
__u8 reserved[64];
};
/* Data is not compressed. */
#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0
/* Data is compressed as a single zlib stream. */
#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1
/*
* Data is compressed as a single zstd frame with the windowLog compression
* parameter set to no more than 17.
*/
#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2
/*
* Data is compressed sector by sector (using the sector size indicated by the
* name of the constant) with LZO1X and wrapped in the format documented in
* fs/btrfs/lzo.c. For writes, the compression sector size must match the
* filesystem sector size.
*/
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7
#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8
/* Data is not encrypted. */
#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
/* Error codes as returned by the kernel */
enum btrfs_err_code {
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@ -996,5 +1125,9 @@ enum btrfs_err_code {
struct btrfs_ioctl_ino_lookup_user_args)
#define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \
struct btrfs_ioctl_encoded_io_args)
#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
struct btrfs_ioctl_encoded_io_args)
#endif /* _UAPI_LINUX_BTRFS_H */

View File

@ -53,6 +53,9 @@
/* tracks free space in block groups. */
#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
/* Holds the block group items for extent tree v2. */
#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL
/* device stats in the device tree */
#define BTRFS_DEV_STATS_OBJECTID 0ULL