mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-15 00:34:10 +08:00
Merge branch 'for-linus-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:

 "This has fixes and cleanups Dave Sterba collected for the merge window.

  The biggest functional fixes are between btrfs raid5/6 and scrub, and raid5/6 and device replacement. Some of our pending qgroup fixes are included as well while I bash on the rest in testing.

  We also have the usual set of cleanups, including one that makes __btrfs_map_block() much more maintainable, and conversions from atomic_t to refcount_t"

* 'for-linus-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (71 commits)
  btrfs: fix the gfp_mask for the reada_zones radix tree
  Btrfs: fix reported number of inode blocks
  Btrfs: send, fix file hole not being preserved due to inline extent
  Btrfs: fix extent map leak during fallocate error path
  Btrfs: fix incorrect space accounting after failure to insert inline extent
  Btrfs: fix invalid attempt to free reserved space on failure to cow range
  btrfs: Handle delalloc error correctly to avoid ordered extent hang
  btrfs: Fix metadata underflow caused by btrfs_reloc_clone_csum error
  btrfs: check if the device is flush capable
  btrfs: delete unused member nobarriers
  btrfs: scrub: Fix RAID56 recovery race condition
  btrfs: scrub: Introduce full stripe lock for RAID56
  btrfs: Use ktime_get_real_ts for root ctime
  Btrfs: handle only applicable errors returned by btrfs_get_extent
  btrfs: qgroup: Fix qgroup corruption caused by inode_cache mount option
  btrfs: use q which is already obtained from bdev_get_queue
  Btrfs: switch to div64_u64 if with a u64 divisor
  Btrfs: update scrub_parity to use u64 stripe_len
  Btrfs: enable repair during read for raid56 profile
  btrfs: use clear_page where appropriate
  ...
This commit is contained in:
commit
1176032cb1
@ -2926,6 +2926,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git
|
||||
S: Maintained
|
||||
F: Documentation/filesystems/btrfs.txt
|
||||
F: fs/btrfs/
|
||||
F: include/linux/btrfs*
|
||||
F: include/uapi/linux/btrfs*
|
||||
|
||||
BTTV VIDEO4LINUX DRIVER
|
||||
M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
|
||||
|
@ -26,6 +26,11 @@
|
||||
#include "delayed-ref.h"
|
||||
#include "locking.h"
|
||||
|
||||
enum merge_mode {
|
||||
MERGE_IDENTICAL_KEYS = 1,
|
||||
MERGE_IDENTICAL_PARENTS,
|
||||
};
|
||||
|
||||
/* Just an arbitrary number so we can be sure this happened */
|
||||
#define BACKREF_FOUND_SHARED 6
|
||||
|
||||
@ -533,7 +538,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
|
||||
* slot==nritems. In that case, go to the next leaf before we continue.
|
||||
*/
|
||||
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
|
||||
if (time_seq == (u64)-1)
|
||||
if (time_seq == SEQ_LAST)
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
else
|
||||
ret = btrfs_next_old_leaf(root, path, time_seq);
|
||||
@ -577,7 +582,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
|
||||
eie = NULL;
|
||||
}
|
||||
next:
|
||||
if (time_seq == (u64)-1)
|
||||
if (time_seq == SEQ_LAST)
|
||||
ret = btrfs_next_item(root, path);
|
||||
else
|
||||
ret = btrfs_next_old_item(root, path, time_seq);
|
||||
@ -629,7 +634,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
|
||||
|
||||
if (path->search_commit_root)
|
||||
root_level = btrfs_header_level(root->commit_root);
|
||||
else if (time_seq == (u64)-1)
|
||||
else if (time_seq == SEQ_LAST)
|
||||
root_level = btrfs_header_level(root->node);
|
||||
else
|
||||
root_level = btrfs_old_root_level(root, time_seq);
|
||||
@ -640,7 +645,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
path->lowest_level = level;
|
||||
if (time_seq == (u64)-1)
|
||||
if (time_seq == SEQ_LAST)
|
||||
ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
|
||||
0, 0);
|
||||
else
|
||||
@ -809,14 +814,12 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
|
||||
/*
|
||||
* merge backrefs and adjust counts accordingly
|
||||
*
|
||||
* mode = 1: merge identical keys, if key is set
|
||||
* FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
|
||||
* additionally, we could even add a key range for the blocks we
|
||||
* looked into to merge even more (-> replace unresolved refs by those
|
||||
* having a parent).
|
||||
* mode = 2: merge identical parents
|
||||
* FIXME: For MERGE_IDENTICAL_KEYS, if we add more keys in __add_prelim_ref
|
||||
* then we can merge more here. Additionally, we could even add a key
|
||||
* range for the blocks we looked into to merge even more (-> replace
|
||||
* unresolved refs by those having a parent).
|
||||
*/
|
||||
static void __merge_refs(struct list_head *head, int mode)
|
||||
static void __merge_refs(struct list_head *head, enum merge_mode mode)
|
||||
{
|
||||
struct __prelim_ref *pos1;
|
||||
|
||||
@ -829,7 +832,7 @@ static void __merge_refs(struct list_head *head, int mode)
|
||||
|
||||
if (!ref_for_same_block(ref1, ref2))
|
||||
continue;
|
||||
if (mode == 1) {
|
||||
if (mode == MERGE_IDENTICAL_KEYS) {
|
||||
if (!ref1->parent && ref2->parent)
|
||||
swap(ref1, ref2);
|
||||
} else {
|
||||
@ -1196,7 +1199,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
|
||||
*
|
||||
* NOTE: This can return values > 0
|
||||
*
|
||||
* If time_seq is set to (u64)-1, it will not search delayed_refs, and behave
|
||||
* If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave
|
||||
* much like trans == NULL case, the difference only lies in it will not
|
||||
* commit root.
|
||||
* The special case is for qgroup to search roots in commit_transaction().
|
||||
@ -1243,7 +1246,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
|
||||
path->skip_locking = 1;
|
||||
}
|
||||
|
||||
if (time_seq == (u64)-1)
|
||||
if (time_seq == SEQ_LAST)
|
||||
path->skip_locking = 1;
|
||||
|
||||
/*
|
||||
@ -1273,9 +1276,9 @@ again:
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
if (trans && likely(trans->type != __TRANS_DUMMY) &&
|
||||
time_seq != (u64)-1) {
|
||||
time_seq != SEQ_LAST) {
|
||||
#else
|
||||
if (trans && time_seq != (u64)-1) {
|
||||
if (trans && time_seq != SEQ_LAST) {
|
||||
#endif
|
||||
/*
|
||||
* look if there are updates for this ref queued and lock the
|
||||
@ -1286,7 +1289,7 @@ again:
|
||||
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
|
||||
if (head) {
|
||||
if (!mutex_trylock(&head->mutex)) {
|
||||
atomic_inc(&head->node.refs);
|
||||
refcount_inc(&head->node.refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
btrfs_release_path(path);
|
||||
@ -1374,7 +1377,7 @@ again:
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
__merge_refs(&prefs, 1);
|
||||
__merge_refs(&prefs, MERGE_IDENTICAL_KEYS);
|
||||
|
||||
ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
|
||||
extent_item_pos, total_refs,
|
||||
@ -1382,7 +1385,7 @@ again:
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
__merge_refs(&prefs, 2);
|
||||
__merge_refs(&prefs, MERGE_IDENTICAL_PARENTS);
|
||||
|
||||
while (!list_empty(&prefs)) {
|
||||
ref = list_first_entry(&prefs, struct __prelim_ref, list);
|
||||
|
@ -124,6 +124,13 @@ struct btrfs_inode {
|
||||
*/
|
||||
u64 delalloc_bytes;
|
||||
|
||||
/*
|
||||
* Total number of bytes pending delalloc that fall within a file
|
||||
* range that is either a hole or beyond EOF (and no prealloc extent
|
||||
* exists in the range). This is always <= delalloc_bytes.
|
||||
*/
|
||||
u64 new_delalloc_bytes;
|
||||
|
||||
/*
|
||||
* total number of bytes pending defrag, used by stat to check whether
|
||||
* it needs COW.
|
||||
|
@ -44,7 +44,7 @@
|
||||
|
||||
struct compressed_bio {
|
||||
/* number of bios pending for this compressed extent */
|
||||
atomic_t pending_bios;
|
||||
refcount_t pending_bios;
|
||||
|
||||
/* the pages with the compressed data on them */
|
||||
struct page **compressed_pages;
|
||||
@ -161,7 +161,7 @@ static void end_compressed_bio_read(struct bio *bio)
|
||||
/* if there are more bios still pending for this compressed
|
||||
* extent, just exit
|
||||
*/
|
||||
if (!atomic_dec_and_test(&cb->pending_bios))
|
||||
if (!refcount_dec_and_test(&cb->pending_bios))
|
||||
goto out;
|
||||
|
||||
inode = cb->inode;
|
||||
@ -274,7 +274,7 @@ static void end_compressed_bio_write(struct bio *bio)
|
||||
/* if there are more bios still pending for this compressed
|
||||
* extent, just exit
|
||||
*/
|
||||
if (!atomic_dec_and_test(&cb->pending_bios))
|
||||
if (!refcount_dec_and_test(&cb->pending_bios))
|
||||
goto out;
|
||||
|
||||
/* ok, we're the last bio for this extent, step one is to
|
||||
@ -342,7 +342,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
||||
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
|
||||
if (!cb)
|
||||
return -ENOMEM;
|
||||
atomic_set(&cb->pending_bios, 0);
|
||||
refcount_set(&cb->pending_bios, 0);
|
||||
cb->errors = 0;
|
||||
cb->inode = inode;
|
||||
cb->start = start;
|
||||
@ -363,7 +363,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
||||
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
|
||||
bio->bi_private = cb;
|
||||
bio->bi_end_io = end_compressed_bio_write;
|
||||
atomic_inc(&cb->pending_bios);
|
||||
refcount_set(&cb->pending_bios, 1);
|
||||
|
||||
/* create and submit bios for the compressed pages */
|
||||
bytes_left = compressed_len;
|
||||
@ -388,7 +388,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
||||
* we inc the count. Otherwise, the cb might get
|
||||
* freed before we're done setting it up
|
||||
*/
|
||||
atomic_inc(&cb->pending_bios);
|
||||
refcount_inc(&cb->pending_bios);
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio,
|
||||
BTRFS_WQ_ENDIO_DATA);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
@ -607,7 +607,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
if (!cb)
|
||||
goto out;
|
||||
|
||||
atomic_set(&cb->pending_bios, 0);
|
||||
refcount_set(&cb->pending_bios, 0);
|
||||
cb->errors = 0;
|
||||
cb->inode = inode;
|
||||
cb->mirror_num = mirror_num;
|
||||
@ -656,7 +656,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
|
||||
comp_bio->bi_private = cb;
|
||||
comp_bio->bi_end_io = end_compressed_bio_read;
|
||||
atomic_inc(&cb->pending_bios);
|
||||
refcount_set(&cb->pending_bios, 1);
|
||||
|
||||
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
|
||||
page = cb->compressed_pages[pg_index];
|
||||
@ -685,7 +685,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
* we inc the count. Otherwise, the cb might get
|
||||
* freed before we're done setting it up
|
||||
*/
|
||||
atomic_inc(&cb->pending_bios);
|
||||
refcount_inc(&cb->pending_bios);
|
||||
|
||||
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
|
||||
ret = btrfs_lookup_bio_sums(inode, comp_bio,
|
||||
|
@ -567,7 +567,7 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
|
||||
static noinline int
|
||||
tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
|
||||
struct extent_buffer *eb, int dst_slot, int src_slot,
|
||||
int nr_items, gfp_t flags)
|
||||
int nr_items)
|
||||
{
|
||||
struct tree_mod_elem *tm = NULL;
|
||||
struct tree_mod_elem **tm_list = NULL;
|
||||
@ -578,11 +578,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
|
||||
if (!tree_mod_need_log(fs_info, eb))
|
||||
return 0;
|
||||
|
||||
tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
|
||||
tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
|
||||
if (!tm_list)
|
||||
return -ENOMEM;
|
||||
|
||||
tm = kzalloc(sizeof(*tm), flags);
|
||||
tm = kzalloc(sizeof(*tm), GFP_NOFS);
|
||||
if (!tm) {
|
||||
ret = -ENOMEM;
|
||||
goto free_tms;
|
||||
@ -596,7 +596,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
|
||||
|
||||
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
|
||||
tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
|
||||
MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
|
||||
MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
|
||||
if (!tm_list[i]) {
|
||||
ret = -ENOMEM;
|
||||
goto free_tms;
|
||||
@ -663,7 +663,7 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
|
||||
static noinline int
|
||||
tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
|
||||
struct extent_buffer *old_root,
|
||||
struct extent_buffer *new_root, gfp_t flags,
|
||||
struct extent_buffer *new_root,
|
||||
int log_removal)
|
||||
{
|
||||
struct tree_mod_elem *tm = NULL;
|
||||
@ -678,14 +678,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
|
||||
if (log_removal && btrfs_header_level(old_root) > 0) {
|
||||
nritems = btrfs_header_nritems(old_root);
|
||||
tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
|
||||
flags);
|
||||
GFP_NOFS);
|
||||
if (!tm_list) {
|
||||
ret = -ENOMEM;
|
||||
goto free_tms;
|
||||
}
|
||||
for (i = 0; i < nritems; i++) {
|
||||
tm_list[i] = alloc_tree_mod_elem(old_root, i,
|
||||
MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
|
||||
MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
|
||||
if (!tm_list[i]) {
|
||||
ret = -ENOMEM;
|
||||
goto free_tms;
|
||||
@ -693,7 +693,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
}
|
||||
|
||||
tm = kzalloc(sizeof(*tm), flags);
|
||||
tm = kzalloc(sizeof(*tm), GFP_NOFS);
|
||||
if (!tm) {
|
||||
ret = -ENOMEM;
|
||||
goto free_tms;
|
||||
@ -873,7 +873,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
|
||||
{
|
||||
int ret;
|
||||
ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
|
||||
nr_items, GFP_NOFS);
|
||||
nr_items);
|
||||
BUG_ON(ret < 0);
|
||||
}
|
||||
|
||||
@ -943,7 +943,7 @@ tree_mod_log_set_root_pointer(struct btrfs_root *root,
|
||||
{
|
||||
int ret;
|
||||
ret = tree_mod_log_insert_root(root->fs_info, root->node,
|
||||
new_root_node, GFP_NOFS, log_removal);
|
||||
new_root_node, log_removal);
|
||||
BUG_ON(ret < 0);
|
||||
}
|
||||
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <linux/security.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/dynamic_debug.h>
|
||||
#include <linux/refcount.h>
|
||||
#include "extent_io.h"
|
||||
#include "extent_map.h"
|
||||
#include "async-thread.h"
|
||||
@ -518,7 +519,7 @@ struct btrfs_caching_control {
|
||||
struct btrfs_work work;
|
||||
struct btrfs_block_group_cache *block_group;
|
||||
u64 progress;
|
||||
atomic_t count;
|
||||
refcount_t count;
|
||||
};
|
||||
|
||||
/* Once caching_thread() finds this much free space, it will wake up waiters. */
|
||||
@ -538,6 +539,14 @@ struct btrfs_io_ctl {
|
||||
unsigned check_crcs:1;
|
||||
};
|
||||
|
||||
/*
|
||||
* Tree to record all locked full stripes of a RAID5/6 block group
|
||||
*/
|
||||
struct btrfs_full_stripe_locks_tree {
|
||||
struct rb_root root;
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
struct btrfs_block_group_cache {
|
||||
struct btrfs_key key;
|
||||
struct btrfs_block_group_item item;
|
||||
@ -648,6 +657,9 @@ struct btrfs_block_group_cache {
|
||||
* Protected by free_space_lock.
|
||||
*/
|
||||
int needs_free_space;
|
||||
|
||||
/* Record locked full stripes for RAID5/6 block group */
|
||||
struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
|
||||
};
|
||||
|
||||
/* delayed seq elem */
|
||||
@ -658,6 +670,8 @@ struct seq_list {
|
||||
|
||||
#define SEQ_LIST_INIT(name) { .list = LIST_HEAD_INIT((name).list), .seq = 0 }
|
||||
|
||||
#define SEQ_LAST ((u64)-1)
|
||||
|
||||
enum btrfs_orphan_cleanup_state {
|
||||
ORPHAN_CLEANUP_STARTED = 1,
|
||||
ORPHAN_CLEANUP_DONE = 2,
|
||||
@ -702,6 +716,11 @@ struct btrfs_delayed_root;
|
||||
#define BTRFS_FS_BTREE_ERR 11
|
||||
#define BTRFS_FS_LOG1_ERR 12
|
||||
#define BTRFS_FS_LOG2_ERR 13
|
||||
/*
|
||||
* Indicate that a whole-filesystem exclusive operation is running
|
||||
* (device replace, resize, device add/delete, balance)
|
||||
*/
|
||||
#define BTRFS_FS_EXCL_OP 14
|
||||
|
||||
struct btrfs_fs_info {
|
||||
u8 fsid[BTRFS_FSID_SIZE];
|
||||
@ -1066,8 +1085,6 @@ struct btrfs_fs_info {
|
||||
/* device replace state */
|
||||
struct btrfs_dev_replace dev_replace;
|
||||
|
||||
atomic_t mutually_exclusive_operation_running;
|
||||
|
||||
struct percpu_counter bio_counter;
|
||||
wait_queue_head_t replace_wait;
|
||||
|
||||
@ -1220,7 +1237,7 @@ struct btrfs_root {
|
||||
dev_t anon_dev;
|
||||
|
||||
spinlock_t root_item_lock;
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
|
||||
struct mutex delalloc_mutex;
|
||||
spinlock_t delalloc_lock;
|
||||
@ -3646,6 +3663,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
|
||||
struct btrfs_device *dev);
|
||||
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
|
||||
struct btrfs_scrub_progress *progress);
|
||||
static inline void btrfs_init_full_stripe_locks_tree(
|
||||
struct btrfs_full_stripe_locks_tree *locks_root)
|
||||
{
|
||||
locks_root->root = RB_ROOT;
|
||||
mutex_init(&locks_root->lock);
|
||||
}
|
||||
|
||||
/* dev-replace.c */
|
||||
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
|
||||
@ -3670,8 +3693,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
|
||||
struct btrfs_key *start, struct btrfs_key *end);
|
||||
int btrfs_reada_wait(void *handle);
|
||||
void btrfs_reada_detach(void *handle);
|
||||
int btree_readahead_hook(struct btrfs_fs_info *fs_info,
|
||||
struct extent_buffer *eb, int err);
|
||||
int btree_readahead_hook(struct extent_buffer *eb, int err);
|
||||
|
||||
static inline int is_fstree(u64 rootid)
|
||||
{
|
||||
|
@ -52,7 +52,7 @@ static inline void btrfs_init_delayed_node(
|
||||
{
|
||||
delayed_node->root = root;
|
||||
delayed_node->inode_id = inode_id;
|
||||
atomic_set(&delayed_node->refs, 0);
|
||||
refcount_set(&delayed_node->refs, 0);
|
||||
delayed_node->ins_root = RB_ROOT;
|
||||
delayed_node->del_root = RB_ROOT;
|
||||
mutex_init(&delayed_node->mutex);
|
||||
@ -81,7 +81,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
||||
|
||||
node = READ_ONCE(btrfs_inode->delayed_node);
|
||||
if (node) {
|
||||
atomic_inc(&node->refs);
|
||||
refcount_inc(&node->refs);
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -89,14 +89,14 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
||||
node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
|
||||
if (node) {
|
||||
if (btrfs_inode->delayed_node) {
|
||||
atomic_inc(&node->refs); /* can be accessed */
|
||||
refcount_inc(&node->refs); /* can be accessed */
|
||||
BUG_ON(btrfs_inode->delayed_node != node);
|
||||
spin_unlock(&root->inode_lock);
|
||||
return node;
|
||||
}
|
||||
btrfs_inode->delayed_node = node;
|
||||
/* can be accessed and cached in the inode */
|
||||
atomic_add(2, &node->refs);
|
||||
refcount_add(2, &node->refs);
|
||||
spin_unlock(&root->inode_lock);
|
||||
return node;
|
||||
}
|
||||
@ -125,7 +125,7 @@ again:
|
||||
btrfs_init_delayed_node(node, root, ino);
|
||||
|
||||
/* cached in the btrfs inode and can be accessed */
|
||||
atomic_add(2, &node->refs);
|
||||
refcount_set(&node->refs, 2);
|
||||
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret) {
|
||||
@ -166,7 +166,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
|
||||
} else {
|
||||
list_add_tail(&node->n_list, &root->node_list);
|
||||
list_add_tail(&node->p_list, &root->prepare_list);
|
||||
atomic_inc(&node->refs); /* inserted into list */
|
||||
refcount_inc(&node->refs); /* inserted into list */
|
||||
root->nodes++;
|
||||
set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
|
||||
}
|
||||
@ -180,7 +180,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
|
||||
spin_lock(&root->lock);
|
||||
if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
|
||||
root->nodes--;
|
||||
atomic_dec(&node->refs); /* not in the list */
|
||||
refcount_dec(&node->refs); /* not in the list */
|
||||
list_del_init(&node->n_list);
|
||||
if (!list_empty(&node->p_list))
|
||||
list_del_init(&node->p_list);
|
||||
@ -201,7 +201,7 @@ static struct btrfs_delayed_node *btrfs_first_delayed_node(
|
||||
|
||||
p = delayed_root->node_list.next;
|
||||
node = list_entry(p, struct btrfs_delayed_node, n_list);
|
||||
atomic_inc(&node->refs);
|
||||
refcount_inc(&node->refs);
|
||||
out:
|
||||
spin_unlock(&delayed_root->lock);
|
||||
|
||||
@ -228,7 +228,7 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(
|
||||
p = node->n_list.next;
|
||||
|
||||
next = list_entry(p, struct btrfs_delayed_node, n_list);
|
||||
atomic_inc(&next->refs);
|
||||
refcount_inc(&next->refs);
|
||||
out:
|
||||
spin_unlock(&delayed_root->lock);
|
||||
|
||||
@ -253,11 +253,11 @@ static void __btrfs_release_delayed_node(
|
||||
btrfs_dequeue_delayed_node(delayed_root, delayed_node);
|
||||
mutex_unlock(&delayed_node->mutex);
|
||||
|
||||
if (atomic_dec_and_test(&delayed_node->refs)) {
|
||||
if (refcount_dec_and_test(&delayed_node->refs)) {
|
||||
bool free = false;
|
||||
struct btrfs_root *root = delayed_node->root;
|
||||
spin_lock(&root->inode_lock);
|
||||
if (atomic_read(&delayed_node->refs) == 0) {
|
||||
if (refcount_read(&delayed_node->refs) == 0) {
|
||||
radix_tree_delete(&root->delayed_nodes_tree,
|
||||
delayed_node->inode_id);
|
||||
free = true;
|
||||
@ -286,7 +286,7 @@ static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
|
||||
p = delayed_root->prepare_list.next;
|
||||
list_del_init(p);
|
||||
node = list_entry(p, struct btrfs_delayed_node, p_list);
|
||||
atomic_inc(&node->refs);
|
||||
refcount_inc(&node->refs);
|
||||
out:
|
||||
spin_unlock(&delayed_root->lock);
|
||||
|
||||
@ -308,7 +308,7 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
|
||||
item->ins_or_del = 0;
|
||||
item->bytes_reserved = 0;
|
||||
item->delayed_node = NULL;
|
||||
atomic_set(&item->refs, 1);
|
||||
refcount_set(&item->refs, 1);
|
||||
}
|
||||
return item;
|
||||
}
|
||||
@ -483,7 +483,7 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
|
||||
{
|
||||
if (item) {
|
||||
__btrfs_remove_delayed_item(item);
|
||||
if (atomic_dec_and_test(&item->refs))
|
||||
if (refcount_dec_and_test(&item->refs))
|
||||
kfree(item);
|
||||
}
|
||||
}
|
||||
@ -1600,14 +1600,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
|
||||
mutex_lock(&delayed_node->mutex);
|
||||
item = __btrfs_first_delayed_insertion_item(delayed_node);
|
||||
while (item) {
|
||||
atomic_inc(&item->refs);
|
||||
refcount_inc(&item->refs);
|
||||
list_add_tail(&item->readdir_list, ins_list);
|
||||
item = __btrfs_next_delayed_item(item);
|
||||
}
|
||||
|
||||
item = __btrfs_first_delayed_deletion_item(delayed_node);
|
||||
while (item) {
|
||||
atomic_inc(&item->refs);
|
||||
refcount_inc(&item->refs);
|
||||
list_add_tail(&item->readdir_list, del_list);
|
||||
item = __btrfs_next_delayed_item(item);
|
||||
}
|
||||
@ -1621,7 +1621,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
|
||||
* insert/delete delayed items in this period. So we also needn't
|
||||
* requeue or dequeue this delayed node.
|
||||
*/
|
||||
atomic_dec(&delayed_node->refs);
|
||||
refcount_dec(&delayed_node->refs);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1634,13 +1634,13 @@ void btrfs_readdir_put_delayed_items(struct inode *inode,
|
||||
|
||||
list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
|
||||
list_del(&curr->readdir_list);
|
||||
if (atomic_dec_and_test(&curr->refs))
|
||||
if (refcount_dec_and_test(&curr->refs))
|
||||
kfree(curr);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(curr, next, del_list, readdir_list) {
|
||||
list_del(&curr->readdir_list);
|
||||
if (atomic_dec_and_test(&curr->refs))
|
||||
if (refcount_dec_and_test(&curr->refs))
|
||||
kfree(curr);
|
||||
}
|
||||
|
||||
@ -1667,7 +1667,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
|
||||
list_del(&curr->readdir_list);
|
||||
ret = (curr->key.offset == index);
|
||||
|
||||
if (atomic_dec_and_test(&curr->refs))
|
||||
if (refcount_dec_and_test(&curr->refs))
|
||||
kfree(curr);
|
||||
|
||||
if (ret)
|
||||
@ -1705,7 +1705,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
||||
list_del(&curr->readdir_list);
|
||||
|
||||
if (curr->key.offset < ctx->pos) {
|
||||
if (atomic_dec_and_test(&curr->refs))
|
||||
if (refcount_dec_and_test(&curr->refs))
|
||||
kfree(curr);
|
||||
continue;
|
||||
}
|
||||
@ -1722,7 +1722,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
||||
over = !dir_emit(ctx, name, name_len,
|
||||
location.objectid, d_type);
|
||||
|
||||
if (atomic_dec_and_test(&curr->refs))
|
||||
if (refcount_dec_and_test(&curr->refs))
|
||||
kfree(curr);
|
||||
|
||||
if (over)
|
||||
@ -1963,7 +1963,7 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
|
||||
inode_id = delayed_nodes[n - 1]->inode_id + 1;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
atomic_inc(&delayed_nodes[i]->refs);
|
||||
refcount_inc(&delayed_nodes[i]->refs);
|
||||
spin_unlock(&root->inode_lock);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
|
@ -26,7 +26,7 @@
|
||||
#include <linux/list.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#include <linux/refcount.h>
|
||||
#include "ctree.h"
|
||||
|
||||
/* types of the delayed item */
|
||||
@ -67,7 +67,7 @@ struct btrfs_delayed_node {
|
||||
struct rb_root del_root;
|
||||
struct mutex mutex;
|
||||
struct btrfs_inode_item inode_item;
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
u64 index_cnt;
|
||||
unsigned long flags;
|
||||
int count;
|
||||
@ -80,7 +80,7 @@ struct btrfs_delayed_item {
|
||||
struct list_head readdir_list; /* used for readdir items */
|
||||
u64 bytes_reserved;
|
||||
struct btrfs_delayed_node *delayed_node;
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
int ins_or_del;
|
||||
u32 data_len;
|
||||
char data[0];
|
||||
|
@ -164,7 +164,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
|
||||
if (mutex_trylock(&head->mutex))
|
||||
return 0;
|
||||
|
||||
atomic_inc(&head->node.refs);
|
||||
refcount_inc(&head->node.refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
mutex_lock(&head->mutex);
|
||||
@ -590,7 +590,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
|
||||
/* first set the basic ref node struct up */
|
||||
atomic_set(&ref->refs, 1);
|
||||
refcount_set(&ref->refs, 1);
|
||||
ref->bytenr = bytenr;
|
||||
ref->num_bytes = num_bytes;
|
||||
ref->ref_mod = count_mod;
|
||||
@ -682,7 +682,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
|
||||
/* first set the basic ref node struct up */
|
||||
atomic_set(&ref->refs, 1);
|
||||
refcount_set(&ref->refs, 1);
|
||||
ref->bytenr = bytenr;
|
||||
ref->num_bytes = num_bytes;
|
||||
ref->ref_mod = 1;
|
||||
@ -739,7 +739,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
||||
seq = atomic64_read(&fs_info->tree_mod_seq);
|
||||
|
||||
/* first set the basic ref node struct up */
|
||||
atomic_set(&ref->refs, 1);
|
||||
refcount_set(&ref->refs, 1);
|
||||
ref->bytenr = bytenr;
|
||||
ref->num_bytes = num_bytes;
|
||||
ref->ref_mod = 1;
|
||||
|
@ -18,6 +18,8 @@
|
||||
#ifndef __DELAYED_REF__
|
||||
#define __DELAYED_REF__
|
||||
|
||||
#include <linux/refcount.h>
|
||||
|
||||
/* these are the possible values of struct btrfs_delayed_ref_node->action */
|
||||
#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
|
||||
#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
|
||||
@ -53,7 +55,7 @@ struct btrfs_delayed_ref_node {
|
||||
u64 seq;
|
||||
|
||||
/* ref count on this data structure */
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
|
||||
/*
|
||||
* how many refs is this entry adding or deleting. For
|
||||
@ -220,8 +222,8 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
|
||||
|
||||
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
|
||||
{
|
||||
WARN_ON(atomic_read(&ref->refs) == 0);
|
||||
if (atomic_dec_and_test(&ref->refs)) {
|
||||
WARN_ON(refcount_read(&ref->refs) == 0);
|
||||
if (refcount_dec_and_test(&ref->refs)) {
|
||||
WARN_ON(ref->in_tree);
|
||||
switch (ref->type) {
|
||||
case BTRFS_TREE_BLOCK_REF_KEY:
|
||||
|
@ -546,8 +546,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
mutex_unlock(&uuid_mutex);
|
||||
btrfs_rm_dev_replace_blocked(fs_info);
|
||||
if (tgt_device)
|
||||
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
|
||||
btrfs_rm_dev_replace_unblocked(fs_info);
|
||||
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
|
||||
|
||||
return scrub_ret;
|
||||
@ -665,7 +667,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
|
||||
srcdev = dev_replace->srcdev;
|
||||
args->status.progress_1000 = div_u64(dev_replace->cursor_left,
|
||||
args->status.progress_1000 = div64_u64(dev_replace->cursor_left,
|
||||
div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
|
||||
break;
|
||||
}
|
||||
@ -784,8 +786,7 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
btrfs_dev_replace_unlock(dev_replace, 1);
|
||||
|
||||
WARN_ON(atomic_xchg(
|
||||
&fs_info->mutually_exclusive_operation_running, 1));
|
||||
WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
|
||||
task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
|
||||
return PTR_ERR_OR_ZERO(task);
|
||||
}
|
||||
@ -814,7 +815,7 @@ static int btrfs_dev_replace_kthread(void *data)
|
||||
(unsigned int)progress);
|
||||
}
|
||||
btrfs_dev_replace_continue_on_mount(fs_info);
|
||||
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
|
||||
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -762,7 +762,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
|
||||
err:
|
||||
if (reads_done &&
|
||||
test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
|
||||
btree_readahead_hook(fs_info, eb, ret);
|
||||
btree_readahead_hook(eb, ret);
|
||||
|
||||
if (ret) {
|
||||
/*
|
||||
@ -787,7 +787,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
|
||||
eb->read_mirror = failed_mirror;
|
||||
atomic_dec(&eb->io_pages);
|
||||
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
|
||||
btree_readahead_hook(eb->fs_info, eb, -EIO);
|
||||
btree_readahead_hook(eb, -EIO);
|
||||
return -EIO; /* we fixed nothing */
|
||||
}
|
||||
|
||||
@ -1340,7 +1340,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
||||
atomic_set(&root->log_writers, 0);
|
||||
atomic_set(&root->log_batch, 0);
|
||||
atomic_set(&root->orphan_inodes, 0);
|
||||
atomic_set(&root->refs, 1);
|
||||
refcount_set(&root->refs, 1);
|
||||
atomic_set(&root->will_be_snapshoted, 0);
|
||||
atomic64_set(&root->qgroup_meta_rsv, 0);
|
||||
root->log_transid = 0;
|
||||
@ -3497,10 +3497,11 @@ static void btrfs_end_empty_barrier(struct bio *bio)
|
||||
*/
|
||||
static int write_dev_flush(struct btrfs_device *device, int wait)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(device->bdev);
|
||||
struct bio *bio;
|
||||
int ret = 0;
|
||||
|
||||
if (device->nobarriers)
|
||||
if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
|
||||
return 0;
|
||||
|
||||
if (wait) {
|
||||
@ -4321,7 +4322,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
head = rb_entry(node, struct btrfs_delayed_ref_head,
|
||||
href_node);
|
||||
if (!mutex_trylock(&head->mutex)) {
|
||||
atomic_inc(&head->node.refs);
|
||||
refcount_inc(&head->node.refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
mutex_lock(&head->mutex);
|
||||
@ -4593,7 +4594,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
|
||||
t = list_first_entry(&fs_info->trans_list,
|
||||
struct btrfs_transaction, list);
|
||||
if (t->state >= TRANS_STATE_COMMIT_START) {
|
||||
atomic_inc(&t->use_count);
|
||||
refcount_inc(&t->use_count);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
btrfs_wait_for_commit(fs_info, t->transid);
|
||||
btrfs_put_transaction(t);
|
||||
|
@ -101,14 +101,14 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
|
||||
*/
|
||||
static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root)
|
||||
{
|
||||
if (atomic_inc_not_zero(&root->refs))
|
||||
if (refcount_inc_not_zero(&root->refs))
|
||||
return root;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void btrfs_put_fs_root(struct btrfs_root *root)
|
||||
{
|
||||
if (atomic_dec_and_test(&root->refs))
|
||||
if (refcount_dec_and_test(&root->refs))
|
||||
kfree(root);
|
||||
}
|
||||
|
||||
|
@ -131,6 +131,16 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
|
||||
if (atomic_dec_and_test(&cache->count)) {
|
||||
WARN_ON(cache->pinned > 0);
|
||||
WARN_ON(cache->reserved > 0);
|
||||
|
||||
/*
|
||||
* If not empty, someone is still holding mutex of
|
||||
* full_stripe_lock, which can only be released by caller.
|
||||
* And it will definitely cause use-after-free when caller
|
||||
* tries to release full stripe lock.
|
||||
*
|
||||
* No better way to resolve, but only to warn.
|
||||
*/
|
||||
WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
|
||||
kfree(cache->free_space_ctl);
|
||||
kfree(cache);
|
||||
}
|
||||
@ -316,14 +326,14 @@ get_caching_control(struct btrfs_block_group_cache *cache)
|
||||
}
|
||||
|
||||
ctl = cache->caching_ctl;
|
||||
atomic_inc(&ctl->count);
|
||||
refcount_inc(&ctl->count);
|
||||
spin_unlock(&cache->lock);
|
||||
return ctl;
|
||||
}
|
||||
|
||||
static void put_caching_control(struct btrfs_caching_control *ctl)
|
||||
{
|
||||
if (atomic_dec_and_test(&ctl->count))
|
||||
if (refcount_dec_and_test(&ctl->count))
|
||||
kfree(ctl);
|
||||
}
|
||||
|
||||
@ -599,7 +609,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
|
||||
init_waitqueue_head(&caching_ctl->wait);
|
||||
caching_ctl->block_group = cache;
|
||||
caching_ctl->progress = cache->key.objectid;
|
||||
atomic_set(&caching_ctl->count, 1);
|
||||
refcount_set(&caching_ctl->count, 1);
|
||||
btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
|
||||
caching_thread, NULL, NULL);
|
||||
|
||||
@ -620,7 +630,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
|
||||
struct btrfs_caching_control *ctl;
|
||||
|
||||
ctl = cache->caching_ctl;
|
||||
atomic_inc(&ctl->count);
|
||||
refcount_inc(&ctl->count);
|
||||
prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
|
||||
spin_unlock(&cache->lock);
|
||||
|
||||
@ -707,7 +717,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
|
||||
}
|
||||
|
||||
down_write(&fs_info->commit_root_sem);
|
||||
atomic_inc(&caching_ctl->count);
|
||||
refcount_inc(&caching_ctl->count);
|
||||
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
|
||||
up_write(&fs_info->commit_root_sem);
|
||||
|
||||
@ -892,7 +902,7 @@ search_again:
|
||||
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
|
||||
if (head) {
|
||||
if (!mutex_trylock(&head->mutex)) {
|
||||
atomic_inc(&head->node.refs);
|
||||
refcount_inc(&head->node.refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
btrfs_release_path(path);
|
||||
@ -2980,7 +2990,7 @@ again:
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
|
||||
ref = &head->node;
|
||||
atomic_inc(&ref->refs);
|
||||
refcount_inc(&ref->refs);
|
||||
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
/*
|
||||
@ -3003,7 +3013,6 @@ again:
|
||||
goto again;
|
||||
}
|
||||
out:
|
||||
assert_qgroups_uptodate(trans);
|
||||
trans->can_flush_pending_bgs = can_flush_pending_bgs;
|
||||
return 0;
|
||||
}
|
||||
@ -3057,7 +3066,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
|
||||
}
|
||||
|
||||
if (!mutex_trylock(&head->mutex)) {
|
||||
atomic_inc(&head->node.refs);
|
||||
refcount_inc(&head->node.refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
btrfs_release_path(path);
|
||||
@ -3443,7 +3452,8 @@ again:
|
||||
/*
|
||||
* don't bother trying to write stuff out _if_
|
||||
* a) we're not cached,
|
||||
* b) we're with nospace_cache mount option.
|
||||
* b) we're with nospace_cache mount option,
|
||||
* c) we're with v2 space_cache (FREE_SPACE_TREE).
|
||||
*/
|
||||
dcs = BTRFS_DC_WRITTEN;
|
||||
spin_unlock(&block_group->lock);
|
||||
@ -9917,6 +9927,7 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
|
||||
btrfs_init_free_space_ctl(cache);
|
||||
atomic_set(&cache->trimming, 0);
|
||||
mutex_init(&cache->free_space_lock);
|
||||
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
|
||||
|
||||
return cache;
|
||||
}
|
||||
@ -10416,7 +10427,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
&fs_info->caching_block_groups, list)
|
||||
if (ctl->block_group == block_group) {
|
||||
caching_ctl = ctl;
|
||||
atomic_inc(&caching_ctl->count);
|
||||
refcount_inc(&caching_ctl->count);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -10850,7 +10861,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
trans = fs_info->running_transaction;
|
||||
if (trans)
|
||||
atomic_inc(&trans->use_count);
|
||||
refcount_inc(&trans->use_count);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
ret = find_free_dev_extent_start(trans, device, minlen, start,
|
||||
|
@ -68,7 +68,7 @@ void btrfs_leak_debug_check(void)
|
||||
pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
|
||||
state->start, state->end, state->state,
|
||||
extent_state_in_tree(state),
|
||||
atomic_read(&state->refs));
|
||||
refcount_read(&state->refs));
|
||||
list_del(&state->leak_list);
|
||||
kmem_cache_free(extent_state_cache, state);
|
||||
}
|
||||
@ -238,7 +238,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
|
||||
state->failrec = NULL;
|
||||
RB_CLEAR_NODE(&state->rb_node);
|
||||
btrfs_leak_debug_add(&state->leak_list, &states);
|
||||
atomic_set(&state->refs, 1);
|
||||
refcount_set(&state->refs, 1);
|
||||
init_waitqueue_head(&state->wq);
|
||||
trace_alloc_extent_state(state, mask, _RET_IP_);
|
||||
return state;
|
||||
@ -248,7 +248,7 @@ void free_extent_state(struct extent_state *state)
|
||||
{
|
||||
if (!state)
|
||||
return;
|
||||
if (atomic_dec_and_test(&state->refs)) {
|
||||
if (refcount_dec_and_test(&state->refs)) {
|
||||
WARN_ON(extent_state_in_tree(state));
|
||||
btrfs_leak_debug_del(&state->leak_list);
|
||||
trace_free_extent_state(state, _RET_IP_);
|
||||
@ -641,7 +641,7 @@ again:
|
||||
if (cached && extent_state_in_tree(cached) &&
|
||||
cached->start <= start && cached->end > start) {
|
||||
if (clear)
|
||||
atomic_dec(&cached->refs);
|
||||
refcount_dec(&cached->refs);
|
||||
state = cached;
|
||||
goto hit_next;
|
||||
}
|
||||
@ -793,7 +793,7 @@ process_node:
|
||||
|
||||
if (state->state & bits) {
|
||||
start = state->start;
|
||||
atomic_inc(&state->refs);
|
||||
refcount_inc(&state->refs);
|
||||
wait_on_state(tree, state);
|
||||
free_extent_state(state);
|
||||
goto again;
|
||||
@ -834,7 +834,7 @@ static void cache_state_if_flags(struct extent_state *state,
|
||||
if (cached_ptr && !(*cached_ptr)) {
|
||||
if (!flags || (state->state & flags)) {
|
||||
*cached_ptr = state;
|
||||
atomic_inc(&state->refs);
|
||||
refcount_inc(&state->refs);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1538,7 +1538,7 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
|
||||
if (!found) {
|
||||
*start = state->start;
|
||||
*cached_state = state;
|
||||
atomic_inc(&state->refs);
|
||||
refcount_inc(&state->refs);
|
||||
}
|
||||
found++;
|
||||
*end = state->end;
|
||||
@ -2004,16 +2004,11 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
|
||||
u64 map_length = 0;
|
||||
u64 sector;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
|
||||
int ret;
|
||||
|
||||
ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
|
||||
BUG_ON(!mirror_num);
|
||||
|
||||
/* we can't repair anything in raid56 yet */
|
||||
if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
|
||||
return 0;
|
||||
|
||||
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
|
||||
if (!bio)
|
||||
return -EIO;
|
||||
@ -2026,17 +2021,35 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
|
||||
* read repair operation.
|
||||
*/
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
|
||||
&map_length, &bbio, mirror_num);
|
||||
if (ret) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
bio_put(bio);
|
||||
return -EIO;
|
||||
if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num)) {
|
||||
/*
|
||||
* Note that we don't use BTRFS_MAP_WRITE because it's supposed
|
||||
* to update all raid stripes, but here we just want to correct
|
||||
* bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
|
||||
* stripe's dev and sector.
|
||||
*/
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
|
||||
&map_length, &bbio, 0);
|
||||
if (ret) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
bio_put(bio);
|
||||
return -EIO;
|
||||
}
|
||||
ASSERT(bbio->mirror_num == 1);
|
||||
} else {
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
|
||||
&map_length, &bbio, mirror_num);
|
||||
if (ret) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
bio_put(bio);
|
||||
return -EIO;
|
||||
}
|
||||
BUG_ON(mirror_num != bbio->mirror_num);
|
||||
}
|
||||
BUG_ON(mirror_num != bbio->mirror_num);
|
||||
sector = bbio->stripes[mirror_num-1].physical >> 9;
|
||||
|
||||
sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
dev = bbio->stripes[mirror_num-1].dev;
|
||||
dev = bbio->stripes[bbio->mirror_num - 1].dev;
|
||||
btrfs_put_bbio(bbio);
|
||||
if (!dev || !dev->bdev || !dev->writeable) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
@ -2859,7 +2872,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
|
||||
em = *em_cached;
|
||||
if (extent_map_in_tree(em) && start >= em->start &&
|
||||
start < extent_map_end(em)) {
|
||||
atomic_inc(&em->refs);
|
||||
refcount_inc(&em->refs);
|
||||
return em;
|
||||
}
|
||||
|
||||
@ -2870,7 +2883,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
|
||||
em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
|
||||
if (em_cached && !IS_ERR_OR_NULL(em)) {
|
||||
BUG_ON(*em_cached);
|
||||
atomic_inc(&em->refs);
|
||||
refcount_inc(&em->refs);
|
||||
*em_cached = em;
|
||||
}
|
||||
return em;
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define __EXTENTIO__
|
||||
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/refcount.h>
|
||||
#include "ulist.h"
|
||||
|
||||
/* bits for the extent state */
|
||||
@ -14,14 +15,17 @@
|
||||
#define EXTENT_DEFRAG (1U << 6)
|
||||
#define EXTENT_BOUNDARY (1U << 9)
|
||||
#define EXTENT_NODATASUM (1U << 10)
|
||||
#define EXTENT_DO_ACCOUNTING (1U << 11)
|
||||
#define EXTENT_CLEAR_META_RESV (1U << 11)
|
||||
#define EXTENT_FIRST_DELALLOC (1U << 12)
|
||||
#define EXTENT_NEED_WAIT (1U << 13)
|
||||
#define EXTENT_DAMAGED (1U << 14)
|
||||
#define EXTENT_NORESERVE (1U << 15)
|
||||
#define EXTENT_QGROUP_RESERVED (1U << 16)
|
||||
#define EXTENT_CLEAR_DATA_RESV (1U << 17)
|
||||
#define EXTENT_DELALLOC_NEW (1U << 18)
|
||||
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
|
||||
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
|
||||
EXTENT_CLEAR_DATA_RESV)
|
||||
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
|
||||
|
||||
/*
|
||||
@ -143,7 +147,7 @@ struct extent_state {
|
||||
|
||||
/* ADD NEW ELEMENTS AFTER THIS */
|
||||
wait_queue_head_t wq;
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
unsigned state;
|
||||
|
||||
struct io_failure_record *failrec;
|
||||
|
@ -55,7 +55,7 @@ struct extent_map *alloc_extent_map(void)
|
||||
em->flags = 0;
|
||||
em->compress_type = BTRFS_COMPRESS_NONE;
|
||||
em->generation = 0;
|
||||
atomic_set(&em->refs, 1);
|
||||
refcount_set(&em->refs, 1);
|
||||
INIT_LIST_HEAD(&em->list);
|
||||
return em;
|
||||
}
|
||||
@ -71,8 +71,8 @@ void free_extent_map(struct extent_map *em)
|
||||
{
|
||||
if (!em)
|
||||
return;
|
||||
WARN_ON(atomic_read(&em->refs) == 0);
|
||||
if (atomic_dec_and_test(&em->refs)) {
|
||||
WARN_ON(refcount_read(&em->refs) == 0);
|
||||
if (refcount_dec_and_test(&em->refs)) {
|
||||
WARN_ON(extent_map_in_tree(em));
|
||||
WARN_ON(!list_empty(&em->list));
|
||||
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
|
||||
@ -322,7 +322,7 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree,
|
||||
struct extent_map *em,
|
||||
int modified)
|
||||
{
|
||||
atomic_inc(&em->refs);
|
||||
refcount_inc(&em->refs);
|
||||
em->mod_start = em->start;
|
||||
em->mod_len = em->len;
|
||||
|
||||
@ -381,7 +381,7 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
|
||||
if (strict && !(end > em->start && start < extent_map_end(em)))
|
||||
return NULL;
|
||||
|
||||
atomic_inc(&em->refs);
|
||||
refcount_inc(&em->refs);
|
||||
return em;
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define __EXTENTMAP__
|
||||
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/refcount.h>
|
||||
|
||||
#define EXTENT_MAP_LAST_BYTE ((u64)-4)
|
||||
#define EXTENT_MAP_HOLE ((u64)-3)
|
||||
@ -41,7 +42,7 @@ struct extent_map {
|
||||
*/
|
||||
struct map_lookup *map_lookup;
|
||||
};
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
unsigned int compress_type;
|
||||
struct list_head list;
|
||||
};
|
||||
|
@ -1404,6 +1404,47 @@ fail:
|
||||
|
||||
}
|
||||
|
||||
/*
 * Walk the extent maps covering [start, start + len - 1] and, for every
 * extent map whose block_start is EXTENT_MAP_HOLE, set EXTENT_DELALLOC_NEW
 * in the inode's io_tree on the part of that hole inside the search range.
 *
 * @inode:        inode whose io_tree is updated
 * @start:        first byte of the range to examine
 * @len:          length of the range in bytes
 * @cached_state: passed through unchanged to set_extent_bit()
 *
 * Returns 0 on success, the PTR_ERR() of a failed btrfs_get_extent() lookup,
 * or the non-zero value returned by set_extent_bit().
 */
static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
|
||||
const u64 start,
|
||||
const u64 len,
|
||||
struct extent_state **cached_state)
|
||||
{
|
||||
u64 search_start = start;
|
||||
/* Inclusive last byte of the range. */
const u64 end = start + len - 1;
|
||||
|
||||
while (search_start < end) {
|
||||
/* Bytes still left to examine, from search_start through end. */
const u64 search_len = end - search_start + 1;
|
||||
struct extent_map *em;
|
||||
u64 em_len;
|
||||
int ret = 0;
|
||||
|
||||
em = btrfs_get_extent(inode, NULL, 0, search_start,
|
||||
search_len, 0);
|
||||
if (IS_ERR(em))
|
||||
return PTR_ERR(em);
|
||||
|
||||
/* Only holes are marked; anything else is skipped over. */
if (em->block_start != EXTENT_MAP_HOLE)
|
||||
goto next;
|
||||
|
||||
em_len = em->len;
|
||||
/* The extent map may begin before search_start: drop that leading part. */
if (em->start < search_start)
|
||||
em_len -= search_start - em->start;
|
||||
/* ...and it may extend past the range: clamp to what is left to search. */
if (em_len > search_len)
|
||||
em_len = search_len;
|
||||
|
||||
ret = set_extent_bit(&inode->io_tree, search_start,
|
||||
search_start + em_len - 1,
|
||||
EXTENT_DELALLOC_NEW,
|
||||
NULL, cached_state, GFP_NOFS);
|
||||
next:
|
||||
/*
 * Advance past this extent map and release it before acting on ret, so
 * the extent map is released on the error path as well.
 */
search_start = extent_map_end(em);
|
||||
free_extent_map(em);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function locks the extent and properly waits for data=ordered extents
|
||||
* to finish before allowing the pages to be modified if needed.
|
||||
@ -1432,8 +1473,11 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
|
||||
+ round_up(pos + write_bytes - start_pos,
|
||||
fs_info->sectorsize) - 1;
|
||||
|
||||
if (start_pos < inode->vfs_inode.i_size) {
|
||||
if (start_pos < inode->vfs_inode.i_size ||
|
||||
(inode->flags & BTRFS_INODE_PREALLOC)) {
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
unsigned int clear_bits;
|
||||
|
||||
lock_extent_bits(&inode->io_tree, start_pos, last_pos,
|
||||
cached_state);
|
||||
ordered = btrfs_lookup_ordered_range(inode, start_pos,
|
||||
@ -1454,11 +1498,19 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
|
||||
}
|
||||
if (ordered)
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
|
||||
ret = btrfs_find_new_delalloc_bytes(inode, start_pos,
|
||||
last_pos - start_pos + 1,
|
||||
cached_state);
|
||||
clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC |
|
||||
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG;
|
||||
if (ret)
|
||||
clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED;
|
||||
clear_extent_bit(&inode->io_tree, start_pos,
|
||||
last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
|
||||
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
|
||||
0, 0, cached_state, GFP_NOFS);
|
||||
last_pos, clear_bits,
|
||||
(clear_bits & EXTENT_LOCKED) ? 1 : 0,
|
||||
0, cached_state, GFP_NOFS);
|
||||
if (ret)
|
||||
return ret;
|
||||
*lockstart = start_pos;
|
||||
*lockend = last_pos;
|
||||
ret = 1;
|
||||
@ -2342,13 +2394,8 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
|
||||
int ret = 0;
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, *start, *len, 0);
|
||||
if (IS_ERR_OR_NULL(em)) {
|
||||
if (!em)
|
||||
ret = -ENOMEM;
|
||||
else
|
||||
ret = PTR_ERR(em);
|
||||
return ret;
|
||||
}
|
||||
if (IS_ERR(em))
|
||||
return PTR_ERR(em);
|
||||
|
||||
/* Hole or vacuum extent (only exists in no-hole mode) */
|
||||
if (em->block_start == EXTENT_MAP_HOLE) {
|
||||
@ -2835,11 +2882,8 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
while (1) {
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
|
||||
alloc_end - cur_offset, 0);
|
||||
if (IS_ERR_OR_NULL(em)) {
|
||||
if (!em)
|
||||
ret = -ENOMEM;
|
||||
else
|
||||
ret = PTR_ERR(em);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
break;
|
||||
}
|
||||
last_byte = min(extent_map_end(em), alloc_end);
|
||||
@ -2856,8 +2900,10 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
}
|
||||
ret = btrfs_qgroup_reserve_data(inode, cur_offset,
|
||||
last_byte - cur_offset);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
free_extent_map(em);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Do not need to reserve unwritten extent for this
|
||||
|
@ -355,7 +355,7 @@ static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
|
||||
io_ctl->orig = io_ctl->cur;
|
||||
io_ctl->size = PAGE_SIZE;
|
||||
if (clear)
|
||||
memset(io_ctl->cur, 0, PAGE_SIZE);
|
||||
clear_page(io_ctl->cur);
|
||||
}
|
||||
|
||||
static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
|
||||
|
289
fs/btrfs/inode.c
289
fs/btrfs/inode.c
@ -115,6 +115,31 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
|
||||
u64 ram_bytes, int compress_type,
|
||||
int type);
|
||||
|
||||
static void __endio_write_update_ordered(struct inode *inode,
|
||||
const u64 offset, const u64 bytes,
|
||||
const bool uptodate);
|
||||
|
||||
/*
|
||||
* Cleanup all submitted ordered extents in specified range to handle errors
|
||||
* from the fill_delalloc() callback.
|
||||
*
|
||||
* NOTE: caller must ensure that when an error happens, it can not call
|
||||
* extent_clear_unlock_delalloc() to clear both the bits EXTENT_DO_ACCOUNTING
|
||||
* and EXTENT_DELALLOC simultaneously, because that causes the reserved metadata
|
||||
* to be released, which we want to happen only when finishing the ordered
|
||||
* extent (btrfs_finish_ordered_io()). Also note that the caller of the
|
||||
* fill_delalloc() callback already does proper cleanup for the first page of
|
||||
* the range, that is, it invokes the callback writepage_end_io_hook() for the
|
||||
* range of the first page.
|
||||
*/
|
||||
static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
|
||||
const u64 offset,
|
||||
const u64 bytes)
|
||||
{
|
||||
/*
 * Start one page into the range and shrink the length to match: the first
 * page is cleaned up by the caller of the fill_delalloc() callback. The
 * final argument (uptodate) is false, i.e. the range is reported as failed.
 */
return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
|
||||
bytes - PAGE_SIZE, false);
|
||||
}
|
||||
|
||||
static int btrfs_dirty_inode(struct inode *inode);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
@ -547,7 +572,7 @@ cont:
|
||||
}
|
||||
if (ret <= 0) {
|
||||
unsigned long clear_flags = EXTENT_DELALLOC |
|
||||
EXTENT_DEFRAG;
|
||||
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG;
|
||||
unsigned long page_error_op;
|
||||
|
||||
clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
|
||||
@ -565,8 +590,10 @@ cont:
|
||||
PAGE_SET_WRITEBACK |
|
||||
page_error_op |
|
||||
PAGE_END_WRITEBACK);
|
||||
btrfs_free_reserved_data_space_noquota(inode, start,
|
||||
end - start + 1);
|
||||
if (ret == 0)
|
||||
btrfs_free_reserved_data_space_noquota(inode,
|
||||
start,
|
||||
end - start + 1);
|
||||
goto free_pages_out;
|
||||
}
|
||||
}
|
||||
@ -852,6 +879,7 @@ out_free:
|
||||
async_extent->start +
|
||||
async_extent->ram_size - 1,
|
||||
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
|
||||
EXTENT_DELALLOC_NEW |
|
||||
EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
|
||||
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
|
||||
PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
|
||||
@ -918,10 +946,13 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
u64 num_bytes;
|
||||
unsigned long ram_size;
|
||||
u64 disk_num_bytes;
|
||||
u64 cur_alloc_size;
|
||||
u64 cur_alloc_size = 0;
|
||||
u64 blocksize = fs_info->sectorsize;
|
||||
struct btrfs_key ins;
|
||||
struct extent_map *em;
|
||||
unsigned clear_bits;
|
||||
unsigned long page_ops;
|
||||
bool extent_reserved = false;
|
||||
int ret = 0;
|
||||
|
||||
if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
|
||||
@ -944,6 +975,7 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
extent_clear_unlock_delalloc(inode, start, end,
|
||||
delalloc_end, NULL,
|
||||
EXTENT_LOCKED | EXTENT_DELALLOC |
|
||||
EXTENT_DELALLOC_NEW |
|
||||
EXTENT_DEFRAG, PAGE_UNLOCK |
|
||||
PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
|
||||
PAGE_END_WRITEBACK);
|
||||
@ -966,14 +998,14 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
start + num_bytes - 1, 0);
|
||||
|
||||
while (disk_num_bytes > 0) {
|
||||
unsigned long op;
|
||||
|
||||
cur_alloc_size = disk_num_bytes;
|
||||
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
|
||||
fs_info->sectorsize, 0, alloc_hint,
|
||||
&ins, 1, 1);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
cur_alloc_size = ins.offset;
|
||||
extent_reserved = true;
|
||||
|
||||
ram_size = ins.offset;
|
||||
em = create_io_em(inode, start, ins.offset, /* len */
|
||||
@ -988,7 +1020,6 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
goto out_reserve;
|
||||
free_extent_map(em);
|
||||
|
||||
cur_alloc_size = ins.offset;
|
||||
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
|
||||
ram_size, cur_alloc_size, 0);
|
||||
if (ret)
|
||||
@ -998,15 +1029,24 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
BTRFS_DATA_RELOC_TREE_OBJECTID) {
|
||||
ret = btrfs_reloc_clone_csums(inode, start,
|
||||
cur_alloc_size);
|
||||
/*
|
||||
* Only drop cache here, and process as normal.
|
||||
*
|
||||
* We must not allow extent_clear_unlock_delalloc()
|
||||
* at out_unlock label to free meta of this ordered
|
||||
* extent, as its meta should be freed by
|
||||
* btrfs_finish_ordered_io().
|
||||
*
|
||||
* So we must continue until @start is increased to
|
||||
* skip current ordered extent.
|
||||
*/
|
||||
if (ret)
|
||||
goto out_drop_extent_cache;
|
||||
btrfs_drop_extent_cache(BTRFS_I(inode), start,
|
||||
start + ram_size - 1, 0);
|
||||
}
|
||||
|
||||
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
|
||||
|
||||
if (disk_num_bytes < cur_alloc_size)
|
||||
break;
|
||||
|
||||
/* we're not doing compressed IO, don't unlock the first
|
||||
* page (which the caller expects to stay locked), don't
|
||||
* clear any dirty bits and don't set any writeback bits
|
||||
@ -1014,18 +1054,30 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
* Do set the Private2 bit so we know this page was properly
|
||||
* setup for writepage
|
||||
*/
|
||||
op = unlock ? PAGE_UNLOCK : 0;
|
||||
op |= PAGE_SET_PRIVATE2;
|
||||
page_ops = unlock ? PAGE_UNLOCK : 0;
|
||||
page_ops |= PAGE_SET_PRIVATE2;
|
||||
|
||||
extent_clear_unlock_delalloc(inode, start,
|
||||
start + ram_size - 1,
|
||||
delalloc_end, locked_page,
|
||||
EXTENT_LOCKED | EXTENT_DELALLOC,
|
||||
op);
|
||||
disk_num_bytes -= cur_alloc_size;
|
||||
page_ops);
|
||||
if (disk_num_bytes < cur_alloc_size)
|
||||
disk_num_bytes = 0;
|
||||
else
|
||||
disk_num_bytes -= cur_alloc_size;
|
||||
num_bytes -= cur_alloc_size;
|
||||
alloc_hint = ins.objectid + ins.offset;
|
||||
start += cur_alloc_size;
|
||||
extent_reserved = false;
|
||||
|
||||
/*
|
||||
* btrfs_reloc_clone_csums() error, since start is increased
|
||||
* extent_clear_unlock_delalloc() at out_unlock label won't
|
||||
* free metadata of current ordered extent, we're OK to exit.
|
||||
*/
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
@ -1036,12 +1088,35 @@ out_reserve:
|
||||
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
|
||||
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
|
||||
out_unlock:
|
||||
clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
|
||||
EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
|
||||
page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
|
||||
PAGE_END_WRITEBACK;
|
||||
/*
|
||||
* If we reserved an extent for our delalloc range (or a subrange) and
|
||||
* failed to create the respective ordered extent, then it means that
|
||||
* when we reserved the extent we decremented the extent's size from
|
||||
* the data space_info's bytes_may_use counter and incremented the
|
||||
* space_info's bytes_reserved counter by the same amount. We must make
|
||||
* sure extent_clear_unlock_delalloc() does not try to decrement again
|
||||
* the data space_info's bytes_may_use counter, therefore we do not pass
|
||||
* it the flag EXTENT_CLEAR_DATA_RESV.
|
||||
*/
|
||||
if (extent_reserved) {
|
||||
extent_clear_unlock_delalloc(inode, start,
|
||||
start + cur_alloc_size,
|
||||
start + cur_alloc_size,
|
||||
locked_page,
|
||||
clear_bits,
|
||||
page_ops);
|
||||
start += cur_alloc_size;
|
||||
if (start >= end)
|
||||
goto out;
|
||||
}
|
||||
extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
|
||||
locked_page,
|
||||
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
|
||||
EXTENT_DELALLOC | EXTENT_DEFRAG,
|
||||
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
|
||||
PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
|
||||
clear_bits | EXTENT_CLEAR_DATA_RESV,
|
||||
page_ops);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1414,15 +1489,14 @@ out_check:
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
|
||||
if (root->root_key.objectid ==
|
||||
BTRFS_DATA_RELOC_TREE_OBJECTID) {
|
||||
BTRFS_DATA_RELOC_TREE_OBJECTID)
|
||||
/*
|
||||
* Error handled later, as we must prevent
|
||||
* extent_clear_unlock_delalloc() in error handler
|
||||
* from freeing metadata of created ordered extent.
|
||||
*/
|
||||
ret = btrfs_reloc_clone_csums(inode, cur_offset,
|
||||
num_bytes);
|
||||
if (ret) {
|
||||
if (!nolock && nocow)
|
||||
btrfs_end_write_no_snapshoting(root);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
extent_clear_unlock_delalloc(inode, cur_offset,
|
||||
cur_offset + num_bytes - 1, end,
|
||||
@ -1434,6 +1508,14 @@ out_check:
|
||||
if (!nolock && nocow)
|
||||
btrfs_end_write_no_snapshoting(root);
|
||||
cur_offset = extent_end;
|
||||
|
||||
/*
|
||||
* btrfs_reloc_clone_csums() error, now we're OK to call error
|
||||
* handler, as metadata for created ordered extent will only
|
||||
* be freed by btrfs_finish_ordered_io().
|
||||
*/
|
||||
if (ret)
|
||||
goto error;
|
||||
if (cur_offset > end)
|
||||
break;
|
||||
}
|
||||
@ -1509,6 +1591,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
|
||||
ret = cow_file_range_async(inode, locked_page, start, end,
|
||||
page_started, nr_written);
|
||||
}
|
||||
if (ret)
|
||||
btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1693,6 +1777,14 @@ static void btrfs_set_bit_hook(struct inode *inode,
|
||||
btrfs_add_delalloc_inodes(root, inode);
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
|
||||
if (!(state->state & EXTENT_DELALLOC_NEW) &&
|
||||
(*bits & EXTENT_DELALLOC_NEW)) {
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
|
||||
state->start;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1722,7 +1814,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
|
||||
|
||||
if (*bits & EXTENT_FIRST_DELALLOC) {
|
||||
*bits &= ~EXTENT_FIRST_DELALLOC;
|
||||
} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
|
||||
} else if (!(*bits & EXTENT_CLEAR_META_RESV)) {
|
||||
spin_lock(&inode->lock);
|
||||
inode->outstanding_extents -= num_extents;
|
||||
spin_unlock(&inode->lock);
|
||||
@ -1733,7 +1825,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
|
||||
* don't need to call delalloc_release_metadata if there is an
|
||||
* error.
|
||||
*/
|
||||
if (*bits & EXTENT_DO_ACCOUNTING &&
|
||||
if (*bits & EXTENT_CLEAR_META_RESV &&
|
||||
root != fs_info->tree_root)
|
||||
btrfs_delalloc_release_metadata(inode, len);
|
||||
|
||||
@ -1741,10 +1833,9 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
|
||||
if (btrfs_is_testing(fs_info))
|
||||
return;
|
||||
|
||||
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
|
||||
&& do_list && !(state->state & EXTENT_NORESERVE)
|
||||
&& (*bits & (EXTENT_DO_ACCOUNTING |
|
||||
EXTENT_CLEAR_DATA_RESV)))
|
||||
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
|
||||
do_list && !(state->state & EXTENT_NORESERVE) &&
|
||||
(*bits & EXTENT_CLEAR_DATA_RESV))
|
||||
btrfs_free_reserved_data_space_noquota(
|
||||
&inode->vfs_inode,
|
||||
state->start, len);
|
||||
@ -1759,6 +1850,14 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
|
||||
btrfs_del_delalloc_inode(root, inode);
|
||||
spin_unlock(&inode->lock);
|
||||
}
|
||||
|
||||
if ((state->state & EXTENT_DELALLOC_NEW) &&
|
||||
(*bits & EXTENT_DELALLOC_NEW)) {
|
||||
spin_lock(&inode->lock);
|
||||
ASSERT(inode->new_delalloc_bytes >= len);
|
||||
inode->new_delalloc_bytes -= len;
|
||||
spin_unlock(&inode->lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2791,6 +2890,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
u64 logical_len = ordered_extent->len;
|
||||
bool nolock;
|
||||
bool truncated = false;
|
||||
bool range_locked = false;
|
||||
bool clear_new_delalloc_bytes = false;
|
||||
|
||||
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
|
||||
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
|
||||
!test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
|
||||
clear_new_delalloc_bytes = true;
|
||||
|
||||
nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
|
||||
|
||||
@ -2839,6 +2945,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
goto out;
|
||||
}
|
||||
|
||||
range_locked = true;
|
||||
lock_extent_bits(io_tree, ordered_extent->file_offset,
|
||||
ordered_extent->file_offset + ordered_extent->len - 1,
|
||||
&cached_state);
|
||||
@ -2864,7 +2971,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
trans = NULL;
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
}
|
||||
|
||||
trans->block_rsv = &fs_info->delalloc_block_rsv;
|
||||
@ -2896,7 +3003,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
trans->transid);
|
||||
if (ret < 0) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
}
|
||||
|
||||
add_pending_csums(trans, inode, &ordered_extent->list);
|
||||
@ -2905,14 +3012,26 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
ret = btrfs_update_inode_fallback(trans, root, inode);
|
||||
if (ret) { /* -ENOMEM or corruption */
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
unlock_extent_cached(io_tree, ordered_extent->file_offset,
|
||||
ordered_extent->file_offset +
|
||||
ordered_extent->len - 1, &cached_state, GFP_NOFS);
|
||||
out:
|
||||
if (range_locked || clear_new_delalloc_bytes) {
|
||||
unsigned int clear_bits = 0;
|
||||
|
||||
if (range_locked)
|
||||
clear_bits |= EXTENT_LOCKED;
|
||||
if (clear_new_delalloc_bytes)
|
||||
clear_bits |= EXTENT_DELALLOC_NEW;
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree,
|
||||
ordered_extent->file_offset,
|
||||
ordered_extent->file_offset +
|
||||
ordered_extent->len - 1,
|
||||
clear_bits,
|
||||
(clear_bits & EXTENT_LOCKED) ? 1 : 0,
|
||||
0, &cached_state, GFP_NOFS);
|
||||
}
|
||||
|
||||
if (root != fs_info->tree_root)
|
||||
btrfs_delalloc_release_metadata(BTRFS_I(inode),
|
||||
ordered_extent->len);
|
||||
@ -4401,9 +4520,17 @@ search_again:
|
||||
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
|
||||
item_end +=
|
||||
btrfs_file_extent_num_bytes(leaf, fi);
|
||||
|
||||
trace_btrfs_truncate_show_fi_regular(
|
||||
BTRFS_I(inode), leaf, fi,
|
||||
found_key.offset);
|
||||
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
item_end += btrfs_file_extent_inline_len(leaf,
|
||||
path->slots[0], fi);
|
||||
|
||||
trace_btrfs_truncate_show_fi_inline(
|
||||
BTRFS_I(inode), leaf, fi, path->slots[0],
|
||||
found_key.offset);
|
||||
}
|
||||
item_end--;
|
||||
}
|
||||
@ -4603,13 +4730,6 @@ error:
|
||||
|
||||
btrfs_free_path(path);
|
||||
|
||||
if (err == 0) {
|
||||
/* only inline file may have last_size != new_size */
|
||||
if (new_size >= fs_info->sectorsize ||
|
||||
new_size > fs_info->max_inline)
|
||||
ASSERT(last_size == new_size);
|
||||
}
|
||||
|
||||
if (be_nice && bytes_deleted > SZ_32M) {
|
||||
unsigned long updates = trans->delayed_ref_updates;
|
||||
if (updates) {
|
||||
@ -6735,7 +6855,6 @@ static noinline int uncompress_inline(struct btrfs_path *path,
|
||||
*
|
||||
* This also copies inline extents directly into the page.
|
||||
*/
|
||||
|
||||
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
struct page *page,
|
||||
size_t pg_offset, u64 start, u64 len,
|
||||
@ -6835,11 +6954,18 @@ again:
|
||||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
extent_end = extent_start +
|
||||
btrfs_file_extent_num_bytes(leaf, item);
|
||||
|
||||
trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
|
||||
extent_start);
|
||||
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
size_t size;
|
||||
size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
|
||||
extent_end = ALIGN(extent_start + size,
|
||||
fs_info->sectorsize);
|
||||
|
||||
trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
|
||||
path->slots[0],
|
||||
extent_start);
|
||||
}
|
||||
next:
|
||||
if (start >= extent_end) {
|
||||
@ -7037,19 +7163,17 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
|
||||
em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
|
||||
if (IS_ERR(em))
|
||||
return em;
|
||||
if (em) {
|
||||
/*
|
||||
* if our em maps to
|
||||
* - a hole or
|
||||
* - a pre-alloc extent,
|
||||
* there might actually be delalloc bytes behind it.
|
||||
*/
|
||||
if (em->block_start != EXTENT_MAP_HOLE &&
|
||||
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
|
||||
return em;
|
||||
else
|
||||
hole_em = em;
|
||||
}
|
||||
/*
|
||||
* If our em maps to:
|
||||
* - a hole or
|
||||
* - a pre-alloc extent,
|
||||
* there might actually be delalloc bytes behind it.
|
||||
*/
|
||||
if (em->block_start != EXTENT_MAP_HOLE &&
|
||||
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
|
||||
return em;
|
||||
else
|
||||
hole_em = em;
|
||||
|
||||
/* check to see if we've wrapped (len == -1 or similar) */
|
||||
end = start + len;
|
||||
@ -8127,17 +8251,26 @@ static void btrfs_endio_direct_read(struct bio *bio)
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
|
||||
const u64 offset,
|
||||
const u64 bytes,
|
||||
const int uptodate)
|
||||
static void __endio_write_update_ordered(struct inode *inode,
|
||||
const u64 offset, const u64 bytes,
|
||||
const bool uptodate)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_ordered_extent *ordered = NULL;
|
||||
struct btrfs_workqueue *wq;
|
||||
btrfs_work_func_t func;
|
||||
u64 ordered_offset = offset;
|
||||
u64 ordered_bytes = bytes;
|
||||
int ret;
|
||||
|
||||
if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
|
||||
wq = fs_info->endio_freespace_worker;
|
||||
func = btrfs_freespace_write_helper;
|
||||
} else {
|
||||
wq = fs_info->endio_write_workers;
|
||||
func = btrfs_endio_write_helper;
|
||||
}
|
||||
|
||||
again:
|
||||
ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
|
||||
&ordered_offset,
|
||||
@ -8146,9 +8279,8 @@ again:
|
||||
if (!ret)
|
||||
goto out_test;
|
||||
|
||||
btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
|
||||
finish_ordered_fn, NULL, NULL);
|
||||
btrfs_queue_work(fs_info->endio_write_workers, &ordered->work);
|
||||
btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
|
||||
btrfs_queue_work(wq, &ordered->work);
|
||||
out_test:
|
||||
/*
|
||||
* our bio might span multiple ordered extents. If we haven't
|
||||
@ -8166,10 +8298,8 @@ static void btrfs_endio_direct_write(struct bio *bio)
|
||||
struct btrfs_dio_private *dip = bio->bi_private;
|
||||
struct bio *dio_bio = dip->dio_bio;
|
||||
|
||||
btrfs_endio_direct_write_update_ordered(dip->inode,
|
||||
dip->logical_offset,
|
||||
dip->bytes,
|
||||
!bio->bi_error);
|
||||
__endio_write_update_ordered(dip->inode, dip->logical_offset,
|
||||
dip->bytes, !bio->bi_error);
|
||||
|
||||
kfree(dip);
|
||||
|
||||
@ -8530,10 +8660,10 @@ free_ordered:
|
||||
io_bio = NULL;
|
||||
} else {
|
||||
if (write)
|
||||
btrfs_endio_direct_write_update_ordered(inode,
|
||||
__endio_write_update_ordered(inode,
|
||||
file_offset,
|
||||
dio_bio->bi_iter.bi_size,
|
||||
0);
|
||||
false);
|
||||
else
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
|
||||
file_offset + dio_bio->bi_iter.bi_size - 1);
|
||||
@ -8668,11 +8798,11 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
|
||||
*/
|
||||
if (dio_data.unsubmitted_oe_range_start <
|
||||
dio_data.unsubmitted_oe_range_end)
|
||||
btrfs_endio_direct_write_update_ordered(inode,
|
||||
__endio_write_update_ordered(inode,
|
||||
dio_data.unsubmitted_oe_range_start,
|
||||
dio_data.unsubmitted_oe_range_end -
|
||||
dio_data.unsubmitted_oe_range_start,
|
||||
0);
|
||||
false);
|
||||
} else if (ret >= 0 && (size_t)ret < count)
|
||||
btrfs_delalloc_release_space(inode, offset,
|
||||
count - (size_t)ret);
|
||||
@ -8819,6 +8949,7 @@ again:
|
||||
if (!inode_evicting)
|
||||
clear_extent_bit(tree, start, end,
|
||||
EXTENT_DIRTY | EXTENT_DELALLOC |
|
||||
EXTENT_DELALLOC_NEW |
|
||||
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
|
||||
EXTENT_DEFRAG, 1, 0, &cached_state,
|
||||
GFP_NOFS);
|
||||
@ -8876,8 +9007,8 @@ again:
|
||||
if (!inode_evicting) {
|
||||
clear_extent_bit(tree, page_start, page_end,
|
||||
EXTENT_LOCKED | EXTENT_DIRTY |
|
||||
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
|
||||
EXTENT_DEFRAG, 1, 1,
|
||||
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
|
||||
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
|
||||
&cached_state, GFP_NOFS);
|
||||
|
||||
__btrfs_releasepage(page, GFP_NOFS);
|
||||
@ -9248,6 +9379,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
|
||||
ei->last_sub_trans = 0;
|
||||
ei->logged_trans = 0;
|
||||
ei->delalloc_bytes = 0;
|
||||
ei->new_delalloc_bytes = 0;
|
||||
ei->defrag_bytes = 0;
|
||||
ei->disk_i_size = 0;
|
||||
ei->flags = 0;
|
||||
@ -9313,6 +9445,7 @@ void btrfs_destroy_inode(struct inode *inode)
|
||||
WARN_ON(BTRFS_I(inode)->outstanding_extents);
|
||||
WARN_ON(BTRFS_I(inode)->reserved_extents);
|
||||
WARN_ON(BTRFS_I(inode)->delalloc_bytes);
|
||||
WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
|
||||
WARN_ON(BTRFS_I(inode)->csum_bytes);
|
||||
WARN_ON(BTRFS_I(inode)->defrag_bytes);
|
||||
|
||||
@ -9436,7 +9569,7 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
|
||||
stat->dev = BTRFS_I(inode)->root->anon_dev;
|
||||
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
|
||||
delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
|
||||
ALIGN(delalloc_bytes, blocksize)) >> 9;
|
||||
|
@ -1504,7 +1504,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
|
||||
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
|
||||
mnt_drop_write_file(file);
|
||||
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
|
||||
}
|
||||
@ -1619,7 +1619,7 @@ out_free:
|
||||
kfree(vol_args);
|
||||
out:
|
||||
mutex_unlock(&fs_info->volume_mutex);
|
||||
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
|
||||
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
|
||||
mnt_drop_write_file(file);
|
||||
return ret;
|
||||
}
|
||||
@ -2661,7 +2661,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1))
|
||||
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
|
||||
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
|
||||
|
||||
mutex_lock(&fs_info->volume_mutex);
|
||||
@ -2680,7 +2680,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
|
||||
kfree(vol_args);
|
||||
out:
|
||||
mutex_unlock(&fs_info->volume_mutex);
|
||||
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
|
||||
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2708,7 +2708,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
|
||||
if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
|
||||
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
|
||||
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
|
||||
goto out;
|
||||
}
|
||||
@ -2721,7 +2721,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
|
||||
ret = btrfs_rm_device(fs_info, vol_args->name, 0);
|
||||
}
|
||||
mutex_unlock(&fs_info->volume_mutex);
|
||||
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
|
||||
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
|
||||
|
||||
if (!ret) {
|
||||
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
|
||||
@ -2752,7 +2752,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
|
||||
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
|
||||
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
|
||||
goto out_drop_write;
|
||||
}
|
||||
@ -2772,7 +2772,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
|
||||
btrfs_info(fs_info, "disk deleted %s", vol_args->name);
|
||||
kfree(vol_args);
|
||||
out:
|
||||
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
|
||||
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
|
||||
out_drop_write:
|
||||
mnt_drop_write_file(file);
|
||||
|
||||
@ -4439,13 +4439,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
|
||||
ret = -EROFS;
|
||||
goto out;
|
||||
}
|
||||
if (atomic_xchg(
|
||||
&fs_info->mutually_exclusive_operation_running, 1)) {
|
||||
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
|
||||
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
|
||||
} else {
|
||||
ret = btrfs_dev_replace_by_ioctl(fs_info, p);
|
||||
atomic_set(
|
||||
&fs_info->mutually_exclusive_operation_running, 0);
|
||||
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
|
||||
}
|
||||
break;
|
||||
case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
|
||||
@ -4640,7 +4638,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
|
||||
return ret;
|
||||
|
||||
again:
|
||||
if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
|
||||
if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
|
||||
mutex_lock(&fs_info->volume_mutex);
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
need_unlock = true;
|
||||
@ -4686,7 +4684,7 @@ again:
|
||||
}
|
||||
|
||||
locked:
|
||||
BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
|
||||
BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
|
||||
|
||||
if (arg) {
|
||||
bargs = memdup_user(arg, sizeof(*bargs));
|
||||
@ -4742,11 +4740,10 @@ locked:
|
||||
|
||||
do_balance:
|
||||
/*
|
||||
* Ownership of bctl and mutually_exclusive_operation_running
|
||||
* Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP
|
||||
* goes to to btrfs_balance. bctl is freed in __cancel_balance,
|
||||
* or, if restriper was paused all the way until unmount, in
|
||||
* free_fs_info. mutually_exclusive_operation_running is
|
||||
* cleared in __cancel_balance.
|
||||
* free_fs_info. The flag is cleared in __cancel_balance.
|
||||
*/
|
||||
need_unlock = false;
|
||||
|
||||
@ -4766,7 +4763,7 @@ out_unlock:
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
mutex_unlock(&fs_info->volume_mutex);
|
||||
if (need_unlock)
|
||||
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
|
||||
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
|
||||
out:
|
||||
mnt_drop_write_file(file);
|
||||
return ret;
|
||||
|
@ -212,7 +212,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
|
||||
|
||||
/* one ref for the tree */
|
||||
atomic_set(&entry->refs, 1);
|
||||
refcount_set(&entry->refs, 1);
|
||||
init_waitqueue_head(&entry->wait);
|
||||
INIT_LIST_HEAD(&entry->list);
|
||||
INIT_LIST_HEAD(&entry->root_extent_list);
|
||||
@ -358,7 +358,7 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
|
||||
out:
|
||||
if (!ret && cached && entry) {
|
||||
*cached = entry;
|
||||
atomic_inc(&entry->refs);
|
||||
refcount_inc(&entry->refs);
|
||||
}
|
||||
spin_unlock_irqrestore(&tree->lock, flags);
|
||||
return ret == 0;
|
||||
@ -425,7 +425,7 @@ have_entry:
|
||||
out:
|
||||
if (!ret && cached && entry) {
|
||||
*cached = entry;
|
||||
atomic_inc(&entry->refs);
|
||||
refcount_inc(&entry->refs);
|
||||
}
|
||||
spin_unlock_irqrestore(&tree->lock, flags);
|
||||
return ret == 0;
|
||||
@ -456,7 +456,7 @@ void btrfs_get_logged_extents(struct btrfs_inode *inode,
|
||||
if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
|
||||
continue;
|
||||
list_add(&ordered->log_list, logged_list);
|
||||
atomic_inc(&ordered->refs);
|
||||
refcount_inc(&ordered->refs);
|
||||
}
|
||||
spin_unlock_irq(&tree->lock);
|
||||
}
|
||||
@ -565,7 +565,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
|
||||
|
||||
trace_btrfs_ordered_extent_put(entry->inode, entry);
|
||||
|
||||
if (atomic_dec_and_test(&entry->refs)) {
|
||||
if (refcount_dec_and_test(&entry->refs)) {
|
||||
ASSERT(list_empty(&entry->log_list));
|
||||
ASSERT(list_empty(&entry->trans_list));
|
||||
ASSERT(list_empty(&entry->root_extent_list));
|
||||
@ -623,7 +623,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
trans = fs_info->running_transaction;
|
||||
if (trans)
|
||||
atomic_inc(&trans->use_count);
|
||||
refcount_inc(&trans->use_count);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
ASSERT(trans);
|
||||
@ -690,7 +690,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
|
||||
|
||||
list_move_tail(&ordered->root_extent_list,
|
||||
&root->ordered_extents);
|
||||
atomic_inc(&ordered->refs);
|
||||
refcount_inc(&ordered->refs);
|
||||
spin_unlock(&root->ordered_extent_lock);
|
||||
|
||||
btrfs_init_work(&ordered->flush_work,
|
||||
@ -870,7 +870,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
|
||||
if (!offset_in_entry(entry, file_offset))
|
||||
entry = NULL;
|
||||
if (entry)
|
||||
atomic_inc(&entry->refs);
|
||||
refcount_inc(&entry->refs);
|
||||
out:
|
||||
spin_unlock_irq(&tree->lock);
|
||||
return entry;
|
||||
@ -911,7 +911,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
|
||||
}
|
||||
out:
|
||||
if (entry)
|
||||
atomic_inc(&entry->refs);
|
||||
refcount_inc(&entry->refs);
|
||||
spin_unlock_irq(&tree->lock);
|
||||
return entry;
|
||||
}
|
||||
@ -948,7 +948,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
|
||||
goto out;
|
||||
|
||||
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
|
||||
atomic_inc(&entry->refs);
|
||||
refcount_inc(&entry->refs);
|
||||
out:
|
||||
spin_unlock_irq(&tree->lock);
|
||||
return entry;
|
||||
|
@ -113,7 +113,7 @@ struct btrfs_ordered_extent {
|
||||
int compress_type;
|
||||
|
||||
/* reference count */
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
|
||||
/* the inode we belong to */
|
||||
struct inode *inode;
|
||||
|
@ -47,50 +47,6 @@
|
||||
* - check all ioctl parameters
|
||||
*/
|
||||
|
||||
/*
|
||||
* one struct for each qgroup, organized in fs_info->qgroup_tree.
|
||||
*/
|
||||
struct btrfs_qgroup {
|
||||
u64 qgroupid;
|
||||
|
||||
/*
|
||||
* state
|
||||
*/
|
||||
u64 rfer; /* referenced */
|
||||
u64 rfer_cmpr; /* referenced compressed */
|
||||
u64 excl; /* exclusive */
|
||||
u64 excl_cmpr; /* exclusive compressed */
|
||||
|
||||
/*
|
||||
* limits
|
||||
*/
|
||||
u64 lim_flags; /* which limits are set */
|
||||
u64 max_rfer;
|
||||
u64 max_excl;
|
||||
u64 rsv_rfer;
|
||||
u64 rsv_excl;
|
||||
|
||||
/*
|
||||
* reservation tracking
|
||||
*/
|
||||
u64 reserved;
|
||||
|
||||
/*
|
||||
* lists
|
||||
*/
|
||||
struct list_head groups; /* groups this group is member of */
|
||||
struct list_head members; /* groups that are members of this group */
|
||||
struct list_head dirty; /* dirty groups */
|
||||
struct rb_node node; /* tree of qgroups */
|
||||
|
||||
/*
|
||||
* temp variables for accounting operations
|
||||
* Refer to qgroup_shared_accounting() for details.
|
||||
*/
|
||||
u64 old_refcnt;
|
||||
u64 new_refcnt;
|
||||
};
|
||||
|
||||
static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
|
||||
int mod)
|
||||
{
|
||||
@ -1078,6 +1034,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
|
||||
qgroup->excl += sign * num_bytes;
|
||||
qgroup->excl_cmpr += sign * num_bytes;
|
||||
if (sign > 0) {
|
||||
trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
|
||||
if (qgroup->reserved < num_bytes)
|
||||
report_reserved_underflow(fs_info, qgroup, num_bytes);
|
||||
else
|
||||
@ -1103,6 +1060,8 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
|
||||
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
|
||||
qgroup->excl += sign * num_bytes;
|
||||
if (sign > 0) {
|
||||
trace_qgroup_update_reserve(fs_info, qgroup,
|
||||
-(s64)num_bytes);
|
||||
if (qgroup->reserved < num_bytes)
|
||||
report_reserved_underflow(fs_info, qgroup,
|
||||
num_bytes);
|
||||
@ -2058,12 +2017,12 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (!ret) {
|
||||
/*
|
||||
* Use (u64)-1 as time_seq to do special search, which
|
||||
* Use SEQ_LAST as time_seq to do special search, which
|
||||
* doesn't lock tree or delayed_refs and search current
|
||||
* root. It's safe inside commit_transaction().
|
||||
*/
|
||||
ret = btrfs_find_all_roots(trans, fs_info,
|
||||
record->bytenr, (u64)-1, &new_roots);
|
||||
record->bytenr, SEQ_LAST, &new_roots);
|
||||
if (ret < 0)
|
||||
goto cleanup;
|
||||
if (qgroup_to_skip)
|
||||
@ -2370,6 +2329,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
u64 ref_root = root->root_key.objectid;
|
||||
int ret = 0;
|
||||
int retried = 0;
|
||||
struct ulist_node *unode;
|
||||
struct ulist_iterator uiter;
|
||||
|
||||
@ -2378,7 +2338,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
|
||||
|
||||
if (num_bytes == 0)
|
||||
return 0;
|
||||
|
||||
retry:
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
quota_root = fs_info->quota_root;
|
||||
if (!quota_root)
|
||||
@ -2405,6 +2365,27 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
|
||||
qg = unode_aux_to_qgroup(unode);
|
||||
|
||||
if (enforce && !qgroup_check_limits(qg, num_bytes)) {
|
||||
/*
|
||||
* Commit the tree and retry, since we may have
|
||||
* deletions which would free up space.
|
||||
*/
|
||||
if (!retried && qg->reserved > 0) {
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
ret = btrfs_start_delalloc_inodes(root, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
if (ret)
|
||||
return ret;
|
||||
retried++;
|
||||
goto retry;
|
||||
}
|
||||
ret = -EDQUOT;
|
||||
goto out;
|
||||
}
|
||||
@ -2427,6 +2408,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
|
||||
|
||||
qg = unode_aux_to_qgroup(unode);
|
||||
|
||||
trace_qgroup_update_reserve(fs_info, qg, num_bytes);
|
||||
qg->reserved += num_bytes;
|
||||
}
|
||||
|
||||
@ -2472,6 +2454,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
|
||||
|
||||
qg = unode_aux_to_qgroup(unode);
|
||||
|
||||
trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
|
||||
if (qg->reserved < num_bytes)
|
||||
report_reserved_underflow(fs_info, qg, num_bytes);
|
||||
else
|
||||
@ -2490,18 +2473,6 @@ out:
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
}
|
||||
|
||||
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
|
||||
return;
|
||||
btrfs_err(trans->fs_info,
|
||||
"qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x",
|
||||
trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
|
||||
(u32)(trans->delayed_ref_elem.seq >> 32),
|
||||
(u32)trans->delayed_ref_elem.seq);
|
||||
BUG();
|
||||
}
|
||||
|
||||
/*
|
||||
* returns < 0 on error, 0 when more leafs are to be scanned.
|
||||
* returns 1 when done.
|
||||
@ -2889,14 +2860,14 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (free) {
|
||||
if (free)
|
||||
trace_op = QGROUP_FREE;
|
||||
trace_btrfs_qgroup_release_data(inode, start, len,
|
||||
changeset.bytes_changed, trace_op);
|
||||
if (free)
|
||||
btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
|
||||
BTRFS_I(inode)->root->objectid,
|
||||
changeset.bytes_changed);
|
||||
trace_op = QGROUP_FREE;
|
||||
}
|
||||
trace_btrfs_qgroup_release_data(inode, start, len,
|
||||
changeset.bytes_changed, trace_op);
|
||||
out:
|
||||
ulist_release(&changeset.range_changed);
|
||||
return ret;
|
||||
@ -2948,6 +2919,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
|
||||
return 0;
|
||||
|
||||
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
|
||||
trace_qgroup_meta_reserve(root, (s64)num_bytes);
|
||||
ret = qgroup_reserve(root, num_bytes, enforce);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
@ -2967,6 +2939,7 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
|
||||
reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
|
||||
if (reserved == 0)
|
||||
return;
|
||||
trace_qgroup_meta_reserve(root, -(s64)reserved);
|
||||
btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
|
||||
}
|
||||
|
||||
@ -2981,6 +2954,7 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
|
||||
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
|
||||
WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
|
||||
atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
|
||||
trace_qgroup_meta_reserve(root, -(s64)num_bytes);
|
||||
btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
|
||||
}
|
||||
|
||||
|
@ -61,6 +61,50 @@ struct btrfs_qgroup_extent_record {
|
||||
struct ulist *old_roots;
|
||||
};
|
||||
|
||||
/*
|
||||
* one struct for each qgroup, organized in fs_info->qgroup_tree.
|
||||
*/
|
||||
struct btrfs_qgroup {
|
||||
u64 qgroupid;
|
||||
|
||||
/*
|
||||
* state
|
||||
*/
|
||||
u64 rfer; /* referenced */
|
||||
u64 rfer_cmpr; /* referenced compressed */
|
||||
u64 excl; /* exclusive */
|
||||
u64 excl_cmpr; /* exclusive compressed */
|
||||
|
||||
/*
|
||||
* limits
|
||||
*/
|
||||
u64 lim_flags; /* which limits are set */
|
||||
u64 max_rfer;
|
||||
u64 max_excl;
|
||||
u64 rsv_rfer;
|
||||
u64 rsv_excl;
|
||||
|
||||
/*
|
||||
* reservation tracking
|
||||
*/
|
||||
u64 reserved;
|
||||
|
||||
/*
|
||||
* lists
|
||||
*/
|
||||
struct list_head groups; /* groups this group is member of */
|
||||
struct list_head members; /* groups that are members of this group */
|
||||
struct list_head dirty; /* dirty groups */
|
||||
struct rb_node node; /* tree of qgroups */
|
||||
|
||||
/*
|
||||
* temp variables for accounting operations
|
||||
* Refer to qgroup_shared_accounting() for details.
|
||||
*/
|
||||
u64 old_refcnt;
|
||||
u64 new_refcnt;
|
||||
};
|
||||
|
||||
/*
|
||||
* For qgroup event trace points only
|
||||
*/
|
||||
@ -186,17 +230,12 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_qgroup_inherit *inherit);
|
||||
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
|
||||
u64 ref_root, u64 num_bytes);
|
||||
/*
|
||||
* TODO: Add proper trace point for it, as btrfs_qgroup_free() is
|
||||
* called by everywhere, can't provide good trace for delayed ref case.
|
||||
*/
|
||||
static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
|
||||
u64 ref_root, u64 num_bytes)
|
||||
{
|
||||
btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
|
||||
trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
|
||||
btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
|
||||
}
|
||||
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
|
||||
|
@ -149,7 +149,7 @@ struct btrfs_raid_bio {
|
||||
|
||||
int generic_bio_cnt;
|
||||
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
|
||||
atomic_t stripes_pending;
|
||||
|
||||
@ -389,7 +389,7 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
|
||||
if (bio_list_empty(&rbio->bio_list)) {
|
||||
if (!list_empty(&rbio->hash_list)) {
|
||||
list_del_init(&rbio->hash_list);
|
||||
atomic_dec(&rbio->refs);
|
||||
refcount_dec(&rbio->refs);
|
||||
BUG_ON(!list_empty(&rbio->plug_list));
|
||||
}
|
||||
}
|
||||
@ -480,7 +480,7 @@ static void cache_rbio(struct btrfs_raid_bio *rbio)
|
||||
|
||||
/* bump our ref if we were not in the list before */
|
||||
if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
|
||||
atomic_inc(&rbio->refs);
|
||||
refcount_inc(&rbio->refs);
|
||||
|
||||
if (!list_empty(&rbio->stripe_cache)){
|
||||
list_move(&rbio->stripe_cache, &table->stripe_cache);
|
||||
@ -689,7 +689,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
|
||||
test_bit(RBIO_CACHE_BIT, &cur->flags) &&
|
||||
!test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
|
||||
list_del_init(&cur->hash_list);
|
||||
atomic_dec(&cur->refs);
|
||||
refcount_dec(&cur->refs);
|
||||
|
||||
steal_rbio(cur, rbio);
|
||||
cache_drop = cur;
|
||||
@ -738,7 +738,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
|
||||
}
|
||||
}
|
||||
lockit:
|
||||
atomic_inc(&rbio->refs);
|
||||
refcount_inc(&rbio->refs);
|
||||
list_add(&rbio->hash_list, &h->hash_list);
|
||||
out:
|
||||
spin_unlock_irqrestore(&h->lock, flags);
|
||||
@ -784,7 +784,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
|
||||
}
|
||||
|
||||
list_del_init(&rbio->hash_list);
|
||||
atomic_dec(&rbio->refs);
|
||||
refcount_dec(&rbio->refs);
|
||||
|
||||
/*
|
||||
* we use the plug list to hold all the rbios
|
||||
@ -801,7 +801,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
|
||||
list_del_init(&rbio->plug_list);
|
||||
|
||||
list_add(&next->hash_list, &h->hash_list);
|
||||
atomic_inc(&next->refs);
|
||||
refcount_inc(&next->refs);
|
||||
spin_unlock(&rbio->bio_list_lock);
|
||||
spin_unlock_irqrestore(&h->lock, flags);
|
||||
|
||||
@ -843,8 +843,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
|
||||
{
|
||||
int i;
|
||||
|
||||
WARN_ON(atomic_read(&rbio->refs) < 0);
|
||||
if (!atomic_dec_and_test(&rbio->refs))
|
||||
if (!refcount_dec_and_test(&rbio->refs))
|
||||
return;
|
||||
|
||||
WARN_ON(!list_empty(&rbio->stripe_cache));
|
||||
@ -997,7 +996,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
|
||||
rbio->stripe_npages = stripe_npages;
|
||||
rbio->faila = -1;
|
||||
rbio->failb = -1;
|
||||
atomic_set(&rbio->refs, 1);
|
||||
refcount_set(&rbio->refs, 1);
|
||||
atomic_set(&rbio->error, 0);
|
||||
atomic_set(&rbio->stripes_pending, 0);
|
||||
|
||||
@ -2118,6 +2117,11 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
struct btrfs_raid_bio *rbio;
|
||||
int ret;
|
||||
|
||||
if (generic_io) {
|
||||
ASSERT(bbio->mirror_num == mirror_num);
|
||||
btrfs_io_bio(bio)->mirror_num = mirror_num;
|
||||
}
|
||||
|
||||
rbio = alloc_rbio(fs_info, bbio, stripe_len);
|
||||
if (IS_ERR(rbio)) {
|
||||
if (generic_io)
|
||||
@ -2194,6 +2198,8 @@ static void read_rebuild_work(struct btrfs_work *work)
|
||||
/*
|
||||
* The following code is used to scrub/replace the parity stripe
|
||||
*
|
||||
* Caller must have already increased bio_counter for getting @bbio.
|
||||
*
|
||||
* Note: We need make sure all the pages that add into the scrub/replace
|
||||
* raid bio are correct and not be changed during the scrub/replace. That
|
||||
* is those pages just hold metadata or file data with checksum.
|
||||
@ -2231,6 +2237,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
ASSERT(rbio->stripe_npages == stripe_nsectors);
|
||||
bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
|
||||
|
||||
/*
|
||||
* We have already increased bio_counter when getting bbio, record it
|
||||
* so we can free it at rbio_orig_end_io().
|
||||
*/
|
||||
rbio->generic_bio_cnt = 1;
|
||||
|
||||
return rbio;
|
||||
}
|
||||
|
||||
@ -2673,6 +2685,12 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* When we get bbio, we have already increased bio_counter, record it
|
||||
* so we can free it at rbio_orig_end_io()
|
||||
*/
|
||||
rbio->generic_bio_cnt = 1;
|
||||
|
||||
return rbio;
|
||||
}
|
||||
|
||||
|
@ -209,9 +209,9 @@ cleanup:
|
||||
return;
|
||||
}
|
||||
|
||||
int btree_readahead_hook(struct btrfs_fs_info *fs_info,
|
||||
struct extent_buffer *eb, int err)
|
||||
int btree_readahead_hook(struct extent_buffer *eb, int err)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
int ret = 0;
|
||||
struct reada_extent *re;
|
||||
|
||||
@ -235,10 +235,10 @@ start_machine:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *dev, u64 logical,
|
||||
static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
|
||||
struct btrfs_bio *bbio)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = dev->fs_info;
|
||||
int ret;
|
||||
struct reada_zone *zone;
|
||||
struct btrfs_block_group_cache *cache = NULL;
|
||||
@ -270,6 +270,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
|
||||
if (!zone)
|
||||
return NULL;
|
||||
|
||||
ret = radix_tree_preload(GFP_KERNEL);
|
||||
if (ret) {
|
||||
kfree(zone);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
zone->start = start;
|
||||
zone->end = end;
|
||||
INIT_LIST_HEAD(&zone->list);
|
||||
@ -299,6 +305,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
|
||||
zone = NULL;
|
||||
}
|
||||
spin_unlock(&fs_info->reada_lock);
|
||||
radix_tree_preload_end();
|
||||
|
||||
return zone;
|
||||
}
|
||||
@ -313,7 +320,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_device *dev;
|
||||
struct btrfs_device *prev_dev;
|
||||
u32 blocksize;
|
||||
u64 length;
|
||||
int real_stripes;
|
||||
int nzones = 0;
|
||||
@ -334,7 +340,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
|
||||
if (!re)
|
||||
return NULL;
|
||||
|
||||
blocksize = fs_info->nodesize;
|
||||
re->logical = logical;
|
||||
re->top = *top;
|
||||
INIT_LIST_HEAD(&re->extctl);
|
||||
@ -344,10 +349,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
|
||||
/*
|
||||
* map block
|
||||
*/
|
||||
length = blocksize;
|
||||
length = fs_info->nodesize;
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
|
||||
&length, &bbio, 0);
|
||||
if (ret || !bbio || length < blocksize)
|
||||
if (ret || !bbio || length < fs_info->nodesize)
|
||||
goto error;
|
||||
|
||||
if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
|
||||
@ -367,7 +372,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
|
||||
if (!dev->bdev)
|
||||
continue;
|
||||
|
||||
zone = reada_find_zone(fs_info, dev, logical, bbio);
|
||||
zone = reada_find_zone(dev, logical, bbio);
|
||||
if (!zone)
|
||||
continue;
|
||||
|
||||
@ -386,6 +391,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = radix_tree_preload(GFP_KERNEL);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
/* insert extent in reada_tree + all per-device trees, all or nothing */
|
||||
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
|
||||
spin_lock(&fs_info->reada_lock);
|
||||
@ -395,13 +404,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
|
||||
re_exist->refcnt++;
|
||||
spin_unlock(&fs_info->reada_lock);
|
||||
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
|
||||
radix_tree_preload_end();
|
||||
goto error;
|
||||
}
|
||||
if (ret) {
|
||||
spin_unlock(&fs_info->reada_lock);
|
||||
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
|
||||
radix_tree_preload_end();
|
||||
goto error;
|
||||
}
|
||||
radix_tree_preload_end();
|
||||
prev_dev = NULL;
|
||||
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
|
||||
&fs_info->dev_replace);
|
||||
@ -639,9 +651,9 @@ static int reada_pick_zone(struct btrfs_device *dev)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *dev)
|
||||
static int reada_start_machine_dev(struct btrfs_device *dev)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = dev->fs_info;
|
||||
struct reada_extent *re = NULL;
|
||||
int mirror_num = 0;
|
||||
struct extent_buffer *eb = NULL;
|
||||
@ -754,8 +766,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
|
||||
list_for_each_entry(device, &fs_devices->devices, dev_list) {
|
||||
if (atomic_read(&device->reada_in_flight) <
|
||||
MAX_IN_FLIGHT)
|
||||
enqueued += reada_start_machine_dev(fs_info,
|
||||
device);
|
||||
enqueued += reada_start_machine_dev(device);
|
||||
}
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
total += enqueued;
|
||||
|
@ -501,8 +501,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_root_item *item = &root->root_item;
|
||||
struct timespec ct = current_fs_time(root->fs_info->sb);
|
||||
struct timespec ct;
|
||||
|
||||
ktime_get_real_ts(&ct);
|
||||
spin_lock(&root->root_item_lock);
|
||||
btrfs_set_root_ctransid(item, trans->transid);
|
||||
btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
|
||||
|
331
fs/btrfs/scrub.c
331
fs/btrfs/scrub.c
@ -64,7 +64,7 @@ struct scrub_ctx;
|
||||
#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */
|
||||
|
||||
struct scrub_recover {
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
struct btrfs_bio *bbio;
|
||||
u64 map_length;
|
||||
};
|
||||
@ -112,7 +112,7 @@ struct scrub_block {
|
||||
struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
|
||||
int page_count;
|
||||
atomic_t outstanding_pages;
|
||||
atomic_t refs; /* free mem on transition to zero */
|
||||
refcount_t refs; /* free mem on transition to zero */
|
||||
struct scrub_ctx *sctx;
|
||||
struct scrub_parity *sparity;
|
||||
struct {
|
||||
@ -140,9 +140,9 @@ struct scrub_parity {
|
||||
|
||||
int nsectors;
|
||||
|
||||
int stripe_len;
|
||||
u64 stripe_len;
|
||||
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
|
||||
struct list_head spages;
|
||||
|
||||
@ -202,7 +202,7 @@ struct scrub_ctx {
|
||||
* doesn't free the scrub context before or while the workers are
|
||||
* doing the wakeup() call.
|
||||
*/
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
};
|
||||
|
||||
struct scrub_fixup_nodatasum {
|
||||
@ -240,6 +240,13 @@ struct scrub_warning {
|
||||
struct btrfs_device *dev;
|
||||
};
|
||||
|
||||
struct full_stripe_lock {
|
||||
struct rb_node node;
|
||||
u64 logical;
|
||||
u64 refs;
|
||||
struct mutex mutex;
|
||||
};
|
||||
|
||||
static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
|
||||
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
|
||||
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
|
||||
@ -305,7 +312,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx);
|
||||
|
||||
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
|
||||
{
|
||||
atomic_inc(&sctx->refs);
|
||||
refcount_inc(&sctx->refs);
|
||||
atomic_inc(&sctx->bios_in_flight);
|
||||
}
|
||||
|
||||
@ -348,6 +355,222 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
|
||||
scrub_pause_off(fs_info);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert new full stripe lock into full stripe locks tree
|
||||
*
|
||||
* Return pointer to existing or newly inserted full_stripe_lock structure if
|
||||
* everything works well.
|
||||
* Return ERR_PTR(-ENOMEM) if we failed to allocate memory
|
||||
*
|
||||
* NOTE: caller must hold full_stripe_locks_root->lock before calling this
|
||||
* function
|
||||
*/
|
||||
static struct full_stripe_lock *insert_full_stripe_lock(
|
||||
struct btrfs_full_stripe_locks_tree *locks_root,
|
||||
u64 fstripe_logical)
|
||||
{
|
||||
struct rb_node **p;
|
||||
struct rb_node *parent = NULL;
|
||||
struct full_stripe_lock *entry;
|
||||
struct full_stripe_lock *ret;
|
||||
|
||||
WARN_ON(!mutex_is_locked(&locks_root->lock));
|
||||
|
||||
p = &locks_root->root.rb_node;
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
entry = rb_entry(parent, struct full_stripe_lock, node);
|
||||
if (fstripe_logical < entry->logical) {
|
||||
p = &(*p)->rb_left;
|
||||
} else if (fstripe_logical > entry->logical) {
|
||||
p = &(*p)->rb_right;
|
||||
} else {
|
||||
entry->refs++;
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
/* Insert new lock */
|
||||
ret = kmalloc(sizeof(*ret), GFP_KERNEL);
|
||||
if (!ret)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
ret->logical = fstripe_logical;
|
||||
ret->refs = 1;
|
||||
mutex_init(&ret->mutex);
|
||||
|
||||
rb_link_node(&ret->node, parent, p);
|
||||
rb_insert_color(&ret->node, &locks_root->root);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search for a full stripe lock of a block group
|
||||
*
|
||||
* Return pointer to existing full stripe lock if found
|
||||
* Return NULL if not found
|
||||
*/
|
||||
static struct full_stripe_lock *search_full_stripe_lock(
|
||||
struct btrfs_full_stripe_locks_tree *locks_root,
|
||||
u64 fstripe_logical)
|
||||
{
|
||||
struct rb_node *node;
|
||||
struct full_stripe_lock *entry;
|
||||
|
||||
WARN_ON(!mutex_is_locked(&locks_root->lock));
|
||||
|
||||
node = locks_root->root.rb_node;
|
||||
while (node) {
|
||||
entry = rb_entry(node, struct full_stripe_lock, node);
|
||||
if (fstripe_logical < entry->logical)
|
||||
node = node->rb_left;
|
||||
else if (fstripe_logical > entry->logical)
|
||||
node = node->rb_right;
|
||||
else
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper to get full stripe logical from a normal bytenr.
|
||||
*
|
||||
* Caller must ensure @cache is a RAID56 block group.
|
||||
*/
|
||||
static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
|
||||
u64 bytenr)
|
||||
{
|
||||
u64 ret;
|
||||
|
||||
/*
|
||||
* Due to chunk item size limit, full stripe length should not be
|
||||
* larger than U32_MAX. Just a sanity check here.
|
||||
*/
|
||||
WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);
|
||||
|
||||
/*
|
||||
* round_down() can only handle power of 2, while RAID56 full
|
||||
* stripe length can be 64KiB * n, so we need to manually round down.
|
||||
*/
|
||||
ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) *
|
||||
cache->full_stripe_len + cache->key.objectid;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock a full stripe to avoid concurrency of recovery and read
|
||||
*
|
||||
* It's only used for profiles with parities (RAID5/6), for other profiles it
|
||||
* does nothing.
|
||||
*
|
||||
* Return 0 if we locked full stripe covering @bytenr, with a mutex held.
|
||||
* So the caller must call unlock_full_stripe() in the same context.
|
||||
*
|
||||
* Return <0 if an error is encountered.
|
||||
*/
|
||||
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
bool *locked_ret)
|
||||
{
|
||||
struct btrfs_block_group_cache *bg_cache;
|
||||
struct btrfs_full_stripe_locks_tree *locks_root;
|
||||
struct full_stripe_lock *existing;
|
||||
u64 fstripe_start;
|
||||
int ret = 0;
|
||||
|
||||
*locked_ret = false;
|
||||
bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
|
||||
if (!bg_cache) {
|
||||
ASSERT(0);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/* Profiles not based on parity don't need full stripe lock */
|
||||
if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
|
||||
goto out;
|
||||
locks_root = &bg_cache->full_stripe_locks_root;
|
||||
|
||||
fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
|
||||
|
||||
/* Now insert the full stripe lock */
|
||||
mutex_lock(&locks_root->lock);
|
||||
existing = insert_full_stripe_lock(locks_root, fstripe_start);
|
||||
mutex_unlock(&locks_root->lock);
|
||||
if (IS_ERR(existing)) {
|
||||
ret = PTR_ERR(existing);
|
||||
goto out;
|
||||
}
|
||||
mutex_lock(&existing->mutex);
|
||||
*locked_ret = true;
|
||||
out:
|
||||
btrfs_put_block_group(bg_cache);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlock a full stripe.
|
||||
*
|
||||
* NOTE: Caller must ensure it's the same context calling corresponding
|
||||
* lock_full_stripe().
|
||||
*
|
||||
* Return 0 if we unlock full stripe without problem.
|
||||
* Return <0 for error
|
||||
*/
|
||||
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
bool locked)
|
||||
{
|
||||
struct btrfs_block_group_cache *bg_cache;
|
||||
struct btrfs_full_stripe_locks_tree *locks_root;
|
||||
struct full_stripe_lock *fstripe_lock;
|
||||
u64 fstripe_start;
|
||||
bool freeit = false;
|
||||
int ret = 0;
|
||||
|
||||
/* If we didn't acquire full stripe lock, no need to continue */
|
||||
if (!locked)
|
||||
return 0;
|
||||
|
||||
bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
|
||||
if (!bg_cache) {
|
||||
ASSERT(0);
|
||||
return -ENOENT;
|
||||
}
|
||||
if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
|
||||
goto out;
|
||||
|
||||
locks_root = &bg_cache->full_stripe_locks_root;
|
||||
fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
|
||||
|
||||
mutex_lock(&locks_root->lock);
|
||||
fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
|
||||
/* Unpaired unlock_full_stripe() detected */
|
||||
if (!fstripe_lock) {
|
||||
WARN_ON(1);
|
||||
ret = -ENOENT;
|
||||
mutex_unlock(&locks_root->lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fstripe_lock->refs == 0) {
|
||||
WARN_ON(1);
|
||||
btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
|
||||
fstripe_lock->logical);
|
||||
} else {
|
||||
fstripe_lock->refs--;
|
||||
}
|
||||
|
||||
if (fstripe_lock->refs == 0) {
|
||||
rb_erase(&fstripe_lock->node, &locks_root->root);
|
||||
freeit = true;
|
||||
}
|
||||
mutex_unlock(&locks_root->lock);
|
||||
|
||||
mutex_unlock(&fstripe_lock->mutex);
|
||||
if (freeit)
|
||||
kfree(fstripe_lock);
|
||||
out:
|
||||
btrfs_put_block_group(bg_cache);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* used for workers that require transaction commits (i.e., for the
|
||||
* NOCOW case)
|
||||
@ -356,7 +579,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = sctx->fs_info;
|
||||
|
||||
atomic_inc(&sctx->refs);
|
||||
refcount_inc(&sctx->refs);
|
||||
/*
|
||||
* increment scrubs_running to prevent cancel requests from
|
||||
* completing as long as a worker is running. we must also
|
||||
@ -447,7 +670,7 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
|
||||
|
||||
static void scrub_put_ctx(struct scrub_ctx *sctx)
|
||||
{
|
||||
if (atomic_dec_and_test(&sctx->refs))
|
||||
if (refcount_dec_and_test(&sctx->refs))
|
||||
scrub_free_ctx(sctx);
|
||||
}
|
||||
|
||||
@ -462,7 +685,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
|
||||
sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
|
||||
if (!sctx)
|
||||
goto nomem;
|
||||
atomic_set(&sctx->refs, 1);
|
||||
refcount_set(&sctx->refs, 1);
|
||||
sctx->is_dev_replace = is_dev_replace;
|
||||
sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
|
||||
sctx->curr = -1;
|
||||
@ -857,12 +1080,14 @@ out:
|
||||
|
||||
static inline void scrub_get_recover(struct scrub_recover *recover)
|
||||
{
|
||||
atomic_inc(&recover->refs);
|
||||
refcount_inc(&recover->refs);
|
||||
}
|
||||
|
||||
static inline void scrub_put_recover(struct scrub_recover *recover)
|
||||
static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
|
||||
struct scrub_recover *recover)
|
||||
{
|
||||
if (atomic_dec_and_test(&recover->refs)) {
|
||||
if (refcount_dec_and_test(&recover->refs)) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
btrfs_put_bbio(recover->bbio);
|
||||
kfree(recover);
|
||||
}
|
||||
@ -892,6 +1117,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
|
||||
int mirror_index;
|
||||
int page_num;
|
||||
int success;
|
||||
bool full_stripe_locked;
|
||||
static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
|
||||
DEFAULT_RATELIMIT_BURST);
|
||||
|
||||
@ -917,6 +1143,24 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
|
||||
have_csum = sblock_to_check->pagev[0]->have_csum;
|
||||
dev = sblock_to_check->pagev[0]->dev;
|
||||
|
||||
/*
|
||||
* For RAID5/6, race can happen for a different device scrub thread.
|
||||
* For data corruption, Parity and Data threads will both try
|
||||
* to recover the data.
|
||||
* Race can lead to doubly added csum error, or even unrecoverable
|
||||
* error.
|
||||
*/
|
||||
ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
|
||||
if (ret < 0) {
|
||||
spin_lock(&sctx->stat_lock);
|
||||
if (ret == -ENOMEM)
|
||||
sctx->stat.malloc_errors++;
|
||||
sctx->stat.read_errors++;
|
||||
sctx->stat.uncorrectable_errors++;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (sctx->is_dev_replace && !is_metadata && !have_csum) {
|
||||
sblocks_for_recheck = NULL;
|
||||
goto nodatasum_case;
|
||||
@ -1241,7 +1485,7 @@ out:
|
||||
sblock->pagev[page_index]->sblock = NULL;
|
||||
recover = sblock->pagev[page_index]->recover;
|
||||
if (recover) {
|
||||
scrub_put_recover(recover);
|
||||
scrub_put_recover(fs_info, recover);
|
||||
sblock->pagev[page_index]->recover =
|
||||
NULL;
|
||||
}
|
||||
@ -1251,6 +1495,9 @@ out:
|
||||
kfree(sblocks_for_recheck);
|
||||
}
|
||||
|
||||
ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1330,20 +1577,23 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
|
||||
* with a length of PAGE_SIZE, each returned stripe
|
||||
* represents one mirror
|
||||
*/
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
|
||||
logical, &mapped_length, &bbio, 0, 1);
|
||||
logical, &mapped_length, &bbio);
|
||||
if (ret || !bbio || mapped_length < sublen) {
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
|
||||
if (!recover) {
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
atomic_set(&recover->refs, 1);
|
||||
refcount_set(&recover->refs, 1);
|
||||
recover->bbio = bbio;
|
||||
recover->map_length = mapped_length;
|
||||
|
||||
@ -1365,7 +1615,7 @@ leave_nomem:
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.malloc_errors++;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
scrub_put_recover(recover);
|
||||
scrub_put_recover(fs_info, recover);
|
||||
return -ENOMEM;
|
||||
}
|
||||
scrub_page_get(page);
|
||||
@ -1407,7 +1657,7 @@ leave_nomem:
|
||||
scrub_get_recover(recover);
|
||||
page->recover = recover;
|
||||
}
|
||||
scrub_put_recover(recover);
|
||||
scrub_put_recover(fs_info, recover);
|
||||
length -= sublen;
|
||||
logical += sublen;
|
||||
page_index++;
|
||||
@ -1497,14 +1747,18 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
|
||||
|
||||
bio_add_page(bio, page->page, PAGE_SIZE, 0);
|
||||
if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
|
||||
if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
|
||||
if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) {
|
||||
page->io_error = 1;
|
||||
sblock->no_io_error_seen = 0;
|
||||
}
|
||||
} else {
|
||||
bio->bi_iter.bi_sector = page->physical >> 9;
|
||||
bio_set_op_attrs(bio, REQ_OP_READ, 0);
|
||||
|
||||
if (btrfsic_submit_bio_wait(bio))
|
||||
if (btrfsic_submit_bio_wait(bio)) {
|
||||
page->io_error = 1;
|
||||
sblock->no_io_error_seen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bio_put(bio);
|
||||
@ -1634,7 +1888,7 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
|
||||
if (spage->io_error) {
|
||||
void *mapped_buffer = kmap_atomic(spage->page);
|
||||
|
||||
memset(mapped_buffer, 0, PAGE_SIZE);
|
||||
clear_page(mapped_buffer);
|
||||
flush_dcache_page(spage->page);
|
||||
kunmap_atomic(mapped_buffer);
|
||||
}
|
||||
@ -1998,12 +2252,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)
|
||||
|
||||
static void scrub_block_get(struct scrub_block *sblock)
|
||||
{
|
||||
atomic_inc(&sblock->refs);
|
||||
refcount_inc(&sblock->refs);
|
||||
}
|
||||
|
||||
static void scrub_block_put(struct scrub_block *sblock)
|
||||
{
|
||||
if (atomic_dec_and_test(&sblock->refs)) {
|
||||
if (refcount_dec_and_test(&sblock->refs)) {
|
||||
int i;
|
||||
|
||||
if (sblock->sparity)
|
||||
@ -2187,8 +2441,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
|
||||
&length, &bbio, 0, 1);
|
||||
&length, &bbio);
|
||||
if (ret || !bbio || !bbio->raid_map)
|
||||
goto bbio_out;
|
||||
|
||||
@ -2231,6 +2486,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
|
||||
rbio_out:
|
||||
bio_put(bio);
|
||||
bbio_out:
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
btrfs_put_bbio(bbio);
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.malloc_errors++;
|
||||
@ -2255,7 +2511,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
|
||||
|
||||
/* one ref inside this function, plus one for each page added to
|
||||
* a bio later on */
|
||||
atomic_set(&sblock->refs, 1);
|
||||
refcount_set(&sblock->refs, 1);
|
||||
sblock->sctx = sctx;
|
||||
sblock->no_io_error_seen = 1;
|
||||
|
||||
@ -2385,7 +2641,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
|
||||
unsigned long *bitmap,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
u32 offset;
|
||||
u64 offset;
|
||||
int nsectors;
|
||||
int sectorsize = sparity->sctx->fs_info->sectorsize;
|
||||
|
||||
@ -2395,8 +2651,8 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
|
||||
}
|
||||
|
||||
start -= sparity->logic_start;
|
||||
start = div_u64_rem(start, sparity->stripe_len, &offset);
|
||||
offset /= sectorsize;
|
||||
start = div64_u64_rem(start, sparity->stripe_len, &offset);
|
||||
offset = div_u64(offset, sectorsize);
|
||||
nsectors = (int)len / sectorsize;
|
||||
|
||||
if (offset + nsectors <= sparity->nsectors) {
|
||||
@ -2555,7 +2811,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
|
||||
|
||||
/* one ref inside this function, plus one for each page added to
|
||||
* a bio later on */
|
||||
atomic_set(&sblock->refs, 1);
|
||||
refcount_set(&sblock->refs, 1);
|
||||
sblock->sctx = sctx;
|
||||
sblock->no_io_error_seen = 1;
|
||||
sblock->sparity = sparity;
|
||||
@ -2694,7 +2950,7 @@ static int get_raid56_logic_offset(u64 physical, int num,
|
||||
for (i = 0; i < nr_data_stripes(map); i++) {
|
||||
*offset = last_offset + i * map->stripe_len;
|
||||
|
||||
stripe_nr = div_u64(*offset, map->stripe_len);
|
||||
stripe_nr = div64_u64(*offset, map->stripe_len);
|
||||
stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
|
||||
|
||||
/* Work out the disk rotation on this stripe-set */
|
||||
@ -2765,7 +3021,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
|
||||
struct btrfs_fs_info *fs_info = sctx->fs_info;
|
||||
struct bio *bio;
|
||||
struct btrfs_raid_bio *rbio;
|
||||
struct scrub_page *spage;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
u64 length;
|
||||
int ret;
|
||||
@ -2775,8 +3030,10 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
|
||||
goto out;
|
||||
|
||||
length = sparity->logic_end - sparity->logic_start;
|
||||
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
|
||||
&length, &bbio, 0, 1);
|
||||
&length, &bbio);
|
||||
if (ret || !bbio || !bbio->raid_map)
|
||||
goto bbio_out;
|
||||
|
||||
@ -2795,9 +3052,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
|
||||
if (!rbio)
|
||||
goto rbio_out;
|
||||
|
||||
list_for_each_entry(spage, &sparity->spages, list)
|
||||
raid56_add_scrub_pages(rbio, spage->page, spage->logical);
|
||||
|
||||
scrub_pending_bio_inc(sctx);
|
||||
raid56_parity_submit_scrub_rbio(rbio);
|
||||
return;
|
||||
@ -2805,6 +3059,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
|
||||
rbio_out:
|
||||
bio_put(bio);
|
||||
bbio_out:
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
btrfs_put_bbio(bbio);
|
||||
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
|
||||
sparity->nsectors);
|
||||
@ -2822,12 +3077,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)
|
||||
|
||||
static void scrub_parity_get(struct scrub_parity *sparity)
|
||||
{
|
||||
atomic_inc(&sparity->refs);
|
||||
refcount_inc(&sparity->refs);
|
||||
}
|
||||
|
||||
static void scrub_parity_put(struct scrub_parity *sparity)
|
||||
{
|
||||
if (!atomic_dec_and_test(&sparity->refs))
|
||||
if (!refcount_dec_and_test(&sparity->refs))
|
||||
return;
|
||||
|
||||
scrub_parity_check_and_repair(sparity);
|
||||
@ -2879,7 +3134,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
|
||||
sparity->scrub_dev = sdev;
|
||||
sparity->logic_start = logic_start;
|
||||
sparity->logic_end = logic_end;
|
||||
atomic_set(&sparity->refs, 1);
|
||||
refcount_set(&sparity->refs, 1);
|
||||
INIT_LIST_HEAD(&sparity->spages);
|
||||
sparity->dbitmap = sparity->bitmap;
|
||||
sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
|
||||
@ -3098,7 +3353,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
|
||||
|
||||
physical = map->stripes[num].physical;
|
||||
offset = 0;
|
||||
nstripes = div_u64(length, map->stripe_len);
|
||||
nstripes = div64_u64(length, map->stripe_len);
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
|
||||
offset = map->stripe_len * num;
|
||||
increment = map->stripe_len * map->num_stripes;
|
||||
|
@ -5184,13 +5184,19 @@ static int is_extent_unchanged(struct send_ctx *sctx,
|
||||
while (key.offset < ekey->offset + left_len) {
|
||||
ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
|
||||
right_type = btrfs_file_extent_type(eb, ei);
|
||||
if (right_type != BTRFS_FILE_EXTENT_REG) {
|
||||
if (right_type != BTRFS_FILE_EXTENT_REG &&
|
||||
right_type != BTRFS_FILE_EXTENT_INLINE) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
|
||||
right_len = btrfs_file_extent_num_bytes(eb, ei);
|
||||
if (right_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
right_len = btrfs_file_extent_inline_len(eb, slot, ei);
|
||||
right_len = PAGE_ALIGN(right_len);
|
||||
} else {
|
||||
right_len = btrfs_file_extent_num_bytes(eb, ei);
|
||||
}
|
||||
right_offset = btrfs_file_extent_offset(eb, ei);
|
||||
right_gen = btrfs_file_extent_generation(eb, ei);
|
||||
|
||||
@ -5204,6 +5210,19 @@ static int is_extent_unchanged(struct send_ctx *sctx,
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* We just wanted to see if when we have an inline extent, what
|
||||
* follows it is a regular extent (wanted to check the above
|
||||
* condition for inline extents too). This should normally not
|
||||
* happen but it's possible for example when we have an inline
|
||||
* compressed extent representing data with a size matching
|
||||
* the page size (currently the same as sector size).
|
||||
*/
|
||||
if (right_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
left_offset_fixed = left_offset;
|
||||
if (key.offset < ekey->offset) {
|
||||
/* Fix the right offset for 2a and 7. */
|
||||
|
@ -1795,8 +1795,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
|
||||
}
|
||||
|
||||
if (fs_info->fs_devices->missing_devices >
|
||||
fs_info->num_tolerated_disk_barrier_failures &&
|
||||
!(*flags & MS_RDONLY)) {
|
||||
fs_info->num_tolerated_disk_barrier_failures) {
|
||||
btrfs_warn(fs_info,
|
||||
"too many missing devices, writeable remount is not allowed");
|
||||
ret = -EACCES;
|
||||
|
@ -237,7 +237,6 @@ void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
memset(trans, 0, sizeof(*trans));
|
||||
trans->transid = 1;
|
||||
INIT_LIST_HEAD(&trans->qgroup_ref_list);
|
||||
trans->type = __TRANS_DUMMY;
|
||||
}
|
||||
|
||||
|
@ -60,8 +60,8 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
|
||||
|
||||
void btrfs_put_transaction(struct btrfs_transaction *transaction)
|
||||
{
|
||||
WARN_ON(atomic_read(&transaction->use_count) == 0);
|
||||
if (atomic_dec_and_test(&transaction->use_count)) {
|
||||
WARN_ON(refcount_read(&transaction->use_count) == 0);
|
||||
if (refcount_dec_and_test(&transaction->use_count)) {
|
||||
BUG_ON(!list_empty(&transaction->list));
|
||||
WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
|
||||
if (transaction->delayed_refs.pending_csums)
|
||||
@ -207,7 +207,7 @@ loop:
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
return -EBUSY;
|
||||
}
|
||||
atomic_inc(&cur_trans->use_count);
|
||||
refcount_inc(&cur_trans->use_count);
|
||||
atomic_inc(&cur_trans->num_writers);
|
||||
extwriter_counter_inc(cur_trans, type);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
@ -257,7 +257,7 @@ loop:
|
||||
* One for this trans handle, one so it will live on until we
|
||||
* commit the transaction.
|
||||
*/
|
||||
atomic_set(&cur_trans->use_count, 2);
|
||||
refcount_set(&cur_trans->use_count, 2);
|
||||
atomic_set(&cur_trans->pending_ordered, 0);
|
||||
cur_trans->flags = 0;
|
||||
cur_trans->start_time = get_seconds();
|
||||
@ -432,7 +432,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info)
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
cur_trans = fs_info->running_transaction;
|
||||
if (cur_trans && is_transaction_blocked(cur_trans)) {
|
||||
atomic_inc(&cur_trans->use_count);
|
||||
refcount_inc(&cur_trans->use_count);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
wait_event(fs_info->transaction_wait,
|
||||
@ -572,7 +572,6 @@ again:
|
||||
|
||||
h->type = type;
|
||||
h->can_flush_pending_bgs = true;
|
||||
INIT_LIST_HEAD(&h->qgroup_ref_list);
|
||||
INIT_LIST_HEAD(&h->new_bgs);
|
||||
|
||||
smp_mb();
|
||||
@ -744,7 +743,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
|
||||
list_for_each_entry(t, &fs_info->trans_list, list) {
|
||||
if (t->transid == transid) {
|
||||
cur_trans = t;
|
||||
atomic_inc(&cur_trans->use_count);
|
||||
refcount_inc(&cur_trans->use_count);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
@ -773,7 +772,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
|
||||
if (t->state == TRANS_STATE_COMPLETED)
|
||||
break;
|
||||
cur_trans = t;
|
||||
atomic_inc(&cur_trans->use_count);
|
||||
refcount_inc(&cur_trans->use_count);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -917,7 +916,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||
wake_up_process(info->transaction_kthread);
|
||||
err = -EIO;
|
||||
}
|
||||
assert_qgroups_uptodate(trans);
|
||||
|
||||
kmem_cache_free(btrfs_trans_handle_cachep, trans);
|
||||
if (must_run_delayed_refs) {
|
||||
@ -1839,7 +1837,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
|
||||
|
||||
/* take transaction reference */
|
||||
cur_trans = trans->transaction;
|
||||
atomic_inc(&cur_trans->use_count);
|
||||
refcount_inc(&cur_trans->use_count);
|
||||
|
||||
btrfs_end_transaction(trans);
|
||||
|
||||
@ -2015,7 +2013,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
atomic_inc(&cur_trans->use_count);
|
||||
refcount_inc(&cur_trans->use_count);
|
||||
ret = btrfs_end_transaction(trans);
|
||||
|
||||
wait_for_commit(cur_trans);
|
||||
@ -2035,7 +2033,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
prev_trans = list_entry(cur_trans->list.prev,
|
||||
struct btrfs_transaction, list);
|
||||
if (prev_trans->state != TRANS_STATE_COMPLETED) {
|
||||
atomic_inc(&prev_trans->use_count);
|
||||
refcount_inc(&prev_trans->use_count);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
wait_for_commit(prev_trans);
|
||||
@ -2130,13 +2128,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
goto scrub_continue;
|
||||
}
|
||||
|
||||
/* Reocrd old roots for later qgroup accounting */
|
||||
ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* make sure none of the code above managed to slip in a
|
||||
* delayed item
|
||||
@ -2178,6 +2169,24 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
*/
|
||||
btrfs_free_log_root_tree(trans, fs_info);
|
||||
|
||||
/*
|
||||
* commit_fs_roots() can call btrfs_save_ino_cache(), which generates
|
||||
* new delayed refs. Must handle them or qgroup can be wrong.
|
||||
*/
|
||||
ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->tree_log_mutex);
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
|
||||
ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->tree_log_mutex);
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Since fs roots are all committed, we can get a quite accurate
|
||||
* new_roots. So let's do quota accounting.
|
||||
@ -2223,7 +2232,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
switch_commit_roots(cur_trans, fs_info);
|
||||
|
||||
assert_qgroups_uptodate(trans);
|
||||
ASSERT(list_empty(&cur_trans->dirty_bgs));
|
||||
ASSERT(list_empty(&cur_trans->io_bgs));
|
||||
update_super_roots(fs_info);
|
||||
|
@ -18,6 +18,8 @@
|
||||
|
||||
#ifndef __BTRFS_TRANSACTION__
|
||||
#define __BTRFS_TRANSACTION__
|
||||
|
||||
#include <linux/refcount.h>
|
||||
#include "btrfs_inode.h"
|
||||
#include "delayed-ref.h"
|
||||
#include "ctree.h"
|
||||
@ -49,7 +51,7 @@ struct btrfs_transaction {
|
||||
* transaction can end
|
||||
*/
|
||||
atomic_t num_writers;
|
||||
atomic_t use_count;
|
||||
refcount_t use_count;
|
||||
atomic_t pending_ordered;
|
||||
|
||||
unsigned long flags;
|
||||
@ -125,8 +127,6 @@ struct btrfs_trans_handle {
|
||||
unsigned int type;
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct seq_list delayed_ref_elem;
|
||||
struct list_head qgroup_ref_list;
|
||||
struct list_head new_bgs;
|
||||
};
|
||||
|
||||
|
@ -4196,7 +4196,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
|
||||
if (em->generation <= test_gen)
|
||||
continue;
|
||||
/* Need a ref to keep it from getting evicted from cache */
|
||||
atomic_inc(&em->refs);
|
||||
refcount_inc(&em->refs);
|
||||
set_bit(EXTENT_FLAG_LOGGING, &em->flags);
|
||||
list_add_tail(&em->list, &extents);
|
||||
num++;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -123,7 +123,6 @@ struct btrfs_device {
|
||||
struct list_head resized_list;
|
||||
|
||||
/* for sending down flush barriers */
|
||||
int nobarriers;
|
||||
struct bio *flush_bio;
|
||||
struct completion flush_wait;
|
||||
|
||||
@ -298,7 +297,7 @@ struct btrfs_bio;
|
||||
typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
|
||||
|
||||
struct btrfs_bio {
|
||||
atomic_t refs;
|
||||
refcount_t refs;
|
||||
atomic_t stripes_pending;
|
||||
struct btrfs_fs_info *fs_info;
|
||||
u64 map_type; /* get from map_lookup->type */
|
||||
@ -400,8 +399,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
struct btrfs_bio **bbio_ret, int mirror_num);
|
||||
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
u64 logical, u64 *length,
|
||||
struct btrfs_bio **bbio_ret, int mirror_num,
|
||||
int need_raid_map);
|
||||
struct btrfs_bio **bbio_ret);
|
||||
int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
|
||||
u64 chunk_start, u64 physical, u64 devid,
|
||||
u64 **logical, int *naddrs, int *stripe_len);
|
||||
@ -475,7 +473,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *tgtdev);
|
||||
void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
|
||||
int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
|
||||
int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
|
||||
u64 logical, u64 len, int mirror_num);
|
||||
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_mapping_tree *map_tree,
|
||||
|
@ -12,6 +12,7 @@ struct btrfs_root;
|
||||
struct btrfs_fs_info;
|
||||
struct btrfs_inode;
|
||||
struct extent_map;
|
||||
struct btrfs_file_extent_item;
|
||||
struct btrfs_ordered_extent;
|
||||
struct btrfs_delayed_ref_node;
|
||||
struct btrfs_delayed_tree_ref;
|
||||
@ -24,6 +25,7 @@ struct extent_buffer;
|
||||
struct btrfs_work;
|
||||
struct __btrfs_workqueue;
|
||||
struct btrfs_qgroup_extent_record;
|
||||
struct btrfs_qgroup;
|
||||
|
||||
#define show_ref_type(type) \
|
||||
__print_symbolic(type, \
|
||||
@ -54,6 +56,12 @@ struct btrfs_qgroup_extent_record;
|
||||
(obj >= BTRFS_ROOT_TREE_OBJECTID && \
|
||||
obj <= BTRFS_QUOTA_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
|
||||
|
||||
#define show_fi_type(type) \
|
||||
__print_symbolic(type, \
|
||||
{ BTRFS_FILE_EXTENT_INLINE, "INLINE" }, \
|
||||
{ BTRFS_FILE_EXTENT_REG, "REG" }, \
|
||||
{ BTRFS_FILE_EXTENT_PREALLOC, "PREALLOC"})
|
||||
|
||||
#define BTRFS_GROUP_FLAGS \
|
||||
{ BTRFS_BLOCK_GROUP_DATA, "DATA"}, \
|
||||
{ BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \
|
||||
@ -213,7 +221,7 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
|
||||
__entry->block_start = map->block_start;
|
||||
__entry->block_len = map->block_len;
|
||||
__entry->flags = map->flags;
|
||||
__entry->refs = atomic_read(&map->refs);
|
||||
__entry->refs = refcount_read(&map->refs);
|
||||
__entry->compress_type = map->compress_type;
|
||||
),
|
||||
|
||||
@ -232,6 +240,138 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
|
||||
__entry->refs, __entry->compress_type)
|
||||
);
|
||||
|
||||
/* file extent item */
|
||||
DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
|
||||
|
||||
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
|
||||
struct btrfs_file_extent_item *fi, u64 start),
|
||||
|
||||
TP_ARGS(bi, l, fi, start),
|
||||
|
||||
TP_STRUCT__entry_btrfs(
|
||||
__field( u64, root_obj )
|
||||
__field( u64, ino )
|
||||
__field( loff_t, isize )
|
||||
__field( u64, disk_isize )
|
||||
__field( u64, num_bytes )
|
||||
__field( u64, ram_bytes )
|
||||
__field( u64, disk_bytenr )
|
||||
__field( u64, disk_num_bytes )
|
||||
__field( u64, extent_offset )
|
||||
__field( u8, extent_type )
|
||||
__field( u8, compression )
|
||||
__field( u64, extent_start )
|
||||
__field( u64, extent_end )
|
||||
),
|
||||
|
||||
TP_fast_assign_btrfs(bi->root->fs_info,
|
||||
__entry->root_obj = bi->root->objectid;
|
||||
__entry->ino = btrfs_ino(bi);
|
||||
__entry->isize = bi->vfs_inode.i_size;
|
||||
__entry->disk_isize = bi->disk_i_size;
|
||||
__entry->num_bytes = btrfs_file_extent_num_bytes(l, fi);
|
||||
__entry->ram_bytes = btrfs_file_extent_ram_bytes(l, fi);
|
||||
__entry->disk_bytenr = btrfs_file_extent_disk_bytenr(l, fi);
|
||||
__entry->disk_num_bytes = btrfs_file_extent_disk_num_bytes(l, fi);
|
||||
__entry->extent_offset = btrfs_file_extent_offset(l, fi);
|
||||
__entry->extent_type = btrfs_file_extent_type(l, fi);
|
||||
__entry->compression = btrfs_file_extent_compression(l, fi);
|
||||
__entry->extent_start = start;
|
||||
__entry->extent_end = (start + __entry->num_bytes);
|
||||
),
|
||||
|
||||
TP_printk_btrfs(
|
||||
"root=%llu(%s) inode=%llu size=%llu disk_isize=%llu "
|
||||
"file extent range=[%llu %llu] "
|
||||
"(num_bytes=%llu ram_bytes=%llu disk_bytenr=%llu "
|
||||
"disk_num_bytes=%llu extent_offset=%llu type=%s "
|
||||
"compression=%u",
|
||||
show_root_type(__entry->root_obj), __entry->ino,
|
||||
__entry->isize,
|
||||
__entry->disk_isize, __entry->extent_start,
|
||||
__entry->extent_end, __entry->num_bytes, __entry->ram_bytes,
|
||||
__entry->disk_bytenr, __entry->disk_num_bytes,
|
||||
__entry->extent_offset, show_fi_type(__entry->extent_type),
|
||||
__entry->compression)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(
|
||||
btrfs__file_extent_item_inline,
|
||||
|
||||
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
|
||||
struct btrfs_file_extent_item *fi, int slot, u64 start),
|
||||
|
||||
TP_ARGS(bi, l, fi, slot, start),
|
||||
|
||||
TP_STRUCT__entry_btrfs(
|
||||
__field( u64, root_obj )
|
||||
__field( u64, ino )
|
||||
__field( loff_t, isize )
|
||||
__field( u64, disk_isize )
|
||||
__field( u8, extent_type )
|
||||
__field( u8, compression )
|
||||
__field( u64, extent_start )
|
||||
__field( u64, extent_end )
|
||||
),
|
||||
|
||||
TP_fast_assign_btrfs(
|
||||
bi->root->fs_info,
|
||||
__entry->root_obj = bi->root->objectid;
|
||||
__entry->ino = btrfs_ino(bi);
|
||||
__entry->isize = bi->vfs_inode.i_size;
|
||||
__entry->disk_isize = bi->disk_i_size;
|
||||
__entry->extent_type = btrfs_file_extent_type(l, fi);
|
||||
__entry->compression = btrfs_file_extent_compression(l, fi);
|
||||
__entry->extent_start = start;
|
||||
__entry->extent_end = (start + btrfs_file_extent_inline_len(l, slot, fi));
|
||||
),
|
||||
|
||||
TP_printk_btrfs(
|
||||
"root=%llu(%s) inode=%llu size=%llu disk_isize=%llu "
|
||||
"file extent range=[%llu %llu] "
|
||||
"extent_type=%s compression=%u",
|
||||
show_root_type(__entry->root_obj), __entry->ino, __entry->isize,
|
||||
__entry->disk_isize, __entry->extent_start,
|
||||
__entry->extent_end, show_fi_type(__entry->extent_type),
|
||||
__entry->compression)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(
|
||||
btrfs__file_extent_item_regular, btrfs_get_extent_show_fi_regular,
|
||||
|
||||
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
|
||||
struct btrfs_file_extent_item *fi, u64 start),
|
||||
|
||||
TP_ARGS(bi, l, fi, start)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(
|
||||
btrfs__file_extent_item_regular, btrfs_truncate_show_fi_regular,
|
||||
|
||||
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
|
||||
struct btrfs_file_extent_item *fi, u64 start),
|
||||
|
||||
TP_ARGS(bi, l, fi, start)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(
|
||||
btrfs__file_extent_item_inline, btrfs_get_extent_show_fi_inline,
|
||||
|
||||
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
|
||||
struct btrfs_file_extent_item *fi, int slot, u64 start),
|
||||
|
||||
TP_ARGS(bi, l, fi, slot, start)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(
|
||||
btrfs__file_extent_item_inline, btrfs_truncate_show_fi_inline,
|
||||
|
||||
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
|
||||
struct btrfs_file_extent_item *fi, int slot, u64 start),
|
||||
|
||||
TP_ARGS(bi, l, fi, slot, start)
|
||||
);
|
||||
|
||||
#define show_ordered_flags(flags) \
|
||||
__print_flags(flags, "|", \
|
||||
{ (1 << BTRFS_ORDERED_IO_DONE), "IO_DONE" }, \
|
||||
@ -275,7 +415,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
|
||||
__entry->bytes_left = ordered->bytes_left;
|
||||
__entry->flags = ordered->flags;
|
||||
__entry->compress_type = ordered->compress_type;
|
||||
__entry->refs = atomic_read(&ordered->refs);
|
||||
__entry->refs = refcount_read(&ordered->refs);
|
||||
__entry->root_objectid =
|
||||
BTRFS_I(inode)->root->root_key.objectid;
|
||||
__entry->truncated_len = ordered->truncated_len;
|
||||
@ -1475,6 +1615,49 @@ TRACE_EVENT(qgroup_update_counters,
|
||||
__entry->cur_new_count)
|
||||
);
|
||||
|
||||
TRACE_EVENT(qgroup_update_reserve,
|
||||
|
||||
TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup,
|
||||
s64 diff),
|
||||
|
||||
TP_ARGS(fs_info, qgroup, diff),
|
||||
|
||||
TP_STRUCT__entry_btrfs(
|
||||
__field( u64, qgid )
|
||||
__field( u64, cur_reserved )
|
||||
__field( s64, diff )
|
||||
),
|
||||
|
||||
TP_fast_assign_btrfs(fs_info,
|
||||
__entry->qgid = qgroup->qgroupid;
|
||||
__entry->cur_reserved = qgroup->reserved;
|
||||
__entry->diff = diff;
|
||||
),
|
||||
|
||||
TP_printk_btrfs("qgid=%llu cur_reserved=%llu diff=%lld",
|
||||
__entry->qgid, __entry->cur_reserved, __entry->diff)
|
||||
);
|
||||
|
||||
TRACE_EVENT(qgroup_meta_reserve,
|
||||
|
||||
TP_PROTO(struct btrfs_root *root, s64 diff),
|
||||
|
||||
TP_ARGS(root, diff),
|
||||
|
||||
TP_STRUCT__entry_btrfs(
|
||||
__field( u64, refroot )
|
||||
__field( s64, diff )
|
||||
),
|
||||
|
||||
TP_fast_assign_btrfs(root->fs_info,
|
||||
__entry->refroot = root->objectid;
|
||||
__entry->diff = diff;
|
||||
),
|
||||
|
||||
TP_printk_btrfs("refroot=%llu(%s) diff=%lld",
|
||||
show_root_type(__entry->refroot), __entry->diff)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_BTRFS_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
@ -291,10 +291,10 @@ struct btrfs_ioctl_feature_flags {
|
||||
struct btrfs_balance_args {
|
||||
__u64 profiles;
|
||||
union {
|
||||
__le64 usage;
|
||||
__u64 usage;
|
||||
struct {
|
||||
__le32 usage_min;
|
||||
__le32 usage_max;
|
||||
__u32 usage_min;
|
||||
__u32 usage_max;
|
||||
};
|
||||
};
|
||||
__u64 devid;
|
||||
@ -324,8 +324,8 @@ struct btrfs_balance_args {
|
||||
* Process chunks that cross stripes_min..stripes_max devices,
|
||||
* BTRFS_BALANCE_ARGS_STRIPES_RANGE
|
||||
*/
|
||||
__le32 stripes_min;
|
||||
__le32 stripes_max;
|
||||
__u32 stripes_min;
|
||||
__u32 stripes_max;
|
||||
|
||||
__u64 unused[6];
|
||||
} __attribute__ ((__packed__));
|
||||
|
Loading…
Reference in New Issue
Block a user