for-6.0-rc1-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmL/dJgACgkQxWXV+ddt
 WDtUmg/9Fje7L+jAtQLqvJYvvGCFCCs8gkm+Rwh9MLYHcEDnBtRWzDWYTKcfO9WW
 mPmiNlNPAbnAw/9apeJDvFOrd4Vqr8ZD3Vr0flXl5EJ9QXSTL6JXfaiLS1o6rQ0q
 OXqxVTh5B5aEmfsEhyJBLzsZGdISJbr60dAE8/ZDX9wo8cDavZ6YxToIroUeizGO
 dx2DY5A7OQRKTEf4aJgu4zcm1Sq+U5A8M1pcfU4Rhb/YapVgcCc2wrdrw4NOkNJu
 B/NZFcD0BcF2bZW7uHT5vr8rGzj2xZUCkuggBQ6/0/h7OdRXIzHCanMw4lPy602v
 IfZASF7eYkq7FHRbANj/WAYHOFl41vS+whqZ+sEI/qrW+ZyrODIzS67sbU/Bsa7+
 ZoL6RSlIbcMgz7XVqcc5d8bKNK/Hc3MCyMWlLT0XScm05BiJy5O2iEZ7UOJ4r8E+
 6J/pFD1bNdacp6UrzwEjyXuSufvp4pJdNWn5ttIWYTBygXMT8AJ403yIheiV3KA3
 SkoMj4A54tF3G7NhkzfR5sC7hlgcA0njJLzloicmNgP7E12vmcDL2rTdTt/Yl0cw
 3w6ztJS+1sWocFSJ43lE2muHlj7wW7QDYPvul1t6yWgO9wtWECo7c/ASeRl0zPkP
 atAJtsr3uV9/aA2ae9QeWzut1W3hbE5NFQOLPcF/iXN+kxMmesI=
 =HgV0
 -----END PGP SIGNATURE-----

Merge tag 'for-6.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A few short fixes and a lockdep warning fix (needs moving some code):

   - tree-log replay fixes:
      - fix error handling when looking up extent refs
      - fix warning when setting inode number of links

   - relocation fixes:
      - reset block group read-only status when relocation fails
      - unset control structure if transaction fails when starting
        to process a block group
      - add lockdep annotations to fix a warning during relocation
        where blocks temporarily belong to another tree and can lead
        to reversed dependencies

   - tree-checker verifies that extent items don't overlap"

* tag 'for-6.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: tree-checker: check for overlapping extent items
  btrfs: fix warning during log replay when bumping inode link count
  btrfs: fix lost error handling when looking up extended ref on log replay
  btrfs: fix lockdep splat with reloc root extent buffers
  btrfs: move lockdep class helpers to locking.c
  btrfs: unset reloc control if transaction commit fails in prepare_to_relocate()
  btrfs: reset RO counter on block group if we fail to relocate
This commit is contained in:
Linus Torvalds 2022-08-19 13:33:48 -07:00
commit 42c54d5491
12 changed files with 176 additions and 101 deletions

View File

@ -1640,9 +1640,11 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg);
ret = btrfs_relocate_chunk(fs_info, bg->start);
if (ret)
if (ret) {
btrfs_dec_block_group_ro(bg);
btrfs_err(fs_info, "error relocating chunk %llu",
bg->start);
}
next:
btrfs_put_block_group(bg);

View File

@ -2075,6 +2075,9 @@ cow_done:
if (!p->skip_locking) {
level = btrfs_header_level(b);
btrfs_maybe_reset_lockdep_class(root, b);
if (level <= write_lock_level) {
btrfs_tree_lock(b);
p->locks[level] = BTRFS_WRITE_LOCK;

View File

@ -1173,6 +1173,8 @@ enum {
BTRFS_ROOT_ORPHAN_CLEANUP,
/* This root has a drop operation that was started previously. */
BTRFS_ROOT_UNFINISHED_DROP,
/* This reloc root needs to have its buffers lockdep class reset. */
BTRFS_ROOT_RESET_LOCKDEP_CLASS,
};
static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)

View File

@ -86,88 +86,6 @@ struct async_submit_bio {
blk_status_t status;
};
/*
* Lockdep class keys for extent_buffer->lock's in this root. For a given
* eb, the lockdep key is determined by the btrfs_root it belongs to and
* the level the eb occupies in the tree.
*
* Different roots are used for different purposes and may nest inside each
* other and they require separate keysets. As lockdep keys should be
* static, assign keysets according to the purpose of the root as indicated
* by btrfs_root->root_key.objectid. This ensures that all special purpose
* roots have separate keysets.
*
* Lock-nesting across peer nodes is always done with the immediate parent
* node locked thus preventing deadlock. As lockdep doesn't know this, use
* subclass to avoid triggering lockdep warning in such cases.
*
* The key is set by the readpage_end_io_hook after the buffer has passed
* csum validation but before the pages are unlocked. It is also set by
* btrfs_init_new_buffer on freshly allocated blocks.
*
* We also add a check to make sure the highest level of the tree is the
* same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
* needs update as well.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# if BTRFS_MAX_LEVEL != 8
# error
# endif
#define DEFINE_LEVEL(stem, level) \
.names[level] = "btrfs-" stem "-0" #level,
#define DEFINE_NAME(stem) \
DEFINE_LEVEL(stem, 0) \
DEFINE_LEVEL(stem, 1) \
DEFINE_LEVEL(stem, 2) \
DEFINE_LEVEL(stem, 3) \
DEFINE_LEVEL(stem, 4) \
DEFINE_LEVEL(stem, 5) \
DEFINE_LEVEL(stem, 6) \
DEFINE_LEVEL(stem, 7)
static struct btrfs_lockdep_keyset {
u64 id; /* root objectid */
/* Longest entry: btrfs-free-space-00 */
char names[BTRFS_MAX_LEVEL][20];
struct lock_class_key keys[BTRFS_MAX_LEVEL];
} btrfs_lockdep_keysets[] = {
{ .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
{ .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") },
{ .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") },
{ .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") },
{ .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") },
{ .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") },
{ .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") },
{ .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
{ .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
{ .id = 0, DEFINE_NAME("tree") },
};
#undef DEFINE_LEVEL
#undef DEFINE_NAME
void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
int level)
{
struct btrfs_lockdep_keyset *ks;
BUG_ON(level >= ARRAY_SIZE(ks->keys));
/* find the matching keyset, id 0 is the default entry */
for (ks = btrfs_lockdep_keysets; ks->id; ks++)
if (ks->id == objectid)
break;
lockdep_set_class_and_name(&eb->lock,
&ks->keys[level], ks->names[level]);
}
#endif
/*
* Compute the csum of a btree block and store the result to provided buffer.
*/

View File

@ -137,14 +137,4 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
int btrfs_init_root_free_objectid(struct btrfs_root *root);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(u64 objectid,
struct extent_buffer *eb, int level);
#else
static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
struct extent_buffer *eb, int level)
{
}
#endif
#endif

View File

@ -4867,6 +4867,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *buf;
u64 lockdep_owner = owner;
buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
if (IS_ERR(buf))
@ -4885,12 +4886,27 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return ERR_PTR(-EUCLEAN);
}
/*
* The reloc trees are just snapshots, so we need them to appear to be
* just like any other fs tree WRT lockdep.
*
* The exception however is in replace_path() in relocation, where we
* hold the lock on the original fs root and then search for the reloc
* root. At that point we need to make sure any reloc root buffers are
* set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
* lockdep happy.
*/
if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
!test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
lockdep_owner = BTRFS_FS_TREE_OBJECTID;
/*
* This needs to stay, because we could allocate a freed block from an
* old tree into a new tree, so we need to make sure this new block is
* set to the appropriate level and owner.
*/
btrfs_set_buffer_lockdep_class(owner, buf, level);
btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
__btrfs_tree_lock(buf, nest);
btrfs_clean_tree_block(buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);

View File

@ -6140,6 +6140,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
struct extent_buffer *exists = NULL;
struct page *p;
struct address_space *mapping = fs_info->btree_inode->i_mapping;
u64 lockdep_owner = owner_root;
int uptodate = 1;
int ret;
@ -6164,7 +6165,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
return ERR_PTR(-ENOMEM);
btrfs_set_buffer_lockdep_class(owner_root, eb, level);
/*
* The reloc trees are just snapshots, so we need them to appear to be
* just like any other fs tree WRT lockdep.
*/
if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID)
lockdep_owner = BTRFS_FS_TREE_OBJECTID;
btrfs_set_buffer_lockdep_class(lockdep_owner, eb, level);
num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++, index++) {

View File

@ -13,6 +13,93 @@
#include "extent_io.h"
#include "locking.h"
/*
* Lockdep class keys for extent_buffer->lock's in this root. For a given
* eb, the lockdep key is determined by the btrfs_root it belongs to and
* the level the eb occupies in the tree.
*
* Different roots are used for different purposes and may nest inside each
* other and they require separate keysets. As lockdep keys should be
* static, assign keysets according to the purpose of the root as indicated
* by btrfs_root->root_key.objectid. This ensures that all special purpose
* roots have separate keysets.
*
* Lock-nesting across peer nodes is always done with the immediate parent
* node locked thus preventing deadlock. As lockdep doesn't know this, use
* subclass to avoid triggering lockdep warning in such cases.
*
* The key is set by the readpage_end_io_hook after the buffer has passed
* csum validation but before the pages are unlocked. It is also set by
* btrfs_init_new_buffer on freshly allocated blocks.
*
* We also add a check to make sure the highest level of the tree is the
* same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
* needs update as well.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#if BTRFS_MAX_LEVEL != 8
#error
#endif
#define DEFINE_LEVEL(stem, level) \
.names[level] = "btrfs-" stem "-0" #level,
#define DEFINE_NAME(stem) \
DEFINE_LEVEL(stem, 0) \
DEFINE_LEVEL(stem, 1) \
DEFINE_LEVEL(stem, 2) \
DEFINE_LEVEL(stem, 3) \
DEFINE_LEVEL(stem, 4) \
DEFINE_LEVEL(stem, 5) \
DEFINE_LEVEL(stem, 6) \
DEFINE_LEVEL(stem, 7)
static struct btrfs_lockdep_keyset {
u64 id; /* root objectid */
/* Longest entry: btrfs-free-space-00 */
char names[BTRFS_MAX_LEVEL][20];
struct lock_class_key keys[BTRFS_MAX_LEVEL];
} btrfs_lockdep_keysets[] = {
{ .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
{ .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") },
{ .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") },
{ .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") },
{ .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") },
{ .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") },
{ .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") },
{ .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
{ .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
{ .id = 0, DEFINE_NAME("tree") },
};
#undef DEFINE_LEVEL
#undef DEFINE_NAME
void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
{
struct btrfs_lockdep_keyset *ks;
BUG_ON(level >= ARRAY_SIZE(ks->keys));
/* Find the matching keyset, id 0 is the default entry */
for (ks = btrfs_lockdep_keysets; ks->id; ks++)
if (ks->id == objectid)
break;
lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
}
void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
{
if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
btrfs_set_buffer_lockdep_class(root->root_key.objectid,
eb, btrfs_header_level(eb));
}
#endif
/*
* Extent buffer locking
* =====================
@ -164,6 +251,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
while (1) {
eb = btrfs_root_node(root);
btrfs_maybe_reset_lockdep_class(root, eb);
btrfs_tree_lock(eb);
if (eb == root->node)
break;
@ -185,6 +274,8 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
while (1) {
eb = btrfs_root_node(root);
btrfs_maybe_reset_lockdep_class(root, eb);
btrfs_tree_read_lock(eb);
if (eb == root->node)
break;

View File

@ -131,4 +131,18 @@ void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock);
void btrfs_drew_read_lock(struct btrfs_drew_lock *lock);
void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level);
void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb);
#else
static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
struct extent_buffer *eb, int level)
{
}
static inline void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root,
struct extent_buffer *eb)
{
}
#endif
#endif

View File

@ -1326,7 +1326,9 @@ again:
btrfs_release_path(path);
path->lowest_level = level;
set_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state);
ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
clear_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state);
path->lowest_level = 0;
if (ret) {
if (ret > 0)
@ -3573,7 +3575,12 @@ int prepare_to_relocate(struct reloc_control *rc)
*/
return PTR_ERR(trans);
}
return btrfs_commit_transaction(trans);
ret = btrfs_commit_transaction(trans);
if (ret)
unset_reloc_control(rc);
return ret;
}
static noinline_for_stack int relocate_block_group(struct reloc_control *rc)

View File

@ -1233,7 +1233,8 @@ static void extent_err(const struct extent_buffer *eb, int slot,
}
static int check_extent_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
struct btrfs_key *key, int slot,
struct btrfs_key *prev_key)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_extent_item *ei;
@ -1453,6 +1454,26 @@ static int check_extent_item(struct extent_buffer *leaf,
total_refs, inline_refs);
return -EUCLEAN;
}
if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) ||
(prev_key->type == BTRFS_METADATA_ITEM_KEY)) {
u64 prev_end = prev_key->objectid;
if (prev_key->type == BTRFS_METADATA_ITEM_KEY)
prev_end += fs_info->nodesize;
else
prev_end += prev_key->offset;
if (unlikely(prev_end > key->objectid)) {
extent_err(leaf, slot,
"previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]",
prev_key->objectid, prev_key->type,
prev_key->offset, key->objectid, key->type,
key->offset);
return -EUCLEAN;
}
}
return 0;
}
@ -1621,7 +1642,7 @@ static int check_leaf_item(struct extent_buffer *leaf,
break;
case BTRFS_EXTENT_ITEM_KEY:
case BTRFS_METADATA_ITEM_KEY:
ret = check_extent_item(leaf, key, slot);
ret = check_extent_item(leaf, key, slot, prev_key);
break;
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_SHARED_DATA_REF_KEY:

View File

@ -1146,7 +1146,9 @@ again:
extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
inode_objectid, parent_objectid, 0,
0);
if (!IS_ERR_OR_NULL(extref)) {
if (IS_ERR(extref)) {
return PTR_ERR(extref);
} else if (extref) {
u32 item_size;
u32 cur_offset = 0;
unsigned long base;
@ -1457,7 +1459,7 @@ static int add_link(struct btrfs_trans_handle *trans,
* on the inode will not free it. We will fixup the link count later.
*/
if (other_inode->i_nlink == 0)
inc_nlink(other_inode);
set_nlink(other_inode, 1);
add_link:
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
name, namelen, 0, ref_index);
@ -1600,7 +1602,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* free it. We will fixup the link count later.
*/
if (!ret && inode->i_nlink == 0)
inc_nlink(inode);
set_nlink(inode, 1);
}
if (ret < 0)
goto out;