mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-26 06:04:14 +08:00
eddda68d97
A weird KASAN problem that Zygo reported could have been easily caught if we checked for basic things in our backref freeing code. We have two methods of freeing a backref node - btrfs_backref_free_node: this just is kfree() essentially. - btrfs_backref_drop_node: this actually unlinks the node and cleans up everything and then calls btrfs_backref_free_node(). We should mostly be using btrfs_backref_drop_node(), to make sure the node is properly unlinked from the backref cache, and only use btrfs_backref_free_node() when we know the node isn't actually linked to the backref cache. We made a mistake here and thus got the KASAN splat. Make this style of issue easier to find by adding some ASSERT()'s to btrfs_backref_free_node() and adjusting our deletion stuff to properly init the list so we can rely on list_empty() checks working properly. BUG: KASAN: use-after-free in btrfs_backref_cleanup_node+0x18a/0x420 Read of size 8 at addr ffff888112402950 by task btrfs/28836 CPU: 0 PID: 28836 Comm: btrfs Tainted: G W 5.10.0-e35f27394290-for-next+ #23 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: dump_stack+0xbc/0xf9 ? btrfs_backref_cleanup_node+0x18a/0x420 print_address_description.constprop.8+0x21/0x210 ? record_print_text.cold.34+0x11/0x11 ? btrfs_backref_cleanup_node+0x18a/0x420 ? btrfs_backref_cleanup_node+0x18a/0x420 kasan_report.cold.10+0x20/0x37 ? btrfs_backref_cleanup_node+0x18a/0x420 __asan_load8+0x69/0x90 btrfs_backref_cleanup_node+0x18a/0x420 btrfs_backref_release_cache+0x83/0x1b0 relocate_block_group+0x394/0x780 ? merge_reloc_roots+0x4a0/0x4a0 btrfs_relocate_block_group+0x26e/0x4c0 btrfs_relocate_chunk+0x52/0x120 btrfs_balance+0xe2e/0x1900 ? check_flags.part.50+0x6c/0x1e0 ? btrfs_relocate_chunk+0x120/0x120 ? kmem_cache_alloc_trace+0xa06/0xcb0 ? _copy_from_user+0x83/0xc0 btrfs_ioctl_balance+0x3a7/0x460 btrfs_ioctl+0x24c8/0x4360 ? __kasan_check_read+0x11/0x20 ? check_chain_key+0x1f4/0x2f0 ? __asan_loadN+0xf/0x20 ? 
btrfs_ioctl_get_supported_features+0x30/0x30 ? kvm_sched_clock_read+0x18/0x30 ? check_chain_key+0x1f4/0x2f0 ? lock_downgrade+0x3f0/0x3f0 ? handle_mm_fault+0xad6/0x2150 ? do_vfs_ioctl+0xfc/0x9d0 ? ioctl_file_clone+0xe0/0xe0 ? check_flags.part.50+0x6c/0x1e0 ? check_flags.part.50+0x6c/0x1e0 ? check_flags+0x26/0x30 ? lock_is_held_type+0xc3/0xf0 ? syscall_enter_from_user_mode+0x1b/0x60 ? do_syscall_64+0x13/0x80 ? rcu_read_lock_sched_held+0xa1/0xd0 ? __kasan_check_read+0x11/0x20 ? __fget_light+0xae/0x110 __x64_sys_ioctl+0xc3/0x100 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f4c4bdfe427 RSP: 002b:00007fff33ee6df8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fff33ee6e98 RCX: 00007f4c4bdfe427 RDX: 00007fff33ee6e98 RSI: 00000000c4009420 RDI: 0000000000000003 RBP: 0000000000000003 R08: 0000000000000003 R09: 0000000000000078 R10: fffffffffffff59d R11: 0000000000000202 R12: 0000000000000001 R13: 0000000000000000 R14: 00007fff33ee8a34 R15: 0000000000000001 Allocated by task 28836: kasan_save_stack+0x21/0x50 __kasan_kmalloc.constprop.18+0xbe/0xd0 kasan_kmalloc+0x9/0x10 kmem_cache_alloc_trace+0x410/0xcb0 btrfs_backref_alloc_node+0x46/0xf0 btrfs_backref_add_tree_node+0x60d/0x11d0 build_backref_tree+0xc5/0x700 relocate_tree_blocks+0x2be/0xb90 relocate_block_group+0x2eb/0x780 btrfs_relocate_block_group+0x26e/0x4c0 btrfs_relocate_chunk+0x52/0x120 btrfs_balance+0xe2e/0x1900 btrfs_ioctl_balance+0x3a7/0x460 btrfs_ioctl+0x24c8/0x4360 __x64_sys_ioctl+0xc3/0x100 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 28836: kasan_save_stack+0x21/0x50 kasan_set_track+0x20/0x30 kasan_set_free_info+0x1f/0x30 __kasan_slab_free+0xf3/0x140 kasan_slab_free+0xe/0x10 kfree+0xde/0x200 btrfs_backref_error_cleanup+0x452/0x530 build_backref_tree+0x1a5/0x700 relocate_tree_blocks+0x2be/0xb90 relocate_block_group+0x2eb/0x780 btrfs_relocate_block_group+0x26e/0x4c0 btrfs_relocate_chunk+0x52/0x120 
btrfs_balance+0xe2e/0x1900 btrfs_ioctl_balance+0x3a7/0x460 btrfs_ioctl+0x24c8/0x4360 __x64_sys_ioctl+0xc3/0x100 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff888112402900 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 80 bytes inside of 128-byte region [ffff888112402900, ffff888112402980) The buggy address belongs to the page: page:0000000028b1cd08 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888131c810c0 pfn:0x112402 flags: 0x17ffe0000000200(slab) raw: 017ffe0000000200 ffffea000424f308 ffffea0007d572c8 ffff888100040440 raw: ffff888131c810c0 ffff888112402000 0000000100000009 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888112402800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888112402880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff888112402900: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888112402980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888112402a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Link: https://lore.kernel.org/linux-btrfs/20201208194607.GI31381@hungrycats.org/ CC: stable@vger.kernel.org # 5.10+ Signed-off-by: Josef Bacik <josef@toxicpanda.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
382 lines
10 KiB
C
382 lines
10 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Copyright (C) 2011 STRATO. All rights reserved.
|
|
*/
|
|
|
|
#ifndef BTRFS_BACKREF_H
|
|
#define BTRFS_BACKREF_H
|
|
|
|
#include <linux/btrfs.h>
|
|
#include "ulist.h"
|
|
#include "disk-io.h"
|
|
#include "extent_io.h"
|
|
|
|
/*
 * Bundle used by the inode path helpers declared in this header:
 * init_ipath() returns one (it is given a root and a path), while
 * paths_from_inode() and free_ipath() take one as their argument.
 */
struct inode_fs_paths {
	struct btrfs_path		*btrfs_path;
	struct btrfs_root		*fs_root;
	struct btrfs_data_container	*fspath;
};
|
|
|
|
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
|
|
void *ctx);
|
|
|
|
int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
|
|
struct btrfs_path *path, struct btrfs_key *found_key,
|
|
u64 *flags);
|
|
|
|
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
|
|
struct btrfs_key *key, struct btrfs_extent_item *ei,
|
|
u32 item_size, u64 *out_root, u8 *out_level);
|
|
|
|
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
|
|
u64 extent_item_objectid,
|
|
u64 extent_offset, int search_commit_root,
|
|
iterate_extent_inodes_t *iterate, void *ctx,
|
|
bool ignore_offset);
|
|
|
|
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
|
struct btrfs_path *path,
|
|
iterate_extent_inodes_t *iterate, void *ctx,
|
|
bool ignore_offset);
|
|
|
|
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
|
|
|
|
int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info, u64 bytenr,
|
|
u64 time_seq, struct ulist **leafs,
|
|
const u64 *extent_item_pos, bool ignore_offset);
|
|
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info, u64 bytenr,
|
|
u64 time_seq, struct ulist **roots, bool ignore_offset);
|
|
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
|
|
u32 name_len, unsigned long name_off,
|
|
struct extent_buffer *eb_in, u64 parent,
|
|
char *dest, u32 size);
|
|
|
|
struct btrfs_data_container *init_data_container(u32 total_bytes);
|
|
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
|
|
struct btrfs_path *path);
|
|
void free_ipath(struct inode_fs_paths *ipath);
|
|
|
|
int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
|
|
u64 start_off, struct btrfs_path *path,
|
|
struct btrfs_inode_extref **ret_extref,
|
|
u64 *found_off);
|
|
int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
|
struct ulist *roots, struct ulist *tmp_ulist);
|
|
|
|
int __init btrfs_prelim_ref_init(void);
|
|
void __cold btrfs_prelim_ref_exit(void);
|
|
|
|
struct prelim_ref {
|
|
struct rb_node rbnode;
|
|
u64 root_id;
|
|
struct btrfs_key key_for_search;
|
|
int level;
|
|
int count;
|
|
struct extent_inode_elem *inode_list;
|
|
u64 parent;
|
|
u64 wanted_disk_byte;
|
|
};
|
|
|
|
/*
|
|
* Iterate backrefs of one extent.
|
|
*
|
|
* Now it only supports iteration of tree block in commit root.
|
|
*/
|
|
struct btrfs_backref_iter {
|
|
u64 bytenr;
|
|
struct btrfs_path *path;
|
|
struct btrfs_fs_info *fs_info;
|
|
struct btrfs_key cur_key;
|
|
u32 item_ptr;
|
|
u32 cur_ptr;
|
|
u32 end_ptr;
|
|
};
|
|
|
|
struct btrfs_backref_iter *btrfs_backref_iter_alloc(
|
|
struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
|
|
|
|
static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
|
|
{
|
|
if (!iter)
|
|
return;
|
|
btrfs_free_path(iter->path);
|
|
kfree(iter);
|
|
}
|
|
|
|
static inline struct extent_buffer *btrfs_backref_get_eb(
|
|
struct btrfs_backref_iter *iter)
|
|
{
|
|
if (!iter)
|
|
return NULL;
|
|
return iter->path->nodes[0];
|
|
}
|
|
|
|
/*
|
|
* For metadata with EXTENT_ITEM key (non-skinny) case, the first inline data
|
|
* is btrfs_tree_block_info, without a btrfs_extent_inline_ref header.
|
|
*
|
|
* This helper determines if that's the case.
|
|
*/
|
|
static inline bool btrfs_backref_has_tree_block_info(
|
|
struct btrfs_backref_iter *iter)
|
|
{
|
|
if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY &&
|
|
iter->cur_ptr - iter->item_ptr == sizeof(struct btrfs_extent_item))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
|
|
|
|
int btrfs_backref_iter_next(struct btrfs_backref_iter *iter);
|
|
|
|
static inline bool btrfs_backref_iter_is_inline_ref(
|
|
struct btrfs_backref_iter *iter)
|
|
{
|
|
if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY ||
|
|
iter->cur_key.type == BTRFS_METADATA_ITEM_KEY)
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
static inline void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
|
|
{
|
|
iter->bytenr = 0;
|
|
iter->item_ptr = 0;
|
|
iter->cur_ptr = 0;
|
|
iter->end_ptr = 0;
|
|
btrfs_release_path(iter->path);
|
|
memset(&iter->cur_key, 0, sizeof(iter->cur_key));
|
|
}
|
|
|
|
/*
|
|
* Backref cache related structures
|
|
*
|
|
* The whole objective of backref_cache is to build a bi-directional map
|
|
* of tree blocks (represented by backref_node) and all their parents.
|
|
*/
|
|
|
|
/*
|
|
* Represent a tree block in the backref cache
|
|
*/
|
|
struct btrfs_backref_node {
|
|
struct {
|
|
struct rb_node rb_node;
|
|
u64 bytenr;
|
|
}; /* Use rb_simple_node for search/insert */
|
|
|
|
u64 new_bytenr;
|
|
/* Objectid of tree block owner, can be not uptodate */
|
|
u64 owner;
|
|
/* Link to pending, changed or detached list */
|
|
struct list_head list;
|
|
|
|
/* List of upper level edges, which link this node to its parents */
|
|
struct list_head upper;
|
|
/* List of lower level edges, which link this node to its children */
|
|
struct list_head lower;
|
|
|
|
/* NULL if this node is not tree root */
|
|
struct btrfs_root *root;
|
|
/* Extent buffer got by COWing the block */
|
|
struct extent_buffer *eb;
|
|
/* Level of the tree block */
|
|
unsigned int level:8;
|
|
/* Is the block in a non-shareable tree */
|
|
unsigned int cowonly:1;
|
|
/* 1 if no child node is in the cache */
|
|
unsigned int lowest:1;
|
|
/* Is the extent buffer locked */
|
|
unsigned int locked:1;
|
|
/* Has the block been processed */
|
|
unsigned int processed:1;
|
|
/* Have backrefs of this block been checked */
|
|
unsigned int checked:1;
|
|
/*
|
|
* 1 if corresponding block has been COWed but some upper level block
|
|
* pointers may not point to the new location
|
|
*/
|
|
unsigned int pending:1;
|
|
/* 1 if the backref node isn't connected to any other backref node */
|
|
unsigned int detached:1;
|
|
|
|
/*
|
|
* For generic purpose backref cache, where we only care if it's a reloc
|
|
* root, doesn't care the source subvolid.
|
|
*/
|
|
unsigned int is_reloc_root:1;
|
|
};
|
|
|
|
#define LOWER 0
|
|
#define UPPER 1
|
|
|
|
/*
|
|
* Represent an edge connecting upper and lower backref nodes.
|
|
*/
|
|
struct btrfs_backref_edge {
|
|
/*
|
|
* list[LOWER] is linked to btrfs_backref_node::upper of lower level
|
|
* node, and list[UPPER] is linked to btrfs_backref_node::lower of
|
|
* upper level node.
|
|
*
|
|
* Also, build_backref_tree() uses list[UPPER] for pending edges, before
|
|
* linking list[UPPER] to its upper level nodes.
|
|
*/
|
|
struct list_head list[2];
|
|
|
|
/* Two related nodes */
|
|
struct btrfs_backref_node *node[2];
|
|
};
|
|
|
|
struct btrfs_backref_cache {
|
|
/* Red black tree of all backref nodes in the cache */
|
|
struct rb_root rb_root;
|
|
/* For passing backref nodes to btrfs_reloc_cow_block */
|
|
struct btrfs_backref_node *path[BTRFS_MAX_LEVEL];
|
|
/*
|
|
* List of blocks that have been COWed but some block pointers in upper
|
|
* level blocks may not reflect the new location
|
|
*/
|
|
struct list_head pending[BTRFS_MAX_LEVEL];
|
|
/* List of backref nodes with no child node */
|
|
struct list_head leaves;
|
|
/* List of blocks that have been COWed in current transaction */
|
|
struct list_head changed;
|
|
/* List of detached backref node. */
|
|
struct list_head detached;
|
|
|
|
u64 last_trans;
|
|
|
|
int nr_nodes;
|
|
int nr_edges;
|
|
|
|
/* List of unchecked backref edges during backref cache build */
|
|
struct list_head pending_edge;
|
|
|
|
/* List of useless backref nodes during backref cache build */
|
|
struct list_head useless_node;
|
|
|
|
struct btrfs_fs_info *fs_info;
|
|
|
|
/*
|
|
* Whether this cache is for relocation
|
|
*
|
|
* Reloction backref cache require more info for reloc root compared
|
|
* to generic backref cache.
|
|
*/
|
|
unsigned int is_reloc;
|
|
};
|
|
|
|
void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
|
|
struct btrfs_backref_cache *cache, int is_reloc);
|
|
struct btrfs_backref_node *btrfs_backref_alloc_node(
|
|
struct btrfs_backref_cache *cache, u64 bytenr, int level);
|
|
struct btrfs_backref_edge *btrfs_backref_alloc_edge(
|
|
struct btrfs_backref_cache *cache);
|
|
|
|
#define LINK_LOWER (1 << 0)
|
|
#define LINK_UPPER (1 << 1)
|
|
static inline void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
|
|
struct btrfs_backref_node *lower,
|
|
struct btrfs_backref_node *upper,
|
|
int link_which)
|
|
{
|
|
ASSERT(upper && lower && upper->level == lower->level + 1);
|
|
edge->node[LOWER] = lower;
|
|
edge->node[UPPER] = upper;
|
|
if (link_which & LINK_LOWER)
|
|
list_add_tail(&edge->list[LOWER], &lower->upper);
|
|
if (link_which & LINK_UPPER)
|
|
list_add_tail(&edge->list[UPPER], &upper->lower);
|
|
}
|
|
|
|
static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
|
|
struct btrfs_backref_node *node)
|
|
{
|
|
if (node) {
|
|
ASSERT(list_empty(&node->list));
|
|
ASSERT(list_empty(&node->lower));
|
|
ASSERT(node->eb == NULL);
|
|
cache->nr_nodes--;
|
|
btrfs_put_root(node->root);
|
|
kfree(node);
|
|
}
|
|
}
|
|
|
|
static inline void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
|
|
struct btrfs_backref_edge *edge)
|
|
{
|
|
if (edge) {
|
|
cache->nr_edges--;
|
|
kfree(edge);
|
|
}
|
|
}
|
|
|
|
static inline void btrfs_backref_unlock_node_buffer(
|
|
struct btrfs_backref_node *node)
|
|
{
|
|
if (node->locked) {
|
|
btrfs_tree_unlock(node->eb);
|
|
node->locked = 0;
|
|
}
|
|
}
|
|
|
|
static inline void btrfs_backref_drop_node_buffer(
|
|
struct btrfs_backref_node *node)
|
|
{
|
|
if (node->eb) {
|
|
btrfs_backref_unlock_node_buffer(node);
|
|
free_extent_buffer(node->eb);
|
|
node->eb = NULL;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Drop the backref node from cache without cleaning up its children
|
|
* edges.
|
|
*
|
|
* This can only be called on node without parent edges.
|
|
* The children edges are still kept as is.
|
|
*/
|
|
static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
|
|
struct btrfs_backref_node *node)
|
|
{
|
|
ASSERT(list_empty(&node->upper));
|
|
|
|
btrfs_backref_drop_node_buffer(node);
|
|
list_del_init(&node->list);
|
|
list_del_init(&node->lower);
|
|
if (!RB_EMPTY_NODE(&node->rb_node))
|
|
rb_erase(&node->rb_node, &tree->rb_root);
|
|
btrfs_backref_free_node(tree, node);
|
|
}
|
|
|
|
void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
|
|
struct btrfs_backref_node *node);
|
|
|
|
void btrfs_backref_release_cache(struct btrfs_backref_cache *cache);
|
|
|
|
static inline void btrfs_backref_panic(struct btrfs_fs_info *fs_info,
|
|
u64 bytenr, int errno)
|
|
{
|
|
btrfs_panic(fs_info, errno,
|
|
"Inconsistency in backref cache found at offset %llu",
|
|
bytenr);
|
|
}
|
|
|
|
int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
|
|
struct btrfs_path *path,
|
|
struct btrfs_backref_iter *iter,
|
|
struct btrfs_key *node_key,
|
|
struct btrfs_backref_node *cur);
|
|
|
|
int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
|
|
struct btrfs_backref_node *start);
|
|
|
|
void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
|
|
struct btrfs_backref_node *node);
|
|
|
|
#endif
|