btrfs-progs/check/main.c

10515 lines
269 KiB
C
Raw Normal View History

2007-06-12 21:07:11 +08:00
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "kerncompat.h"
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
2007-04-24 03:56:27 +08:00
#include <fcntl.h>
#include <getopt.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <time.h>
#include <uuid/uuid.h>
#include "kernel-lib/list.h"
#include "kernel-lib/rbtree.h"
#include "kernel-lib/rbtree_types.h"
#include "kernel-shared/extent_io.h"
#include "kernel-shared/ctree.h"
#include "kernel-shared/volumes.h"
#include "kernel-shared/disk-io.h"
#include "kernel-shared/print-tree.h"
#include "kernel-shared/transaction.h"
#include "kernel-shared/free-space-cache.h"
#include "kernel-shared/free-space-tree.h"
#include "kernel-shared/backref.h"
#include "kernel-shared/ulist.h"
#include "common/defs.h"
#include "common/extent-cache.h"
#include "common/internal.h"
#include "common/messages.h"
#include "common/task-utils.h"
#include "common/device-utils.h"
#include "common/utils.h"
#include "common/rbtree-utils.h"
#include "common/help.h"
#include "common/open-utils.h"
#include "common/string-utils.h"
#include "cmds/commands.h"
#include "mkfs/common.h"
#include "check/common.h"
#include "check/repair.h"
#include "check/mode-common.h"
#include "check/mode-original.h"
#include "check/mode-lowmem.h"
#include "check/qgroup-verify.h"
#include "check/clear-cache.h"
#include "ioctl.h"
/* Global context variables */
struct btrfs_fs_info *gfs_info;
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
int no_holes = 0;
static int is_free_space_tree = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct cache_tree *roots_info_cache = NULL;
2007-05-10 08:13:48 +08:00
enum btrfs_check_mode {
CHECK_MODE_ORIGINAL,
CHECK_MODE_LOWMEM,
CHECK_MODE_UNKNOWN,
CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
struct device_record {
struct rb_node node;
u64 devid;
u64 generation;
u64 objectid;
u8 type;
u64 offset;
u64 total_byte;
u64 byte_used;
u64 real_used;
btrfs-progs: check: verify the underlying block device size is valid [BUG] There is a bug report that, one btrfs got its underlying device shrunk accidentally. Fortunately the user has no data at the truncated range. However kernel will reject such filesystem, while btrfs-check reports nothing wrong with it. This can be easily reproduced by: # truncate -s 1G test.img # mkfs.btrfs test.img # truncate -s 996M test.img # btrfs check test.img Opening filesystem to check... Checking filesystem on test.img UUID: dbf0a16d-f158-4383-9025-29d7f4c43f17 [1/7] checking root items [2/7] checking extents [3/7] checking free space tree [4/7] checking fs roots [5/7] checking only csums items (without verifying data) [6/7] checking root refs [7/7] checking quota groups skipped (not enabled on this FS) found 16527360 bytes used, no error found ^^^^^^^^^^^^^^ total csum bytes: 13836 total tree bytes: 2359296 total fs tree bytes: 2162688 total extent tree bytes: 65536 btree space waste bytes: 503569 file data blocks allocated: 14168064 referenced 14168064 [CAUSE] Btrfs check really only checks the metadata cross references, not really bothering if the underlying device has correct size. Thus we completely ignored such size mismatch. [FIX] For both regular and lowmem mode, add extra check against the underlying block device size. If the block device size is smaller than its total_bytes, gives a error message and error out. Now the check looks like this for both modes: ... [2/7] checking extents ERROR: block device size is smaller than total_bytes in device item, has 1046478848 expect >= 1073741824 ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space tree ... found 16527360 bytes used, error(s) found Issue: #504 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2022-08-30 14:49:42 +08:00
bool bad_block_dev_size;
};
static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
{
struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
struct data_backref *back1 = to_data_backref(ext1);
struct data_backref *back2 = to_data_backref(ext2);
WARN_ON(!ext1->is_data);
WARN_ON(!ext2->is_data);
/* parent and root are a union, so this covers both */
if (back1->parent > back2->parent)
return 1;
if (back1->parent < back2->parent)
return -1;
/* This is a full backref and the parents match. */
if (back1->node.full_backref)
return 0;
if (back1->owner > back2->owner)
return 1;
if (back1->owner < back2->owner)
return -1;
if (back1->offset > back2->offset)
return 1;
if (back1->offset < back2->offset)
return -1;
if (back1->found_ref && back2->found_ref) {
if (back1->disk_bytenr > back2->disk_bytenr)
return 1;
if (back1->disk_bytenr < back2->disk_bytenr)
return -1;
if (back1->bytes > back2->bytes)
return 1;
if (back1->bytes < back2->bytes)
return -1;
}
return 0;
}
static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
{
struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
struct tree_backref *back1 = to_tree_backref(ext1);
struct tree_backref *back2 = to_tree_backref(ext2);
WARN_ON(ext1->is_data);
WARN_ON(ext2->is_data);
/* parent and root are a union, so this covers both */
if (back1->parent > back2->parent)
return 1;
if (back1->parent < back2->parent)
return -1;
return 0;
}
static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
{
struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
if (ext1->is_data > ext2->is_data)
return 1;
if (ext1->is_data < ext2->is_data)
return -1;
if (ext1->full_backref > ext2->full_backref)
return 1;
if (ext1->full_backref < ext2->full_backref)
return -1;
if (ext1->is_data)
return compare_data_backref(node1, node2);
else
return compare_tree_backref(node1, node2);
}
static void print_status_check_line(void *p)
{
struct task_ctx *priv = p;
const char *task_position_string[] = {
"[1/7] checking root items ",
"[2/7] checking extents ",
is_free_space_tree ?
"[3/7] checking free space tree " :
"[3/7] checking free space cache ",
"[4/7] checking fs roots ",
check_data_csum ?
"[5/7] checking csums against data " :
"[5/7] checking csums (without verifying data) ",
"[6/7] checking root refs ",
"[7/7] checking quota groups ",
};
time_t elapsed;
int hours;
int minutes;
int seconds;
elapsed = time(NULL) - priv->start_time;
hours = elapsed / 3600;
elapsed -= hours * 3600;
minutes = elapsed / 60;
elapsed -= minutes * 60;
seconds = elapsed;
printf("%s (%d:%02d:%02d elapsed", task_position_string[priv->tp],
hours, minutes, seconds);
if (priv->item_count > 0)
printf(", %llu items checked)\r", priv->item_count);
else
printf(")\r");
fflush(stdout);
}
static void *print_status_check(void *p)
{
struct task_ctx *priv = p;
/* 1 second */
task_period_start(priv->info, 1000);
if (priv->tp == TASK_NOTHING)
return NULL;
while (1) {
print_status_check_line(p);
task_period_wait(priv->info);
}
return NULL;
}
static int print_status_return(void *p)
{
print_status_check_line(p);
printf("\n");
fflush(stdout);
return 0;
}
static enum btrfs_check_mode parse_check_mode(const char *str)
{
if (strcmp(str, "lowmem") == 0)
return CHECK_MODE_LOWMEM;
if (strcmp(str, "orig") == 0)
return CHECK_MODE_ORIGINAL;
if (strcmp(str, "original") == 0)
return CHECK_MODE_ORIGINAL;
return CHECK_MODE_UNKNOWN;
}
/* Compatible function to allow reuse of old codes */
static u64 first_extent_gap(struct rb_root *holes)
{
struct file_extent_hole *hole;
if (RB_EMPTY_ROOT(holes))
return (u64)-1;
hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
return hole->start;
}
static int compare_hole(struct rb_node *node1, struct rb_node *node2)
{
struct file_extent_hole *hole1;
struct file_extent_hole *hole2;
hole1 = rb_entry(node1, struct file_extent_hole, node);
hole2 = rb_entry(node2, struct file_extent_hole, node);
if (hole1->start > hole2->start)
return -1;
if (hole1->start < hole2->start)
return 1;
/* Now hole1->start == hole2->start */
if (hole1->len >= hole2->len)
/*
* Hole 1 will be merge center
* Same hole will be merged later
*/
return -1;
/* Hole 2 will be merge center */
return 1;
}
/*
* Add a hole to the record
*
* This will do hole merge for copy_file_extent_holes(),
* which will ensure there won't be continuous holes.
*/
static int add_file_extent_hole(struct rb_root *holes,
u64 start, u64 len)
{
struct file_extent_hole *hole;
struct file_extent_hole *prev = NULL;
struct file_extent_hole *next = NULL;
hole = malloc(sizeof(*hole));
if (!hole)
return -ENOMEM;
hole->start = start;
hole->len = len;
/* Since compare will not return 0, no -EEXIST will happen */
rb_insert(holes, &hole->node, compare_hole);
/* simple merge with previous hole */
if (rb_prev(&hole->node))
prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
node);
if (prev && prev->start + prev->len >= hole->start) {
hole->len = hole->start + hole->len - prev->start;
hole->start = prev->start;
rb_erase(&prev->node, holes);
free(prev);
prev = NULL;
}
/* iterate merge with next holes */
while (1) {
if (!rb_next(&hole->node))
break;
next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
node);
if (hole->start + hole->len >= next->start) {
if (hole->start + hole->len <= next->start + next->len)
hole->len = next->start + next->len -
hole->start;
rb_erase(&next->node, holes);
free(next);
next = NULL;
} else
break;
}
return 0;
}
static int compare_hole_range(struct rb_node *node, void *data)
{
struct file_extent_hole *hole;
u64 start;
hole = (struct file_extent_hole *)data;
start = hole->start;
hole = rb_entry(node, struct file_extent_hole, node);
if (start < hole->start)
return -1;
if (start >= hole->start && start < hole->start + hole->len)
return 0;
return 1;
}
/*
* Delete a hole in the record
*
* This will do the hole split and is much restrict than add.
*/
static int del_file_extent_hole(struct rb_root *holes,
u64 start, u64 len)
{
struct file_extent_hole *hole;
struct file_extent_hole tmp;
u64 prev_start = 0;
u64 prev_len = 0;
u64 next_start = 0;
u64 next_len = 0;
struct rb_node *node;
int have_prev = 0;
int have_next = 0;
int ret = 0;
tmp.start = start;
tmp.len = len;
node = rb_search(holes, &tmp, compare_hole_range, NULL);
if (!node)
return -EEXIST;
hole = rb_entry(node, struct file_extent_hole, node);
if (start + len > hole->start + hole->len)
return -EEXIST;
/*
* Now there will be no overlap, delete the hole and re-add the
* split(s) if they exists.
*/
if (start > hole->start) {
prev_start = hole->start;
prev_len = start - hole->start;
have_prev = 1;
}
if (hole->start + hole->len > start + len) {
next_start = start + len;
next_len = hole->start + hole->len - start - len;
have_next = 1;
}
rb_erase(node, holes);
free(hole);
if (have_prev) {
ret = add_file_extent_hole(holes, prev_start, prev_len);
if (ret < 0)
return ret;
}
if (have_next) {
ret = add_file_extent_hole(holes, next_start, next_len);
if (ret < 0)
return ret;
}
return 0;
}
static int copy_file_extent_holes(struct rb_root *dst,
struct rb_root *src)
{
struct file_extent_hole *hole;
struct rb_node *node;
int ret = 0;
node = rb_first(src);
while (node) {
hole = rb_entry(node, struct file_extent_hole, node);
ret = add_file_extent_hole(dst, hole->start, hole->len);
if (ret)
break;
node = rb_next(node);
}
return ret;
}
static void free_file_extent_holes(struct rb_root *holes)
{
struct rb_node *node;
struct file_extent_hole *hole;
node = rb_first(holes);
while (node) {
hole = rb_entry(node, struct file_extent_hole, node);
rb_erase(node, holes);
free(hole);
node = rb_first(holes);
}
}
static void record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (root->last_trans != trans->transid) {
root->track_dirty = 1;
root->last_trans = trans->transid;
root->commit_root = root->node;
extent_buffer_get(root->node);
}
}
static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
{
struct device_record *rec1;
struct device_record *rec2;
rec1 = rb_entry(node1, struct device_record, node);
rec2 = rb_entry(node2, struct device_record, node);
if (rec1->devid > rec2->devid)
return -1;
else if (rec1->devid < rec2->devid)
return 1;
else
return 0;
}
static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
{
struct inode_record *rec;
struct inode_backref *backref;
struct inode_backref *orig;
struct inode_backref *tmp;
struct mismatch_dir_hash_record *hash_record;
struct mismatch_dir_hash_record *new_record;
struct unaligned_extent_rec_t *src;
struct unaligned_extent_rec_t *dst;
struct rb_node *rb;
size_t size;
int ret;
rec = malloc(sizeof(*rec));
if (!rec)
return ERR_PTR(-ENOMEM);
memcpy(rec, orig_rec, sizeof(*rec));
rec->refs = 1;
INIT_LIST_HEAD(&rec->backrefs);
INIT_LIST_HEAD(&rec->mismatch_dir_hash);
INIT_LIST_HEAD(&rec->unaligned_extent_recs);
rec->holes = RB_ROOT;
list_for_each_entry(orig, &orig_rec->backrefs, list) {
size = sizeof(*orig) + orig->namelen + 1;
backref = malloc(size);
if (!backref) {
ret = -ENOMEM;
goto cleanup;
}
memcpy(backref, orig, size);
list_add_tail(&backref->list, &rec->backrefs);
}
list_for_each_entry(hash_record, &orig_rec->mismatch_dir_hash, list) {
size = sizeof(*hash_record) + hash_record->namelen;
new_record = malloc(size);
if (!new_record) {
ret = -ENOMEM;
goto cleanup;
}
memcpy(&new_record, hash_record, size);
list_add_tail(&new_record->list, &rec->mismatch_dir_hash);
}
list_for_each_entry(src, &orig_rec->unaligned_extent_recs, list) {
size = sizeof(*src);
dst = malloc(size);
if (!dst) {
ret = -ENOMEM;
goto cleanup;
}
memcpy(dst, src, size);
list_add_tail(&dst->list, &rec->unaligned_extent_recs);
}
ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
if (ret < 0)
goto cleanup_rb;
return rec;
cleanup_rb:
rb = rb_first(&rec->holes);
while (rb) {
struct file_extent_hole *hole;
hole = rb_entry(rb, struct file_extent_hole, node);
rb = rb_next(rb);
free(hole);
}
cleanup:
if (!list_empty(&rec->backrefs))
list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
list_del(&orig->list);
free(orig);
}
if (!list_empty(&rec->mismatch_dir_hash)) {
list_for_each_entry_safe(hash_record, new_record,
&rec->mismatch_dir_hash, list) {
list_del(&hash_record->list);
free(hash_record);
}
}
if (!list_empty(&rec->unaligned_extent_recs))
list_for_each_entry_safe(src, dst, &rec->unaligned_extent_recs,
list) {
list_del(&src->list);
free(src);
}
free(rec);
return ERR_PTR(ret);
}
static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
{
u64 root_objectid = root->root_key.objectid;
int errors = rec->errors;
if (!errors)
return;
/* reloc root errors, we print its corresponding fs root objectid*/
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
root_objectid = root->root_key.offset;
fprintf(stderr, "reloc");
}
fprintf(stderr, "root %llu inode %llu errors %x",
(unsigned long long) root_objectid,
(unsigned long long) rec->ino, rec->errors);
if (errors & I_ERR_NO_INODE_ITEM)
fprintf(stderr, ", no inode item");
if (errors & I_ERR_NO_ORPHAN_ITEM)
fprintf(stderr, ", no orphan item");
if (errors & I_ERR_DUP_INODE_ITEM)
fprintf(stderr, ", dup inode item");
if (errors & I_ERR_DUP_DIR_INDEX)
fprintf(stderr, ", dup dir index");
if (errors & I_ERR_ODD_DIR_ITEM)
fprintf(stderr, ", odd dir item");
if (errors & I_ERR_ODD_FILE_EXTENT)
fprintf(stderr, ", odd file extent");
if (errors & I_ERR_BAD_FILE_EXTENT)
fprintf(stderr, ", bad file extent");
if (errors & I_ERR_FILE_EXTENT_OVERLAP)
fprintf(stderr, ", file extent overlap");
if (errors & I_ERR_FILE_EXTENT_TOO_LARGE)
fprintf(stderr, ", inline file extent too large");
if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
fprintf(stderr, ", file extent discount");
if (errors & I_ERR_DIR_ISIZE_WRONG)
fprintf(stderr, ", dir isize wrong");
if (errors & I_ERR_FILE_NBYTES_WRONG)
fprintf(stderr, ", nbytes wrong");
if (errors & I_ERR_ODD_CSUM_ITEM)
fprintf(stderr, ", odd csum item");
if (errors & I_ERR_SOME_CSUM_MISSING)
fprintf(stderr, ", some csum missing");
if (errors & I_ERR_LINK_COUNT_WRONG)
fprintf(stderr, ", link count wrong");
if (errors & I_ERR_ODD_INODE_FLAGS)
fprintf(stderr, ", odd inode flags");
if (errors & I_ERR_INLINE_RAM_BYTES_WRONG)
fprintf(stderr, ", invalid inline ram bytes");
if (errors & I_ERR_INVALID_IMODE)
fprintf(stderr, ", invalid inode mode bit 0%o",
rec->imode & ~07777);
if (errors & I_ERR_INVALID_GEN)
fprintf(stderr, ", invalid inode generation or transid");
if (errors & I_ERR_INVALID_NLINK)
fprintf(stderr, ", directory has invalid nlink %d",
rec->nlink);
if (errors & I_ERR_INVALID_XATTR)
fprintf(stderr, ", invalid xattr");
fprintf(stderr, "\n");
/* Print the holes if needed */
if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
struct file_extent_hole *hole;
struct rb_node *node;
int found = 0;
node = rb_first(&rec->holes);
fprintf(stderr, "Found file extent holes:\n");
while (node) {
found = 1;
hole = rb_entry(node, struct file_extent_hole, node);
fprintf(stderr, "\tstart: %llu, len: %llu\n",
hole->start, hole->len);
node = rb_next(node);
}
if (!found) {
u64 start, len;
if (rec->extent_end < rec->isize) {
start = rec->extent_end;
len = round_up(rec->isize,
gfs_info->sectorsize) - start;
} else {
start = 0;
len = rec->extent_start;
}
fprintf(stderr, "\tstart: %llu, len: %llu\n", start,
len);
}
}
/* Print dir item with mismatch hash */
if (errors & I_ERR_MISMATCH_DIR_HASH) {
struct mismatch_dir_hash_record *hash_record;
fprintf(stderr, "Dir items with mismatch hash:\n");
list_for_each_entry(hash_record, &rec->mismatch_dir_hash,
list) {
char *namebuf = (char *)(hash_record + 1);
u32 crc;
crc = btrfs_name_hash(namebuf, hash_record->namelen);
fprintf(stderr,
"\tname: %.*s namelen: %u wanted 0x%08x has 0x%08llx\n",
hash_record->namelen, namebuf,
hash_record->namelen, crc,
hash_record->key.offset);
}
}
}
static void print_ref_error(int errors)
{
if (errors & REF_ERR_NO_DIR_ITEM)
fprintf(stderr, ", no dir item");
if (errors & REF_ERR_NO_DIR_INDEX)
fprintf(stderr, ", no dir index");
if (errors & REF_ERR_NO_INODE_REF)
fprintf(stderr, ", no inode ref");
if (errors & REF_ERR_DUP_DIR_ITEM)
fprintf(stderr, ", dup dir item");
if (errors & REF_ERR_DUP_DIR_INDEX)
fprintf(stderr, ", dup dir index");
if (errors & REF_ERR_DUP_INODE_REF)
fprintf(stderr, ", dup inode ref");
if (errors & REF_ERR_INDEX_UNMATCH)
fprintf(stderr, ", index mismatch");
if (errors & REF_ERR_FILETYPE_UNMATCH)
fprintf(stderr, ", filetype mismatch");
if (errors & REF_ERR_NAME_TOO_LONG)
fprintf(stderr, ", name too long");
if (errors & REF_ERR_NO_ROOT_REF)
fprintf(stderr, ", no root ref");
if (errors & REF_ERR_NO_ROOT_BACKREF)
fprintf(stderr, ", no root backref");
if (errors & REF_ERR_DUP_ROOT_REF)
fprintf(stderr, ", dup root ref");
if (errors & REF_ERR_DUP_ROOT_BACKREF)
fprintf(stderr, ", dup root backref");
fprintf(stderr, "\n");
}
static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
u64 ino, int mod)
{
struct ptr_node *node;
struct cache_extent *cache;
struct inode_record *rec = NULL;
int ret;
cache = lookup_cache_extent(inode_cache, ino, 1);
if (cache) {
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
if (mod && rec->refs > 1) {
node->data = clone_inode_rec(rec);
if (IS_ERR(node->data))
return node->data;
rec->refs--;
rec = node->data;
}
} else if (mod) {
rec = calloc(1, sizeof(*rec));
if (!rec)
return ERR_PTR(-ENOMEM);
rec->ino = ino;
rec->extent_start = (u64)-1;
rec->refs = 1;
INIT_LIST_HEAD(&rec->backrefs);
INIT_LIST_HEAD(&rec->mismatch_dir_hash);
INIT_LIST_HEAD(&rec->unaligned_extent_recs);
rec->holes = RB_ROOT;
node = malloc(sizeof(*node));
if (!node) {
free(rec);
return ERR_PTR(-ENOMEM);
}
node->cache.start = ino;
node->cache.size = 1;
node->data = rec;
if (ino == BTRFS_FREE_INO_OBJECTID)
rec->found_link = 1;
ret = insert_cache_extent(inode_cache, &node->cache);
if (ret)
return ERR_PTR(-EEXIST);
}
return rec;
}
static void free_unaligned_extent_recs(struct list_head *unaligned_extent_recs)
{
struct unaligned_extent_rec_t *urec;
while (!list_empty(unaligned_extent_recs)) {
urec = list_entry(unaligned_extent_recs->next,
struct unaligned_extent_rec_t, list);
list_del(&urec->list);
free(urec);
}
}
static void free_inode_rec(struct inode_record *rec)
{
struct inode_backref *backref;
struct mismatch_dir_hash_record *hash;
struct mismatch_dir_hash_record *next;
if (--rec->refs > 0)
return;
while (!list_empty(&rec->backrefs)) {
backref = to_inode_backref(rec->backrefs.next);
list_del(&backref->list);
free(backref);
}
list_for_each_entry_safe(hash, next, &rec->mismatch_dir_hash, list)
free(hash);
free_unaligned_extent_recs(&rec->unaligned_extent_recs);
free_file_extent_holes(&rec->holes);
free(rec);
}
static int can_free_inode_rec(struct inode_record *rec)
{
if (!rec->errors && rec->checked && rec->found_inode_item &&
rec->nlink == rec->found_link && list_empty(&rec->backrefs))
return 1;
return 0;
}
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
struct inode_record *rec)
{
struct cache_extent *cache;
struct inode_backref *tmp, *backref;
struct ptr_node *node;
u8 filetype;
if (!rec->found_inode_item)
return;
filetype = imode_to_type(rec->imode);
list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
if (backref->found_dir_item && backref->found_dir_index) {
if (backref->filetype != filetype)
backref->errors |= REF_ERR_FILETYPE_UNMATCH;
if (!backref->errors && backref->found_inode_ref &&
rec->nlink == rec->found_link) {
list_del(&backref->list);
free(backref);
}
}
}
if (!rec->checked || rec->merging)
return;
if (!is_valid_imode(rec->imode))
rec->errors |= I_ERR_INVALID_IMODE;
if (S_ISDIR(rec->imode)) {
if (rec->found_size != rec->isize)
rec->errors |= I_ERR_DIR_ISIZE_WRONG;
if (rec->found_file_extent)
rec->errors |= I_ERR_ODD_FILE_EXTENT;
} else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
if (rec->found_dir_item)
rec->errors |= I_ERR_ODD_DIR_ITEM;
/* Orphan inodes don't have correct nbytes */
if (rec->nlink > 0 && rec->found_size != rec->nbytes)
rec->errors |= I_ERR_FILE_NBYTES_WRONG;
if (rec->nlink > 0 && !no_holes && rec->isize &&
(rec->extent_end < rec->isize ||
rec->extent_start != 0 ||
first_extent_gap(&rec->holes) < rec->isize))
rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
}
if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
if (rec->found_csum_item && rec->nodatasum)
rec->errors |= I_ERR_ODD_CSUM_ITEM;
if (rec->some_csum_missing && !rec->nodatasum)
rec->errors |= I_ERR_SOME_CSUM_MISSING;
}
BUG_ON(rec->refs != 1);
if (can_free_inode_rec(rec)) {
cache = lookup_cache_extent(inode_cache, rec->ino, 1);
node = container_of(cache, struct ptr_node, cache);
BUG_ON(node->data != rec);
remove_cache_extent(inode_cache, &node->cache);
free(node);
free_inode_rec(rec);
}
}
static int check_orphan_item(struct btrfs_root *root, u64 ino)
{
struct btrfs_path path;
struct btrfs_key key;
int ret;
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = ino;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
btrfs_release_path(&path);
if (ret > 0)
ret = -ENOENT;
return ret;
}
static int process_inode_item(struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
struct inode_record *rec;
struct btrfs_inode_item *item;
u64 gen_uplimit;
u64 flags;
rec = active_node->current;
BUG_ON(rec->ino != key->objectid || rec->refs > 1);
if (rec->found_inode_item) {
rec->errors |= I_ERR_DUP_INODE_ITEM;
return 1;
}
item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
rec->nlink = btrfs_inode_nlink(eb, item);
rec->isize = btrfs_inode_size(eb, item);
rec->nbytes = btrfs_inode_nbytes(eb, item);
rec->imode = btrfs_inode_mode(eb, item);
if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
rec->nodatasum = 1;
rec->found_inode_item = 1;
if (rec->nlink == 0)
rec->errors |= I_ERR_NO_ORPHAN_ITEM;
flags = btrfs_inode_flags(eb, item);
if (S_ISLNK(rec->imode) &&
flags & (BTRFS_INODE_IMMUTABLE | BTRFS_INODE_APPEND))
rec->errors |= I_ERR_ODD_INODE_FLAGS;
/* Directory should never have hard link */
if (S_ISDIR(rec->imode) && rec->nlink >= 2)
rec->errors |= I_ERR_INVALID_NLINK;
/*
* We don't have accurate root info to determine the correct
* inode generation uplimit, use super_generation + 1 anyway
*/
gen_uplimit = btrfs_super_generation(gfs_info->super_copy) + 1;
if (btrfs_inode_generation(eb, item) > gen_uplimit ||
btrfs_inode_transid(eb, item) > gen_uplimit)
rec->errors |= I_ERR_INVALID_GEN;
maybe_free_inode_rec(&active_node->inode_cache, rec);
return 0;
}
static struct inode_backref *get_inode_backref(struct inode_record *rec,
const char *name,
int namelen, u64 dir)
{
struct inode_backref *backref;
list_for_each_entry(backref, &rec->backrefs, list) {
if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
break;
if (backref->dir != dir || backref->namelen != namelen)
continue;
if (memcmp(name, backref->name, namelen))
continue;
return backref;
}
backref = malloc(sizeof(*backref) + namelen + 1);
if (!backref)
return NULL;
memset(backref, 0, sizeof(*backref));
backref->dir = dir;
backref->namelen = namelen;
memcpy(backref->name, name, namelen);
backref->name[namelen] = '\0';
list_add_tail(&backref->list, &rec->backrefs);
return backref;
}
static int add_inode_backref(struct cache_tree *inode_cache,
u64 ino, u64 dir, u64 index,
const char *name, int namelen,
u8 filetype, u8 itemtype, int errors)
{
struct inode_record *rec;
struct inode_backref *backref;
rec = get_inode_rec(inode_cache, ino, 1);
BUG_ON(IS_ERR(rec));
backref = get_inode_backref(rec, name, namelen, dir);
BUG_ON(!backref);
if (errors)
backref->errors |= errors;
if (itemtype == BTRFS_DIR_INDEX_KEY) {
if (backref->found_dir_index)
backref->errors |= REF_ERR_DUP_DIR_INDEX;
if (backref->found_inode_ref && backref->index != index)
backref->errors |= REF_ERR_INDEX_UNMATCH;
if (backref->found_dir_item && backref->filetype != filetype)
backref->errors |= REF_ERR_FILETYPE_UNMATCH;
backref->index = index;
backref->filetype = filetype;
backref->found_dir_index = 1;
} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
rec->found_link++;
if (backref->found_dir_item)
backref->errors |= REF_ERR_DUP_DIR_ITEM;
if (backref->found_dir_index && backref->filetype != filetype)
backref->errors |= REF_ERR_FILETYPE_UNMATCH;
backref->filetype = filetype;
backref->found_dir_item = 1;
} else if ((itemtype == BTRFS_INODE_REF_KEY) ||
(itemtype == BTRFS_INODE_EXTREF_KEY)) {
if (backref->found_inode_ref)
backref->errors |= REF_ERR_DUP_INODE_REF;
if (backref->found_dir_index && backref->index != index)
backref->errors |= REF_ERR_INDEX_UNMATCH;
else
backref->index = index;
backref->ref_type = itemtype;
backref->found_inode_ref = 1;
} else {
BUG_ON(1);
}
maybe_free_inode_rec(inode_cache, rec);
return 0;
}
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
struct cache_tree *dst_cache)
{
struct inode_backref *backref;
u32 dir_count = 0;
int ret = 0;
dst->merging = 1;
/*
* If we wandered into a shared node while we were processing an inode
* we may have added backrefs for a directory that had nlink == 0, so
* skip adding these backrefs to our src inode if we have nlink == 0 and
* we actually found the inode item.
*/
if (src->found_inode_item && src->nlink == 0)
goto skip_backrefs;
list_for_each_entry(backref, &src->backrefs, list) {
if (backref->found_dir_index) {
add_inode_backref(dst_cache, dst->ino, backref->dir,
backref->index, backref->name,
backref->namelen, backref->filetype,
BTRFS_DIR_INDEX_KEY, backref->errors);
}
if (backref->found_dir_item) {
dir_count++;
add_inode_backref(dst_cache, dst->ino,
backref->dir, 0, backref->name,
backref->namelen, backref->filetype,
BTRFS_DIR_ITEM_KEY, backref->errors);
}
if (backref->found_inode_ref) {
add_inode_backref(dst_cache, dst->ino,
backref->dir, backref->index,
backref->name, backref->namelen, 0,
backref->ref_type, backref->errors);
}
}
skip_backrefs:
if (src->found_dir_item)
dst->found_dir_item = 1;
if (src->found_file_extent)
dst->found_file_extent = 1;
if (src->found_csum_item)
dst->found_csum_item = 1;
if (src->some_csum_missing)
dst->some_csum_missing = 1;
if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
ret = copy_file_extent_holes(&dst->holes, &src->holes);
if (ret < 0)
return ret;
}
BUG_ON(src->found_link < dir_count);
dst->found_link += src->found_link - dir_count;
dst->found_size += src->found_size;
if (src->extent_start != (u64)-1) {
if (dst->extent_start == (u64)-1) {
dst->extent_start = src->extent_start;
dst->extent_end = src->extent_end;
} else {
if (dst->extent_end > src->extent_start)
dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
else if (dst->extent_end < src->extent_start) {
ret = add_file_extent_hole(&dst->holes,
dst->extent_end,
src->extent_start - dst->extent_end);
}
if (dst->extent_end < src->extent_end)
dst->extent_end = src->extent_end;
}
}
dst->errors |= src->errors;
if (src->found_inode_item) {
if (!dst->found_inode_item) {
dst->nlink = src->nlink;
dst->isize = src->isize;
dst->nbytes = src->nbytes;
dst->imode = src->imode;
dst->nodatasum = src->nodatasum;
dst->found_inode_item = 1;
} else {
dst->errors |= I_ERR_DUP_INODE_ITEM;
}
}
dst->merging = 0;
return 0;
}
static int splice_shared_node(struct shared_node *src_node,
struct shared_node *dst_node)
{
struct cache_extent *cache;
struct ptr_node *node, *ins;
struct cache_tree *src, *dst;
struct inode_record *rec, *conflict;
u64 current_ino = 0;
int splice = 0;
int ret;
if (--src_node->refs == 0)
splice = 1;
if (src_node->current)
current_ino = src_node->current->ino;
src = &src_node->root_cache;
dst = &dst_node->root_cache;
again:
cache = search_cache_extent(src, 0);
while (cache) {
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
cache = next_cache_extent(cache);
if (splice) {
remove_cache_extent(src, &node->cache);
ins = node;
} else {
ins = malloc(sizeof(*ins));
BUG_ON(!ins);
ins->cache.start = node->cache.start;
ins->cache.size = node->cache.size;
ins->data = rec;
rec->refs++;
}
ret = insert_cache_extent(dst, &ins->cache);
if (ret == -EEXIST) {
conflict = get_inode_rec(dst, rec->ino, 1);
BUG_ON(IS_ERR(conflict));
merge_inode_recs(rec, conflict, dst);
if (rec->checked) {
conflict->checked = 1;
if (dst_node->current == conflict)
dst_node->current = NULL;
}
maybe_free_inode_rec(dst, conflict);
free_inode_rec(rec);
free(ins);
} else {
BUG_ON(ret);
}
}
if (src == &src_node->root_cache) {
src = &src_node->inode_cache;
dst = &dst_node->inode_cache;
goto again;
}
if (current_ino > 0 && (!dst_node->current ||
current_ino > dst_node->current->ino)) {
if (dst_node->current) {
dst_node->current->checked = 1;
maybe_free_inode_rec(dst, dst_node->current);
}
dst_node->current = get_inode_rec(dst, current_ino, 1);
BUG_ON(IS_ERR(dst_node->current));
}
return 0;
}
static void free_inode_ptr(struct cache_extent *cache)
{
struct ptr_node *node;
struct inode_record *rec;
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
free_inode_rec(rec);
free(node);
}
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
static struct shared_node *find_shared_node(struct cache_tree *shared,
u64 bytenr)
{
struct cache_extent *cache;
struct shared_node *node;
cache = lookup_cache_extent(shared, bytenr, 1);
if (cache) {
node = container_of(cache, struct shared_node, cache);
return node;
}
return NULL;
}
static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
{
int ret;
struct shared_node *node;
node = calloc(1, sizeof(*node));
if (!node)
return -ENOMEM;
node->cache.start = bytenr;
node->cache.size = 1;
cache_tree_init(&node->root_cache);
cache_tree_init(&node->inode_cache);
node->refs = refs;
ret = insert_cache_extent(shared, &node->cache);
return ret;
}
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
struct walk_control *wc, int level)
{
struct shared_node *node;
struct shared_node *dest;
int ret;
if (level == wc->active_node)
return 0;
BUG_ON(wc->active_node <= level);
node = find_shared_node(&wc->shared, bytenr);
if (!node) {
ret = add_shared_node(&wc->shared, bytenr, refs);
BUG_ON(ret);
node = find_shared_node(&wc->shared, bytenr);
wc->nodes[level] = node;
wc->active_node = level;
return 0;
}
if (wc->root_level == wc->active_node &&
btrfs_root_refs(&root->root_item) == 0) {
if (--node->refs == 0) {
free_inode_recs_tree(&node->root_cache);
free_inode_recs_tree(&node->inode_cache);
remove_cache_extent(&wc->shared, &node->cache);
free(node);
}
return 1;
}
dest = wc->nodes[wc->active_node];
splice_shared_node(node, dest);
if (node->refs == 0) {
remove_cache_extent(&wc->shared, &node->cache);
free(node);
}
return 1;
}
static int leave_shared_node(struct btrfs_root *root,
struct walk_control *wc, int level)
{
struct shared_node *node;
struct shared_node *dest;
int i;
if (level == wc->root_level)
return 0;
for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
if (wc->nodes[i])
break;
}
BUG_ON(i >= BTRFS_MAX_LEVEL);
node = wc->nodes[wc->active_node];
wc->nodes[wc->active_node] = NULL;
wc->active_node = i;
dest = wc->nodes[wc->active_node];
if (wc->active_node < wc->root_level ||
btrfs_root_refs(&root->root_item) > 0) {
BUG_ON(node->refs <= 1);
splice_shared_node(node, dest);
} else {
BUG_ON(node->refs < 2);
node->refs--;
}
return 0;
}
/*
* Returns:
* < 0 - on error
* 1 - if the root with id child_root_id is a child of root parent_root_id
* 0 - if the root child_root_id isn't a child of the root parent_root_id but
* has other root(s) as parent(s)
* 2 - if the root child_root_id doesn't have any parent roots
*/
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
u64 child_root_id)
{
struct btrfs_path path;
struct btrfs_key key;
struct extent_buffer *leaf;
int has_parent = 0;
int ret;
btrfs_init_path(&path);
key.objectid = parent_root_id;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = child_root_id;
ret = btrfs_search_slot(NULL, gfs_info->tree_root, &key, &path,
0, 0);
if (ret < 0)
return ret;
btrfs_release_path(&path);
if (!ret)
return 1;
key.objectid = child_root_id;
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, gfs_info->tree_root, &key, &path,
0, 0);
if (ret < 0)
goto out;
while (1) {
leaf = path.nodes[0];
if (path.slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(gfs_info->tree_root, &path);
if (ret)
break;
leaf = path.nodes[0];
}
btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
if (key.objectid != child_root_id ||
key.type != BTRFS_ROOT_BACKREF_KEY)
break;
has_parent = 1;
if (key.offset == parent_root_id) {
btrfs_release_path(&path);
return 1;
}
path.slots[0]++;
}
out:
btrfs_release_path(&path);
if (ret < 0)
return ret;
return has_parent ? 0 : 2;
}
static int add_mismatch_dir_hash(struct inode_record *dir_rec,
struct btrfs_key *key, const char *namebuf,
int namelen)
{
struct mismatch_dir_hash_record *hash_record;
hash_record = malloc(sizeof(*hash_record) + namelen);
if (!hash_record) {
error_msg(ERROR_MSG_MEMORY, "mismatch dir hash record");
return -ENOMEM;
}
memcpy(&hash_record->key, key, sizeof(*key));
memcpy(hash_record + 1, namebuf, namelen);
hash_record->namelen = namelen;
list_add(&hash_record->list, &dir_rec->mismatch_dir_hash);
return 0;
}
static int process_dir_item(struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u32 data_len;
int error;
int nritems = 0;
u8 filetype;
struct btrfs_dir_item *di;
struct inode_record *rec;
struct cache_tree *root_cache;
struct cache_tree *inode_cache;
struct btrfs_key location;
char namebuf[BTRFS_NAME_LEN];
root_cache = &active_node->root_cache;
inode_cache = &active_node->inode_cache;
rec = active_node->current;
rec->found_dir_item = 1;
if (rec->found_inode_item && rec->nlink == 0)
return 0;
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
total = btrfs_item_size(eb, slot);
while (cur < total) {
int ret;
nritems++;
btrfs_dir_item_key_to_cpu(eb, di, &location);
name_len = btrfs_dir_name_len(eb, di);
data_len = btrfs_dir_data_len(eb, di);
filetype = btrfs_dir_type(eb, di);
rec->found_size += name_len;
btrfs-progs: check: Avoid reading beyond item boundary for dir_item and dir_index When reading out name from inode_ref, it's possible that corrupted name_len can lead to read beyond boundary of item or even extent buffer. This happens when checking fuzzed image /tmp/bko-161811.raw, for both lowmem mode and original mode. Below is the example from lowmem mode. ERROR: root 5 INODE REF[256 256] doesn't have related DIR_INDEX[256 216172782113783808] namelen 255 filename bar filetype 0 ERROR: root 5 INODE REF[256 256] doesn't have related DIR_ITEM[256 1306590535] namelen 255 filename bar filetype 0 WARNING: root 5 INODE[256] mode 0 shouldn't have DIR_INDEX[256 1167283096] WARNING: root 5 DIR_ITEM[256 1167283096] name too long ==13013== Invalid read of size 1 ==13013== at 0x4C31A38: memmove (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==13013== by 0x431518: read_extent_buffer (extent_io.c:863) ==13013== by 0x4752AB: check_dir_item (cmds-check.c:4627) ==13013== by 0x475E5C: check_inode_item (cmds-check.c:4911) ==13013== by 0x476200: check_fs_first_inode (cmds-check.c:5011) ==13013== by 0x476276: check_fs_root_v2 (cmds-check.c:5044) ==13013== by 0x4769FB: check_fs_roots_v2 (cmds-check.c:5242) ==13013== by 0x488B5B: cmd_check (cmds-check.c:13033) ==13013== by 0x40A8C5: main (btrfs.c:246) ==13013== Address 0x5c95b80 is 0 bytes after a block of size 4,224 alloc'd ==13013== at 0x4C2CF35: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==13013== by 0x4307E0: __alloc_extent_buffer (extent_io.c:538) ==13013== by 0x430C37: alloc_extent_buffer (extent_io.c:642) ==13013== by 0x413DFE: btrfs_find_create_tree_block (disk-io.c:193) ==13013== by 0x414370: read_tree_block_fs_info (disk-io.c:340) ==13013== by 0x40B5D5: read_tree_block (disk-io.h:125) ==13013== by 0x40CFD2: read_node_slot (ctree.c:652) ==13013== by 0x40E5EB: btrfs_search_slot (ctree.c:1172) ==13013== by 0x4761A8: check_fs_first_inode (cmds-check.c:5001) ==13013== by 0x476276: check_fs_root_v2 (cmds-check.c:5044) ==13013== by 0x4769FB: check_fs_roots_v2 (cmds-check.c:5242) ==13013== by 0x488B5B: cmd_check (cmds-check.c:13033) Fix it by double checking dir_item, name_len against item boundary before trying to read out name from extent buffer, for both original mode and lowmem mode. Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.com>
2017-05-03 16:42:40 +08:00
if (cur + sizeof(*di) + name_len > total ||
name_len > BTRFS_NAME_LEN) {
error = REF_ERR_NAME_TOO_LONG;
if (cur + sizeof(*di) > total)
break;
len = min_t(u32, total - cur - sizeof(*di),
BTRFS_NAME_LEN);
} else {
len = name_len;
error = 0;
}
btrfs-progs: check: Avoid reading beyond item boundary for dir_item and dir_index When reading out name from inode_ref, it's possible that corrupted name_len can lead to read beyond boundary of item or even extent buffer. This happens when checking fuzzed image /tmp/bko-161811.raw, for both lowmem mode and original mode. Below is the example from lowmem mode. ERROR: root 5 INODE REF[256 256] doesn't have related DIR_INDEX[256 216172782113783808] namelen 255 filename bar filetype 0 ERROR: root 5 INODE REF[256 256] doesn't have related DIR_ITEM[256 1306590535] namelen 255 filename bar filetype 0 WARNING: root 5 INODE[256] mode 0 shouldn't have DIR_INDEX[256 1167283096] WARNING: root 5 DIR_ITEM[256 1167283096] name too long ==13013== Invalid read of size 1 ==13013== at 0x4C31A38: memmove (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==13013== by 0x431518: read_extent_buffer (extent_io.c:863) ==13013== by 0x4752AB: check_dir_item (cmds-check.c:4627) ==13013== by 0x475E5C: check_inode_item (cmds-check.c:4911) ==13013== by 0x476200: check_fs_first_inode (cmds-check.c:5011) ==13013== by 0x476276: check_fs_root_v2 (cmds-check.c:5044) ==13013== by 0x4769FB: check_fs_roots_v2 (cmds-check.c:5242) ==13013== by 0x488B5B: cmd_check (cmds-check.c:13033) ==13013== by 0x40A8C5: main (btrfs.c:246) ==13013== Address 0x5c95b80 is 0 bytes after a block of size 4,224 alloc'd ==13013== at 0x4C2CF35: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==13013== by 0x4307E0: __alloc_extent_buffer (extent_io.c:538) ==13013== by 0x430C37: alloc_extent_buffer (extent_io.c:642) ==13013== by 0x413DFE: btrfs_find_create_tree_block (disk-io.c:193) ==13013== by 0x414370: read_tree_block_fs_info (disk-io.c:340) ==13013== by 0x40B5D5: read_tree_block (disk-io.h:125) ==13013== by 0x40CFD2: read_node_slot (ctree.c:652) ==13013== by 0x40E5EB: btrfs_search_slot (ctree.c:1172) ==13013== by 0x4761A8: check_fs_first_inode (cmds-check.c:5001) ==13013== by 0x476276: check_fs_root_v2 (cmds-check.c:5044) ==13013== by 0x4769FB: check_fs_roots_v2 (cmds-check.c:5242) ==13013== by 0x488B5B: cmd_check (cmds-check.c:13033) Fix it by double checking dir_item, name_len against item boundary before trying to read out name from extent buffer, for both original mode and lowmem mode. Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.com>
2017-05-03 16:42:40 +08:00
read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
if (key->type == BTRFS_DIR_ITEM_KEY &&
key->offset != btrfs_name_hash(namebuf, len)) {
rec->errors |= I_ERR_MISMATCH_DIR_HASH;
ret = add_mismatch_dir_hash(rec, key, namebuf, len);
/* Fatal error, ENOMEM */
if (ret < 0)
return ret;
goto next;
}
if (location.type == BTRFS_INODE_ITEM_KEY) {
add_inode_backref(inode_cache, location.objectid,
key->objectid, key->offset, namebuf,
len, filetype, key->type, error);
} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
add_inode_backref(root_cache, location.objectid,
key->objectid, key->offset,
namebuf, len, filetype,
key->type, error);
} else {
fprintf(stderr,
"unknown location type %d in DIR_ITEM[%llu %llu]\n",
location.type, key->objectid, key->offset);
add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
key->objectid, key->offset, namebuf,
len, filetype, key->type, error);
}
next:
len = sizeof(*di) + name_len + data_len;
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
}
if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
rec->errors |= I_ERR_DUP_DIR_INDEX;
return 0;
}
static int process_xattr_item(struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
u32 total;
u32 cur = 0;
struct btrfs_dir_item *di;
struct inode_record *rec;
rec = active_node->current;
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
total = btrfs_item_size(eb, slot);
while (cur < total) {
u32 name_len = btrfs_dir_name_len(eb, di);
u32 data_len = btrfs_dir_data_len(eb, di);
u32 len;
if (name_len > BTRFS_NAME_LEN) {
char *name = malloc(name_len);
if (!name)
return -ENOMEM;
read_extent_buffer(eb, name,
(unsigned long)(di + 1), name_len);
fprintf(stderr,
"inode %llu has overlong xattr name %.*s\n",
key->objectid, name_len, name);
free(name);
rec->errors |= I_ERR_INVALID_XATTR;
}
len = sizeof(*di) + name_len + data_len;
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
}
return 0;
}
static int process_inode_ref(struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u64 index;
int error;
struct cache_tree *inode_cache;
struct btrfs_inode_ref *ref;
char namebuf[BTRFS_NAME_LEN];
inode_cache = &active_node->inode_cache;
ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
total = btrfs_item_size(eb, slot);
while (cur < total) {
name_len = btrfs_inode_ref_name_len(eb, ref);
index = btrfs_inode_ref_index(eb, ref);
btrfs-progs: check: Avoid reading beyond item boundary for inode_ref When reading out name from inode_ref, it's possible that corrupted name_len can lead to read beyond boundary of item or even extent buffer. This happens when checking fuzzed image /tmp/bko-161811.raw, for both lowmem mode and original mode. ERROR: root 5 INODE REF[256 256] doesn't have related DIR_INDEX[256 504403158265495680] namelen 0 filename filetype 0 ERROR: root 5 INODE REF[256 256] doesn't have related DIR_ITEM[256 4294967294] namelen 0 filename filetype 0 WARNING: root 5 INODE_REF[256 256] name too long ==13022== Invalid read of size 8 ==13022== at 0x4C319BE: memmove (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==13022== by 0x431518: read_extent_buffer (extent_io.c:863) ==13022== by 0x474730: check_inode_ref (cmds-check.c:4307) ==13022== by 0x475D65: check_inode_item (cmds-check.c:4890) ==13022== by 0x476200: check_fs_first_inode (cmds-check.c:5011) ==13022== by 0x476276: check_fs_root_v2 (cmds-check.c:5044) ==13022== by 0x4769FB: check_fs_roots_v2 (cmds-check.c:5242) ==13022== by 0x488B5B: cmd_check (cmds-check.c:13033) ==13022== by 0x40A8C5: main (btrfs.c:246) ==13022== Address 0x5c96780 is 0 bytes after a block of size 4,224 alloc'd ==13022== at 0x4C2CF35: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==13022== by 0x4307E0: __alloc_extent_buffer (extent_io.c:538) ==13022== by 0x430C37: alloc_extent_buffer (extent_io.c:642) ==13022== by 0x413DFE: btrfs_find_create_tree_block (disk-io.c:193) ==13022== by 0x414370: read_tree_block_fs_info (disk-io.c:340) ==13022== by 0x40B5D5: read_tree_block (disk-io.h:125) ==13022== by 0x40CFD2: read_node_slot (ctree.c:652) ==13022== by 0x40E5EB: btrfs_search_slot (ctree.c:1172) ==13022== by 0x4761A8: check_fs_first_inode (cmds-check.c:5001) ==13022== by 0x476276: check_fs_root_v2 (cmds-check.c:5044) ==13022== by 0x4769FB: check_fs_roots_v2 (cmds-check.c:5242) ==13022== by 0x488B5B: cmd_check (cmds-check.c:13033) = Fix it by double checking inode_ref, name_len against item boundary before trying to read out name from extent buffer, for both original mode and lowmem mode. Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.com>
2017-05-03 16:42:39 +08:00
/* inode_ref + namelen should not cross item boundary */
if (cur + sizeof(*ref) + name_len > total ||
name_len > BTRFS_NAME_LEN) {
if (total < cur + sizeof(*ref))
break;
/* Still try to read out the remaining part */
len = min_t(u32, total - cur - sizeof(*ref),
BTRFS_NAME_LEN);
error = REF_ERR_NAME_TOO_LONG;
} else {
len = name_len;
error = 0;
}
btrfs-progs: check: Avoid reading beyond item boundary for inode_ref When reading out name from inode_ref, it's possible that corrupted name_len can lead to read beyond boundary of item or even extent buffer. This happens when checking fuzzed image /tmp/bko-161811.raw, for both lowmem mode and original mode. ERROR: root 5 INODE REF[256 256] doesn't have related DIR_INDEX[256 504403158265495680] namelen 0 filename filetype 0 ERROR: root 5 INODE REF[256 256] doesn't have related DIR_ITEM[256 4294967294] namelen 0 filename filetype 0 WARNING: root 5 INODE_REF[256 256] name too long ==13022== Invalid read of size 8 ==13022== at 0x4C319BE: memmove (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==13022== by 0x431518: read_extent_buffer (extent_io.c:863) ==13022== by 0x474730: check_inode_ref (cmds-check.c:4307) ==13022== by 0x475D65: check_inode_item (cmds-check.c:4890) ==13022== by 0x476200: check_fs_first_inode (cmds-check.c:5011) ==13022== by 0x476276: check_fs_root_v2 (cmds-check.c:5044) ==13022== by 0x4769FB: check_fs_roots_v2 (cmds-check.c:5242) ==13022== by 0x488B5B: cmd_check (cmds-check.c:13033) ==13022== by 0x40A8C5: main (btrfs.c:246) ==13022== Address 0x5c96780 is 0 bytes after a block of size 4,224 alloc'd ==13022== at 0x4C2CF35: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==13022== by 0x4307E0: __alloc_extent_buffer (extent_io.c:538) ==13022== by 0x430C37: alloc_extent_buffer (extent_io.c:642) ==13022== by 0x413DFE: btrfs_find_create_tree_block (disk-io.c:193) ==13022== by 0x414370: read_tree_block_fs_info (disk-io.c:340) ==13022== by 0x40B5D5: read_tree_block (disk-io.h:125) ==13022== by 0x40CFD2: read_node_slot (ctree.c:652) ==13022== by 0x40E5EB: btrfs_search_slot (ctree.c:1172) ==13022== by 0x4761A8: check_fs_first_inode (cmds-check.c:5001) ==13022== by 0x476276: check_fs_root_v2 (cmds-check.c:5044) ==13022== by 0x4769FB: check_fs_roots_v2 (cmds-check.c:5242) ==13022== by 0x488B5B: cmd_check (cmds-check.c:13033) = Fix it by double checking inode_ref, name_len against item boundary before trying to read out name from extent buffer, for both original mode and lowmem mode. Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.com>
2017-05-03 16:42:39 +08:00
read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
add_inode_backref(inode_cache, key->objectid, key->offset,
index, namebuf, len, 0, key->type, error);
len = sizeof(*ref) + name_len;
ref = (struct btrfs_inode_ref *)((char *)ref + len);
cur += len;
}
return 0;
}
static int process_inode_extref(struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u64 index;
u64 parent;
int error;
struct cache_tree *inode_cache;
struct btrfs_inode_extref *extref;
char namebuf[BTRFS_NAME_LEN];
inode_cache = &active_node->inode_cache;
extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
total = btrfs_item_size(eb, slot);
while (cur < total) {
name_len = btrfs_inode_extref_name_len(eb, extref);
index = btrfs_inode_extref_index(eb, extref);
parent = btrfs_inode_extref_parent(eb, extref);
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
error = 0;
} else {
len = BTRFS_NAME_LEN;
error = REF_ERR_NAME_TOO_LONG;
}
read_extent_buffer(eb, namebuf,
(unsigned long)(extref + 1), len);
add_inode_backref(inode_cache, key->objectid, parent,
index, namebuf, len, 0, key->type, error);
len = sizeof(*extref) + name_len;
extref = (struct btrfs_inode_extref *)((char *)extref + len);
cur += len;
}
return 0;
}
static int process_file_extent(struct btrfs_root *root,
struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
struct inode_record *rec;
struct btrfs_file_extent_item *fi;
u64 num_bytes = 0;
u64 disk_bytenr = 0;
u64 extent_offset = 0;
u64 mask = gfs_info->sectorsize - 1;
u32 max_inline_size = min_t(u32, mask,
BTRFS_MAX_INLINE_DATA_SIZE(gfs_info));
u8 compression;
int extent_type;
int ret;
rec = active_node->current;
BUG_ON(rec->ino != key->objectid || rec->refs > 1);
rec->found_file_extent = 1;
if (rec->extent_start == (u64)-1) {
rec->extent_start = key->offset;
rec->extent_end = key->offset;
}
if (rec->extent_end > key->offset)
rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
else if (rec->extent_end < key->offset) {
ret = add_file_extent_hole(&rec->holes, rec->extent_end,
key->offset - rec->extent_end);
if (ret < 0)
return ret;
}
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(eb, fi);
compression = btrfs_file_extent_compression(eb, fi);
if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
num_bytes = btrfs_file_extent_ram_bytes(eb, fi);
if (num_bytes == 0)
rec->errors |= I_ERR_BAD_FILE_EXTENT;
if (compression) {
if (btrfs_file_extent_inline_item_len(eb, slot) >
max_inline_size ||
num_bytes > gfs_info->sectorsize)
rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
} else {
if (num_bytes > max_inline_size)
rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
if (btrfs_file_extent_inline_item_len(eb, slot) !=
num_bytes)
rec->errors |= I_ERR_INLINE_RAM_BYTES_WRONG;
}
rec->found_size += num_bytes;
num_bytes = (num_bytes + mask) & ~mask;
} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
num_bytes = btrfs_file_extent_num_bytes(eb, fi);
disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
extent_offset = btrfs_file_extent_offset(eb, fi);
if (num_bytes == 0 || (num_bytes & mask))
rec->errors |= I_ERR_BAD_FILE_EXTENT;
if (num_bytes + extent_offset >
btrfs_file_extent_ram_bytes(eb, fi))
rec->errors |= I_ERR_BAD_FILE_EXTENT;
if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
(btrfs_file_extent_compression(eb, fi) ||
btrfs_file_extent_encryption(eb, fi) ||
btrfs_file_extent_other_encoding(eb, fi)))
rec->errors |= I_ERR_BAD_FILE_EXTENT;
if (compression && rec->nodatasum)
rec->errors |= I_ERR_BAD_FILE_EXTENT;
if (disk_bytenr > 0)
rec->found_size += num_bytes;
} else {
rec->errors |= I_ERR_BAD_FILE_EXTENT;
}
rec->extent_end = key->offset + num_bytes;
/*
* The data reloc tree will copy full extents into its inode and then
* copy the corresponding csums. Because the extent it copied could be
* a preallocated extent that hasn't been written to yet there may be no
* csums to copy, ergo we won't have csums for our file extent. This is
* ok so just don't bother checking csums if the inode belongs to the
* data reloc tree.
*/
if (disk_bytenr > 0 &&
btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 found;
if (compression)
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
else
disk_bytenr += extent_offset;
ret = count_csum_range(disk_bytenr, num_bytes, &found);
if (ret < 0)
return ret;
if (extent_type == BTRFS_FILE_EXTENT_REG) {
if (found > 0)
rec->found_csum_item = 1;
if (found < num_bytes)
rec->some_csum_missing = 1;
if (compression && found < num_bytes)
rec->errors |= I_ERR_SOME_CSUM_MISSING;
} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
Btrfs-progs: check, fix false error reports for shared prealloc extents Under some cases the filesystem checker reports an error when it finds checksum items for an extent that is referenced by an inode as a prealloc extent. Such cases are not an error when the extent is actually shared (was cloned/reflinked) with other inodes and was written through one of those other inodes. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/foo $ xfs_io -c "falloc 0 256K" /mnt/foo $ sync $ xfs_io -c "pwrite -S 0xab 0 256K" /mnt/foo $ touch /mnt/bar $ xfs_io -c "reflink /mnt/foo 0 0 256K" /mnt/bar $ xfs_io -c "fsync" /mnt/bar <power fail> $ mount /dev/sdb /mnt $ umount /mnt $ btrfs check /dev/sdc Checking filesystem on /dev/sdb UUID: 52d3006e-ee3b-40eb-aa21-e56253a03d39 checking extents checking free space cache checking fs roots root 5 inode 257 errors 800, odd csum item ERROR: errors found in fs roots found 688128 bytes used, error(s) found total csum bytes: 256 total tree bytes: 163840 total fs tree bytes: 65536 total extent tree bytes: 16384 btree space waste bytes: 138819 file data blocks allocated: 10747904 referenced 10747904 $ echo $? 1 So teach check to not report such cases as errors by checking if the extent is shared with other inodes and if so, consider it an error the existence of checksum items only if all those other inodes are referencing the extent as a prealloc extent. This case can be hit often when running the generic/475 testcase from fstests. A test case will follow in a separate patch. Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: Nikolay Borisov <nborisov@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2018-03-15 04:11:09 +08:00
if (found > 0) {
ret = check_prealloc_extent_written(disk_bytenr,
Btrfs-progs: check, fix false error reports for shared prealloc extents Under some cases the filesystem checker reports an error when it finds checksum items for an extent that is referenced by an inode as a prealloc extent. Such cases are not an error when the extent is actually shared (was cloned/reflinked) with other inodes and was written through one of those other inodes. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/foo $ xfs_io -c "falloc 0 256K" /mnt/foo $ sync $ xfs_io -c "pwrite -S 0xab 0 256K" /mnt/foo $ touch /mnt/bar $ xfs_io -c "reflink /mnt/foo 0 0 256K" /mnt/bar $ xfs_io -c "fsync" /mnt/bar <power fail> $ mount /dev/sdb /mnt $ umount /mnt $ btrfs check /dev/sdc Checking filesystem on /dev/sdb UUID: 52d3006e-ee3b-40eb-aa21-e56253a03d39 checking extents checking free space cache checking fs roots root 5 inode 257 errors 800, odd csum item ERROR: errors found in fs roots found 688128 bytes used, error(s) found total csum bytes: 256 total tree bytes: 163840 total fs tree bytes: 65536 total extent tree bytes: 16384 btree space waste bytes: 138819 file data blocks allocated: 10747904 referenced 10747904 $ echo $? 1 So teach check to not report such cases as errors by checking if the extent is shared with other inodes and if so, consider it an error the existence of checksum items only if all those other inodes are referencing the extent as a prealloc extent. This case can be hit often when running the generic/475 testcase from fstests. A test case will follow in a separate patch. Signed-off-by: Filipe Manana <fdmanana@suse.com> Reviewed-by: Nikolay Borisov <nborisov@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2018-03-15 04:11:09 +08:00
num_bytes);
if (ret < 0)
return ret;
if (ret == 0)
rec->errors |= I_ERR_ODD_CSUM_ITEM;
}
}
}
return 0;
}
static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
struct walk_control *wc)
{
struct btrfs_key key;
u32 nritems;
int i;
int ret = 0;
struct cache_tree *inode_cache;
struct shared_node *active_node;
if (wc->root_level == wc->active_node &&
btrfs_root_refs(&root->root_item) == 0)
return 0;
active_node = wc->nodes[wc->active_node];
inode_cache = &active_node->inode_cache;
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
btrfs_item_key_to_cpu(eb, &key, i);
if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
continue;
if (key.type == BTRFS_ORPHAN_ITEM_KEY)
continue;
if (active_node->current == NULL ||
active_node->current->ino < key.objectid) {
if (active_node->current) {
active_node->current->checked = 1;
maybe_free_inode_rec(inode_cache,
active_node->current);
}
active_node->current = get_inode_rec(inode_cache,
key.objectid, 1);
BUG_ON(IS_ERR(active_node->current));
}
switch (key.type) {
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
ret = process_dir_item(eb, i, &key, active_node);
break;
case BTRFS_INODE_REF_KEY:
ret = process_inode_ref(eb, i, &key, active_node);
break;
case BTRFS_INODE_EXTREF_KEY:
ret = process_inode_extref(eb, i, &key, active_node);
break;
case BTRFS_INODE_ITEM_KEY:
ret = process_inode_item(eb, i, &key, active_node);
break;
case BTRFS_EXTENT_DATA_KEY:
ret = process_file_extent(root, eb, i, &key,
active_node);
break;
case BTRFS_XATTR_ITEM_KEY:
ret = process_xattr_item(eb, i, &key, active_node);
break;
default:
break;
};
}
return ret;
}
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
struct walk_control *wc, int *level,
struct node_refs *nrefs)
{
enum btrfs_tree_block_status status;
u64 bytenr;
u64 ptr_gen;
struct extent_buffer *next;
struct extent_buffer *cur;
int ret, err = 0;
u64 refs;
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
refs = nrefs->refs[*level];
ret = 0;
} else {
ret = btrfs_lookup_extent_info(NULL, gfs_info,
path->nodes[*level]->start,
*level, 1, &refs, NULL);
if (ret < 0) {
err = ret;
goto out;
}
nrefs->bytenr[*level] = path->nodes[*level]->start;
nrefs->refs[*level] = refs;
}
if (refs > 1) {
ret = enter_shared_node(root, path->nodes[*level]->start,
refs, wc, *level);
if (ret > 0) {
err = ret;
goto out;
}
}
while (*level >= 0) {
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
if (btrfs_header_level(cur) != *level)
WARN_ON(1);
if (path->slots[*level] >= btrfs_header_nritems(cur))
break;
if (*level == 0) {
ret = process_one_leaf(root, cur, wc);
if (ret < 0)
err = ret;
break;
}
bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
if (bytenr == nrefs->bytenr[*level - 1]) {
refs = nrefs->refs[*level - 1];
} else {
ret = btrfs_lookup_extent_info(NULL, gfs_info, bytenr,
*level - 1, 1, &refs, NULL);
if (ret < 0) {
refs = 0;
} else {
nrefs->bytenr[*level - 1] = bytenr;
nrefs->refs[*level - 1] = refs;
}
}
if (refs > 1) {
ret = enter_shared_node(root, bytenr, refs,
wc, *level - 1);
if (ret > 0) {
path->slots[*level]++;
continue;
}
}
next = btrfs_find_tree_block(gfs_info, bytenr, gfs_info->nodesize);
if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
free_extent_buffer(next);
reada_walk_down(root, cur, path->slots[*level]);
next = read_tree_block(gfs_info, bytenr, ptr_gen);
if (!extent_buffer_uptodate(next)) {
struct btrfs_key node_key;
btrfs_node_key_to_cpu(path->nodes[*level],
&node_key,
path->slots[*level]);
btrfs_add_corrupt_extent_record(gfs_info,
&node_key,
path->nodes[*level]->start,
gfs_info->nodesize,
*level);
err = -EIO;
goto out;
}
}
ret = check_child_node(cur, path->slots[*level], next);
if (ret) {
free_extent_buffer(next);
err = ret;
goto out;
}
if (btrfs_is_leaf(next))
status = btrfs_check_leaf(gfs_info, NULL, next);
else
status = btrfs_check_node(gfs_info, NULL, next);
if (status != BTRFS_TREE_BLOCK_CLEAN) {
free_extent_buffer(next);
err = -EIO;
goto out;
}
*level = *level - 1;
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = next;
path->slots[*level] = 0;
}
out:
path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
return err;
}
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
struct walk_control *wc, int *level)
{
int i;
struct extent_buffer *leaf;
for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
leaf = path->nodes[i];
if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
path->slots[i]++;
*level = i;
return 0;
}
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
BUG_ON(*level > wc->active_node);
if (*level == wc->active_node)
leave_shared_node(root, wc, *level);
*level = i + 1;
}
return 1;
}
static int check_root_dir(struct inode_record *rec)
{
struct inode_backref *backref;
int ret = -1;
if (rec->errors)
goto out;
if (!rec->found_inode_item) {
rec->errors |= I_ERR_NO_INODE_ITEM;
goto out;
}
if (rec->nlink != 1 || rec->found_link != 0) {
rec->errors |= I_ERR_LINK_COUNT_WRONG;
goto out;
}
if (list_empty(&rec->backrefs)) {
rec->errors |= REF_ERR_NO_ROOT_BACKREF;
goto out;
}
backref = to_inode_backref(rec->backrefs.next);
if (!backref->found_inode_ref) {
rec->errors |= REF_ERR_NO_INODE_REF;
goto out;
}
if (backref->index != 0 || backref->namelen != 2 ||
memcmp(backref->name, "..", 2)) {
rec->errors |= I_ERR_ODD_DIR_ITEM;
goto out;
}
if (backref->found_dir_index) {
rec->errors |= REF_ERR_DUP_DIR_INDEX;
goto out;
}
if (backref->found_dir_item) {
rec->errors |= REF_ERR_DUP_DIR_ITEM;
goto out;
}
ret = 0;
out:
return ret;
}
static int repair_inode_isize(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
struct inode_record *rec)
{
struct btrfs_inode_item *ei;
struct btrfs_key key;
int ret;
key.objectid = rec->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = (u64)-1;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0)
goto out;
if (ret) {
if (!path->slots[0]) {
ret = -ENOENT;
goto out;
}
path->slots[0]--;
ret = 0;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.objectid != rec->ino) {
ret = -ENOENT;
goto out;
}
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
btrfs_mark_buffer_dirty(path->nodes[0]);
rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
printf("reset isize for dir %llu root %llu\n", rec->ino,
root->root_key.objectid);
out:
btrfs_release_path(path);
return ret;
}
static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
int ret;
ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
btrfs_release_path(path);
if (!ret)
rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
return ret;
}
static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
struct btrfs_inode_item *ei;
struct btrfs_key key;
int ret = 0;
key.objectid = rec->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
goto out;
}
/* Since ret == 0, no need to check anything */
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
btrfs_mark_buffer_dirty(path->nodes[0]);
rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
printf("reset nbytes for ino %llu root %llu\n",
rec->ino, root->root_key.objectid);
out:
btrfs_release_path(path);
return ret;
}
static int add_missing_dir_index(struct btrfs_root *root,
struct cache_tree *inode_cache,
struct inode_record *rec,
struct inode_backref *backref)
{
struct btrfs_path path;
struct btrfs_trans_handle *trans;
struct btrfs_dir_item *dir_item;
struct extent_buffer *leaf;
struct btrfs_key key;
struct btrfs_disk_key disk_key;
struct inode_record *dir_rec;
unsigned long name_ptr;
u32 data_size = sizeof(*dir_item) + backref->namelen;
int ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
fprintf(stderr, "repairing missing dir index item for inode %llu\n",
(unsigned long long)rec->ino);
btrfs_init_path(&path);
key.objectid = backref->dir;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = backref->index;
ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
BUG_ON(ret);
leaf = path.nodes[0];
dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
disk_key.objectid = cpu_to_le64(rec->ino);
disk_key.type = BTRFS_INODE_ITEM_KEY;
disk_key.offset = 0;
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
btrfs_set_dir_data_len(leaf, dir_item, 0);
btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
name_ptr = (unsigned long)(dir_item + 1);
write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(&path);
btrfs_commit_transaction(trans, root);
backref->found_dir_index = 1;
dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
BUG_ON(IS_ERR(dir_rec));
if (!dir_rec)
return 0;
dir_rec->found_size += backref->namelen;
if (dir_rec->found_size == dir_rec->isize &&
(dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
if (dir_rec->found_size != dir_rec->isize)
dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
return 0;
}
static int delete_dir_index(struct btrfs_root *root,
struct inode_backref *backref)
{
struct btrfs_trans_handle *trans;
struct btrfs_dir_item *di;
struct btrfs_path path;
int ret = 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
(unsigned long long)backref->dir,
BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
(unsigned long long)root->objectid);
btrfs_init_path(&path);
di = btrfs_lookup_dir_index_item(trans, root, &path, backref->dir,
backref->index, backref->name,
backref->namelen, -1);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
btrfs_release_path(&path);
btrfs_commit_transaction(trans, root);
if (ret == -ENOENT)
return 0;
return ret;
}
if (!di)
ret = btrfs_del_item(trans, root, &path);
else
ret = btrfs_delete_one_dir_name(trans, root, &path, di);
BUG_ON(ret);
btrfs_release_path(&path);
btrfs_commit_transaction(trans, root);
return ret;
}
static int create_inode_item(struct btrfs_root *root,
struct inode_record *rec, int root_dir)
{
struct btrfs_trans_handle *trans;
u64 nlink = 0;
u32 mode = 0;
u64 size = 0;
int ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
return ret;
}
nlink = root_dir ? 1 : rec->found_link;
if (rec->found_dir_item) {
if (rec->found_file_extent)
fprintf(stderr, "root %llu inode %llu has both a dir "
"item and extents, unsure if it is a dir or a "
"regular file so setting it as a directory\n",
(unsigned long long)root->objectid,
(unsigned long long)rec->ino);
mode = S_IFDIR | 0755;
size = rec->found_size;
} else if (!rec->found_dir_item) {
size = rec->extent_end;
mode = S_IFREG | 0755;
}
ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
nlink, mode);
btrfs_commit_transaction(trans, root);
return 0;
}
static int repair_inode_backrefs(struct btrfs_root *root,
struct inode_record *rec,
struct cache_tree *inode_cache,
int delete)
{
struct inode_backref *tmp, *backref;
u64 root_dirid = btrfs_root_dirid(&root->root_item);
int ret = 0;
int repaired = 0;
list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
if (!delete && rec->ino == root_dirid) {
if (!rec->found_inode_item) {
ret = create_inode_item(root, rec, 1);
if (ret)
break;
repaired++;
}
}
/* Index 0 for root dir's are special, don't mess with it */
if (rec->ino == root_dirid && backref->index == 0)
continue;
if (delete &&
((backref->found_dir_index && !backref->found_inode_ref) ||
(backref->found_dir_index && backref->found_inode_ref &&
(backref->errors & REF_ERR_INDEX_UNMATCH)))) {
ret = delete_dir_index(root, backref);
if (ret)
break;
repaired++;
list_del(&backref->list);
free(backref);
btrfs-progs: check: Fix heap use after free fsck/004-no-dir-index makes valgrinds complaining about Invalid read. ==31890== Invalid read of size 1 ==31890== at 0x453D09: repair_inode_backrefs (cmds-check.c:2690) ==31890== by 0x453D09: check_inode_recs (cmds-check.c:3330) ==31890== by 0x453D09: check_fs_root (cmds-check.c:4012) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== Address 0x5cb7b90 is 16 bytes inside a block of size 50 free'd ==31890== at 0x4C2C14B: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==31890== by 0x453D08: repair_inode_backrefs (cmds-check.c:2684) ==31890== by 0x453D08: check_inode_recs (cmds-check.c:3330) ==31890== by 0x453D08: check_fs_root (cmds-check.c:4012) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== Block was alloc'd at ==31890== at 0x4C2AF1F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==31890== by 0x45055C: get_inode_backref (cmds-check.c:1075) ==31890== by 0x45055C: add_inode_backref (cmds-check.c:1097) ==31890== by 0x45180C: process_dir_item (cmds-check.c:1525) ==31890== by 0x45180C: process_one_leaf (cmds-check.c:1838) ==31890== by 0x45180C: walk_down_tree (cmds-check.c:2134) ==31890== by 0x45180C: check_fs_root (cmds-check.c:3957) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== ==31890== Invalid read of size 8 ==31890== at 0x452D66: repair_inode_backrefs (cmds-check.c:2731) ==31890== by 0x452D66: check_inode_recs (cmds-check.c:3330) ==31890== by 0x452D66: check_fs_root (cmds-check.c:4012) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== Address 0x5cb7b90 is 16 bytes inside a block of size 50 free'd ==31890== at 0x4C2C14B: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==31890== by 0x453D08: repair_inode_backrefs (cmds-check.c:2684) ==31890== by 0x453D08: check_inode_recs (cmds-check.c:3330) ==31890== by 0x453D08: check_fs_root (cmds-check.c:4012) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== Block was alloc'd at ==31890== at 0x4C2AF1F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==31890== by 0x45055C: get_inode_backref (cmds-check.c:1075) ==31890== by 0x45055C: add_inode_backref (cmds-check.c:1097) ==31890== by 0x45180C: process_dir_item (cmds-check.c:1525) ==31890== by 0x45180C: process_one_leaf (cmds-check.c:1838) ==31890== by 0x45180C: walk_down_tree (cmds-check.c:2134) ==31890== by 0x45180C: check_fs_root (cmds-check.c:3957) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== While iterating over backrefs in repair_inode_backrefs, there are several situations to repair one backref according backref->found_dir_item and backref->found_dir_index. Two of these branches may free the backref, but next checks will still access the freed memory. Because these branches are independent, let repair_inode_backrefs skip to handle next backref after free can fix it. Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.com>
2017-05-03 09:50:14 +08:00
continue;
}
if (!delete && !backref->found_dir_index &&
backref->found_dir_item && backref->found_inode_ref) {
ret = add_missing_dir_index(root, inode_cache, rec,
backref);
if (ret)
break;
repaired++;
if (backref->found_dir_item &&
backref->found_dir_index) {
if (!backref->errors &&
backref->found_inode_ref) {
list_del(&backref->list);
free(backref);
btrfs-progs: check: Fix heap use after free fsck/004-no-dir-index makes valgrinds complaining about Invalid read. ==31890== Invalid read of size 1 ==31890== at 0x453D09: repair_inode_backrefs (cmds-check.c:2690) ==31890== by 0x453D09: check_inode_recs (cmds-check.c:3330) ==31890== by 0x453D09: check_fs_root (cmds-check.c:4012) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== Address 0x5cb7b90 is 16 bytes inside a block of size 50 free'd ==31890== at 0x4C2C14B: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==31890== by 0x453D08: repair_inode_backrefs (cmds-check.c:2684) ==31890== by 0x453D08: check_inode_recs (cmds-check.c:3330) ==31890== by 0x453D08: check_fs_root (cmds-check.c:4012) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== Block was alloc'd at ==31890== at 0x4C2AF1F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==31890== by 0x45055C: get_inode_backref (cmds-check.c:1075) ==31890== by 0x45055C: add_inode_backref (cmds-check.c:1097) ==31890== by 0x45180C: process_dir_item (cmds-check.c:1525) ==31890== by 0x45180C: process_one_leaf (cmds-check.c:1838) ==31890== by 0x45180C: walk_down_tree (cmds-check.c:2134) ==31890== by 0x45180C: check_fs_root (cmds-check.c:3957) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== ==31890== Invalid read of size 8 ==31890== at 0x452D66: repair_inode_backrefs (cmds-check.c:2731) ==31890== by 0x452D66: check_inode_recs (cmds-check.c:3330) ==31890== by 0x452D66: check_fs_root (cmds-check.c:4012) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== Address 0x5cb7b90 is 16 bytes inside a block of size 50 free'd ==31890== at 0x4C2C14B: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==31890== by 0x453D08: repair_inode_backrefs (cmds-check.c:2684) ==31890== by 0x453D08: check_inode_recs (cmds-check.c:3330) ==31890== by 0x453D08: check_fs_root (cmds-check.c:4012) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== Block was alloc'd at ==31890== at 0x4C2AF1F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==31890== by 0x45055C: get_inode_backref (cmds-check.c:1075) ==31890== by 0x45055C: add_inode_backref (cmds-check.c:1097) ==31890== by 0x45180C: process_dir_item (cmds-check.c:1525) ==31890== by 0x45180C: process_one_leaf (cmds-check.c:1838) ==31890== by 0x45180C: walk_down_tree (cmds-check.c:2134) ==31890== by 0x45180C: check_fs_root (cmds-check.c:3957) ==31890== by 0x45E788: check_fs_roots (cmds-check.c:4098) ==31890== by 0x45E788: cmd_check (cmds-check.c:13031) ==31890== by 0x40A88A: main (btrfs.c:246) ==31890== While iterating over backrefs in repair_inode_backrefs, there are several situations to repair one backref according backref->found_dir_item and backref->found_dir_index. Two of these branches may free the backref, but next checks will still access the freed memory. Because these branches are independent, let repair_inode_backrefs skip to handle next backref after free can fix it. Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.com>
2017-05-03 09:50:14 +08:00
continue;
}
}
}
if (!delete && (!backref->found_dir_index &&
!backref->found_dir_item &&
backref->found_inode_ref)) {
struct btrfs_trans_handle *trans;
struct btrfs_key location;
ret = check_dir_conflict(root, backref->name,
backref->namelen,
backref->dir,
backref->index);
if (ret) {
/*
* let nlink fixing routine to handle it,
* which can do it better.
*/
ret = 0;
break;
}
location.objectid = rec->ino;
location.type = BTRFS_INODE_ITEM_KEY;
location.offset = 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
break;
}
fprintf(stderr, "adding missing dir index/item pair "
"for inode %llu\n",
(unsigned long long)rec->ino);
ret = btrfs_insert_dir_item(trans, root, backref->name,
backref->namelen,
backref->dir, &location,
imode_to_type(rec->imode),
backref->index);
BUG_ON(ret);
btrfs_commit_transaction(trans, root);
repaired++;
}
if (!delete && (backref->found_inode_ref &&
backref->found_dir_index &&
backref->found_dir_item &&
!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
!rec->found_inode_item)) {
ret = create_inode_item(root, rec, 0);
if (ret)
break;
repaired++;
}
}
return ret ? ret : repaired;
}
/*
* To determine the file type for nlink/inode_item repair
*
* Return 0 if file type is found and BTRFS_FT_* is stored into type.
* Return -ENOENT if file type is not found.
*/
static int find_file_type(struct inode_record *rec, u8 *type)
{
struct inode_backref *backref;
/* For inode item recovered case */
if (rec->found_inode_item) {
*type = imode_to_type(rec->imode);
return 0;
}
list_for_each_entry(backref, &rec->backrefs, list) {
if (backref->found_dir_index || backref->found_dir_item) {
*type = backref->filetype;
return 0;
}
}
return -ENOENT;
}
/*
* To determine the file name for nlink repair
*
* Return 0 if file name is found, set name and namelen.
* Return -ENOENT if file name is not found.
*/
static int find_file_name(struct inode_record *rec,
char *name, int *namelen)
{
struct inode_backref *backref;
list_for_each_entry(backref, &rec->backrefs, list) {
if (backref->found_dir_index || backref->found_dir_item ||
backref->found_inode_ref) {
memcpy(name, backref->name, backref->namelen);
*namelen = backref->namelen;
return 0;
}
}
return -ENOENT;
}
/* Reset the nlink of the inode to the correct one */
static int reset_nlink(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
struct inode_backref *backref;
struct inode_backref *tmp;
struct btrfs_key key;
struct btrfs_inode_item *inode_item;
int ret = 0;
/* We don't believe this either, reset it and iterate backref */
rec->found_link = 0;
/* Remove all backref including the valid ones */
list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
backref->index, backref->name,
backref->namelen, 0);
if (ret < 0)
goto out;
/* remove invalid backref, so it won't be added back */
if (!(backref->found_dir_index &&
backref->found_dir_item &&
backref->found_inode_ref)) {
list_del(&backref->list);
free(backref);
} else {
rec->found_link++;
}
}
/* Set nlink to 0 */
key.objectid = rec->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0)
goto out;
if (ret > 0) {
ret = -ENOENT;
goto out;
}
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_release_path(path);
/*
* Add back valid inode_ref/dir_item/dir_index,
* add_link() will handle the nlink inc, so new nlink must be correct
*/
list_for_each_entry(backref, &rec->backrefs, list) {
ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
backref->name, backref->namelen,
backref->filetype, &backref->index, 1, 0);
if (ret < 0)
goto out;
}
out:
btrfs_release_path(path);
return ret;
}
static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
char namebuf[BTRFS_NAME_LEN] = {0};
u8 type = 0;
int namelen = 0;
int name_recovered = 0;
int type_recovered = 0;
int ret = 0;
/*
* Get file name and type first before these invalid inode ref
* are deleted by remove_all_invalid_backref()
*/
name_recovered = !find_file_name(rec, namebuf, &namelen);
type_recovered = !find_file_type(rec, &type);
if (!name_recovered) {
printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
rec->ino, rec->ino);
namelen = count_digits(rec->ino);
sprintf(namebuf, "%llu", rec->ino);
name_recovered = 1;
}
if (!type_recovered) {
printf("Can't get file type for inode %llu, using FILE as fallback\n",
rec->ino);
type = BTRFS_FT_REG_FILE;
type_recovered = 1;
}
ret = reset_nlink(trans, root, path, rec);
if (ret < 0) {
errno = -ret;
fprintf(stderr,
"Failed to reset nlink for inode %llu: %m\n", rec->ino);
goto out;
}
if (rec->found_link == 0) {
ret = link_inode_to_lostfound(trans, root, path, rec->ino,
namebuf, namelen, type,
(u64 *)&rec->found_link);
if (ret)
goto out;
}
printf("Fixed the nlink of inode %llu\n", rec->ino);
out:
/*
* Clear the flag anyway, or we will loop forever for the same inode
* as it will not be removed from the bad inode list and the dead loop
* happens.
*/
rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
btrfs_release_path(path);
return ret;
}
/*
* Check if there is any normal(reg or prealloc) file extent for given
* ino.
* This is used to determine the file type when neither its dir_index/item or
* inode_item exists.
*
* This will *NOT* report error, if any error happens, just consider it does
* not have any normal file extent.
*/
static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
{
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
u8 type;
int ret = 0;
btrfs_init_path(&path);
key.objectid = ino;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
ret = 0;
goto out;
}
if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
ret = btrfs_next_leaf(root, &path);
if (ret) {
ret = 0;
goto out;
}
}
while (1) {
btrfs_item_key_to_cpu(path.nodes[0], &found_key,
path.slots[0]);
if (found_key.objectid != ino ||
found_key.type != BTRFS_EXTENT_DATA_KEY)
break;
fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path.nodes[0], fi);
if (type != BTRFS_FILE_EXTENT_INLINE) {
ret = 1;
goto out;
}
}
out:
btrfs_release_path(&path);
return ret;
}
static int repair_inode_no_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
u8 filetype;
u32 mode = 0700;
int type_recovered = 0;
int ret = 0;
printf("Trying to rebuild inode:%llu\n", rec->ino);
type_recovered = !find_file_type(rec, &filetype);
/*
* Try to determine inode type if type not found.
*
* For found regular file extent, it must be FILE.
* For found dir_item/index, it must be DIR.
*
* For undetermined one, use FILE as fallback.
*
* TODO:
* 1. If found backref(inode_index/item is already handled) to it,
* it must be DIR.
* Need new inode-inode ref structure to allow search for that.
*/
if (!type_recovered) {
if (rec->found_file_extent &&
find_normal_file_extent(root, rec->ino)) {
type_recovered = 1;
filetype = BTRFS_FT_REG_FILE;
} else if (rec->found_dir_item) {
type_recovered = 1;
filetype = BTRFS_FT_DIR;
} else{
printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
rec->ino);
type_recovered = 1;
filetype = BTRFS_FT_REG_FILE;
}
}
ret = btrfs_new_inode(trans, root, rec->ino,
mode | btrfs_type_to_imode(filetype));
if (ret < 0)
goto out;
/*
* Here inode rebuild is done, we only rebuild the inode item,
* don't repair the nlink(like move to lost+found).
* That is the job of nlink repair.
*
* We just fill the record and return
*/
rec->found_dir_item = 1;
rec->imode = mode | btrfs_type_to_imode(filetype);
rec->nlink = 0;
rec->errors &= ~I_ERR_NO_INODE_ITEM;
/* Ensure the inode_nlinks repair function will be called */
rec->errors |= I_ERR_LINK_COUNT_WRONG;
out:
return ret;
}
static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
struct rb_node *node;
struct file_extent_hole *hole;
int found = 0;
int ret = 0;
node = rb_first(&rec->holes);
while (node) {
found = 1;
hole = rb_entry(node, struct file_extent_hole, node);
ret = btrfs_punch_hole(trans, root, rec->ino,
hole->start, hole->len);
if (ret < 0)
goto out;
ret = del_file_extent_hole(&rec->holes, hole->start,
hole->len);
if (ret < 0)
goto out;
if (RB_EMPTY_ROOT(&rec->holes))
rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
node = rb_first(&rec->holes);
}
/* special case for a file losing all its file extent */
if (!found) {
ret = btrfs_punch_hole(trans, root, rec->ino, 0,
round_up(rec->isize, gfs_info->sectorsize));
if (ret < 0)
goto out;
}
printf("Fixed discount file extents for inode: %llu in root: %llu\n",
rec->ino, root->objectid);
out:
return ret;
}
static int repair_inline_ram_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
u64 on_disk_item_len;
int ret;
key.objectid = rec->ino;
key.offset = 0;
key.type = BTRFS_EXTENT_DATA_KEY;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
goto out;
on_disk_item_len = btrfs_file_extent_inline_item_len(path->nodes[0],
path->slots[0]);
fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_file_extent_item);
btrfs_set_file_extent_ram_bytes(path->nodes[0], fi, on_disk_item_len);
btrfs_mark_buffer_dirty(path->nodes[0]);
printf("Repaired inline ram_bytes for root %llu ino %llu\n",
root->objectid, rec->ino);
rec->errors &= ~I_ERR_INLINE_RAM_BYTES_WRONG;
out:
btrfs_release_path(path);
return ret;
}
static int repair_mismatch_dir_hash(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode_record *rec)
{
struct mismatch_dir_hash_record *hash;
int ret = -EUCLEAN;
printf(
"Deleting bad dir items with invalid hash for root %llu ino %llu\n",
root->root_key.objectid, rec->ino);
while (!list_empty(&rec->mismatch_dir_hash)) {
char *namebuf;
hash = list_entry(rec->mismatch_dir_hash.next,
struct mismatch_dir_hash_record, list);
namebuf = (char *)(hash + 1);
ret = delete_corrupted_dir_item(trans, root, &hash->key,
namebuf, hash->namelen);
if (ret < 0)
break;
/* Also reduce dir isize */
rec->found_size -= hash->namelen;
list_del(&hash->list);
free(hash);
}
if (!ret) {
rec->errors &= ~I_ERR_MISMATCH_DIR_HASH;
/* We rely on later dir isize repair to reset dir isize */
rec->errors |= I_ERR_DIR_ISIZE_WRONG;
}
return ret;
}
static int btrfs_delete_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_key *key)
{
struct btrfs_path path;
int ret = 0;
btrfs_init_path(&path);
ret = btrfs_search_slot(trans, root, key, &path, -1, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
btrfs_release_path(&path);
return ret;
}
ret = btrfs_del_item(trans, root, &path);
btrfs_release_path(&path);
return ret;
}
static int find_file_extent_offset_by_bytenr(struct btrfs_root *root,
u64 owner, u64 bytenr, u64 *offset_ret)
{
int ret = 0;
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
u64 disk_bytenr;
int slot;
btrfs_init_path(&path);
key.objectid = owner;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret) {
if (ret > 0)
ret = -ENOENT;
btrfs_release_path(&path);
return ret;
}
btrfs_release_path(&path);
key.objectid = owner;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
btrfs_release_path(&path);
return ret;
}
while (1) {
leaf = path.nodes[0];
slot = path.slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, &path);
if (ret) {
if (ret > 0)
ret = 0;
break;
}
leaf = path.nodes[0];
slot = path.slots[0];
}
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if ((found_key.objectid != owner) ||
(found_key.type != BTRFS_EXTENT_DATA_KEY))
break;
fi = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
if (disk_bytenr == bytenr) {
*offset_ret = found_key.offset;
ret = 0;
break;
}
path.slots[0]++;
}
btrfs_release_path(&path);
return ret;
}
static int repair_unaligned_extent_recs(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
int ret = 0;
struct btrfs_key key;
struct unaligned_extent_rec_t *urec;
struct unaligned_extent_rec_t *tmp;
list_for_each_entry_safe(urec, tmp, &rec->unaligned_extent_recs, list) {
key.objectid = urec->owner;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = urec->offset;
fprintf(stderr, "delete file extent item [%llu,%llu]\n",
urec->owner, urec->offset);
ret = btrfs_delete_item(trans, root, &key);
if (ret)
return ret;
list_del(&urec->list);
free(urec);
}
rec->errors &= ~I_ERR_UNALIGNED_EXTENT_REC;
return ret;
}
static int repair_imode_original(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
struct btrfs_key key;
int ret;
u32 imode;
key.objectid = rec->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
return ret;
if (root->objectid == BTRFS_ROOT_TREE_OBJECTID) {
/* In root tree we only have two possible imode */
if (rec->ino == BTRFS_ROOT_TREE_OBJECTID)
imode = S_IFDIR | 0755;
else
imode = S_IFREG | 0600;
} else {
ret = detect_imode(root, path, &imode);
if (ret < 0) {
btrfs_release_path(path);
return ret;
}
}
btrfs_release_path(path);
ret = reset_imode(trans, root, path, rec->ino, imode);
btrfs_release_path(path);
if (ret < 0)
return ret;
rec->errors &= ~I_ERR_INVALID_IMODE;
rec->imode = imode;
return ret;
}
static int repair_inode_gen_original(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
struct btrfs_inode_item *ii;
struct btrfs_key key;
int ret;
key.objectid = rec->ino;
key.offset = 0;
key.type = BTRFS_INODE_ITEM_KEY;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0) {
ret = -ENOENT;
error("no inode item found for ino %llu", rec->ino);
return ret;
}
if (ret < 0) {
errno = -ret;
error("failed to search inode item for ino %llu: %m", rec->ino);
return ret;
}
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_generation(path->nodes[0], ii, trans->transid);
btrfs_set_inode_transid(path->nodes[0], ii, trans->transid);
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_release_path(path);
printf("resetting inode generation/transid to %llu for ino %llu\n",
trans->transid, rec->ino);
rec->errors &= ~I_ERR_INVALID_GEN;
return 0;
}
static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
{
struct btrfs_trans_handle *trans;
struct btrfs_path path;
int ret = 0;
/* unaligned extent recs always lead to csum missing error, clean it */
if ((rec->errors & I_ERR_SOME_CSUM_MISSING) &&
(rec->errors & I_ERR_UNALIGNED_EXTENT_REC))
rec->errors &= ~I_ERR_SOME_CSUM_MISSING;
if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
I_ERR_NO_ORPHAN_ITEM |
I_ERR_LINK_COUNT_WRONG |
I_ERR_NO_INODE_ITEM |
I_ERR_FILE_EXTENT_DISCOUNT |
I_ERR_FILE_NBYTES_WRONG |
I_ERR_INLINE_RAM_BYTES_WRONG |
I_ERR_MISMATCH_DIR_HASH |
I_ERR_UNALIGNED_EXTENT_REC |
I_ERR_INVALID_IMODE |
I_ERR_INVALID_GEN)))
return rec->errors;
/*
* For nlink repair, it may create a dir and add link, so
* 2 for parent(256)'s dir_index and dir_item
* 2 for lost+found dir's inode_item and inode_ref
* 1 for the new inode_ref of the file
* 2 for lost+found dir's dir_index and dir_item for the file
*/
trans = btrfs_start_transaction(root, 7);
if (IS_ERR(trans))
return PTR_ERR(trans);
btrfs_init_path(&path);
if (!ret && rec->errors & I_ERR_MISMATCH_DIR_HASH)
ret = repair_mismatch_dir_hash(trans, root, rec);
if (!ret && rec->errors & I_ERR_INVALID_IMODE)
ret = repair_imode_original(trans, root, &path, rec);
if (rec->errors & I_ERR_NO_INODE_ITEM)
ret = repair_inode_no_item(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
ret = repair_inode_discount_extent(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
ret = repair_inode_isize(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
ret = repair_inode_orphan_item(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
ret = repair_inode_nlinks(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
ret = repair_inode_nbytes(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_INLINE_RAM_BYTES_WRONG)
ret = repair_inline_ram_bytes(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_UNALIGNED_EXTENT_REC)
ret = repair_unaligned_extent_recs(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_INVALID_GEN)
ret = repair_inode_gen_original(trans, root, &path, rec);
btrfs_commit_transaction(trans, root);
btrfs_release_path(&path);
return ret;
}
static int check_inode_recs(struct btrfs_root *root,
struct cache_tree *inode_cache)
{
struct cache_extent *cache;
struct ptr_node *node;
struct inode_record *rec;
struct inode_backref *backref;
int stage = 0;
int ret = 0;
int err = 0;
u64 error = 0;
u64 root_dirid = btrfs_root_dirid(&root->root_item);
if (btrfs_root_refs(&root->root_item) == 0) {
if (!cache_tree_empty(inode_cache))
fprintf(stderr, "warning line %d\n", __LINE__);
return 0;
}
/*
* We need to repair backrefs first because we could change some of the
* errors in the inode recs.
*
* We also need to go through and delete invalid backrefs first and then
* add the correct ones second. We do this because we may get EEXIST
* when adding back the correct index because we hadn't yet deleted the
* invalid index.
*
* For example, if we were missing a dir index then the directories
* isize would be wrong, so if we fixed the isize to what we thought it
* would be and then fixed the backref we'd still have a invalid fs, so
* we need to add back the dir index and then check to see if the isize
* is still wrong.
*/
while (stage < 3) {
stage++;
if (stage == 3 && !err)
break;
cache = search_cache_extent(inode_cache, 0);
while (opt_check_repair && cache) {
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
cache = next_cache_extent(cache);
/* Need to free everything up and rescan */
if (stage == 3) {
remove_cache_extent(inode_cache, &node->cache);
free(node);
free_inode_rec(rec);
continue;
}
if (list_empty(&rec->backrefs))
continue;
ret = repair_inode_backrefs(root, rec, inode_cache,
stage == 1);
if (ret < 0) {
err = ret;
stage = 2;
break;
} if (ret > 0) {
err = -EAGAIN;
}
}
}
if (err)
return err;
rec = get_inode_rec(inode_cache, root_dirid, 0);
BUG_ON(IS_ERR(rec));
if (rec) {
if (opt_check_repair) {
ret = try_repair_inode(root, rec);
if (ret < 0)
error++;
}
ret = check_root_dir(rec);
if (ret) {
print_inode_error(root, rec);
error++;
}
} else {
if (opt_check_repair) {
struct btrfs_trans_handle *trans;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
return err;
}
fprintf(stderr,
"root %llu missing its root dir, recreating\n",
(unsigned long long)root->objectid);
ret = btrfs_make_root_dir(trans, root, root_dirid);
btrfs-progs: exit gracefully when root dir item repair fails Another BUG_ON() during fuzz/003: ====== RUN MAYFAIL btrfs check --init-csum-tree tests/fuzz-tests/images/bko-161821.raw.restored [1/7] checking root items Fixed 0 roots. [2/7] checking extents parent transid verify failed on 4198400 wanted 14 found 1114126 parent transid verify failed on 4198400 wanted 14 found 1114126 Ignoring transid failure owner ref check failed [4198400 4096] repair deleting extent record: key [4198400,169,0] adding new tree backref on start 4198400 len 4096 parent 0 root 5 Repaired extent references for 4198400 ref mismatch on [4222976 4096] extent item 1, found 0 backref 4222976 root 7 not referenced back 0x55e9cc694780 incorrect global backref count on 4222976 found 1 wanted 0 backpointer mismatch on [4222976 4096] owner ref check failed [4222976 4096] repair deleting extent record: key [4222976,169,0] Repaired extent references for 4222976 [3/7] checking free space cache [4/7] checking fs roots parent transid verify failed on 4198400 wanted 14 found 1114126 Ignoring transid failure Wrong generation of child node/leaf, wanted: 1114126, have: 14 root 5 missing its root dir, recreating parent transid verify failed on 4198400 wanted 14 found 1114126 Ignoring transid failure ERROR: child eb corrupted: parent bytenr=4222976 item=0 parent level=1 child level=2 check/main.c:2738: check_inode_recs: BUG_ON `ret` triggered, value -5 failed (ignored, ret=134): btrfs check --init-csum-tree tests/fuzz-tests/images/bko-161821.raw.restored mayfail: returned code 134 (SIGABRT), not ignored test failed for case 003-multi-check-unmounted Just abort current transaction and exit gracefully in this case, the caller handles errors. Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Nikolay Borisov <nborisov@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2018-08-03 13:50:18 +08:00
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
return ret;
}
btrfs_commit_transaction(trans, root);
return -EAGAIN;
}
fprintf(stderr, "root %llu root dir %llu not found\n",
(unsigned long long)root->root_key.objectid,
(unsigned long long)root_dirid);
}
while (1) {
cache = search_cache_extent(inode_cache, 0);
if (!cache)
break;
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
remove_cache_extent(inode_cache, &node->cache);
free(node);
if (rec->ino == root_dirid ||
rec->ino == BTRFS_ORPHAN_OBJECTID) {
free_inode_rec(rec);
continue;
}
if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
ret = check_orphan_item(root, rec->ino);
if (ret == 0)
rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
if (can_free_inode_rec(rec)) {
free_inode_rec(rec);
continue;
}
}
if (!rec->found_inode_item)
rec->errors |= I_ERR_NO_INODE_ITEM;
if (rec->found_link != rec->nlink)
rec->errors |= I_ERR_LINK_COUNT_WRONG;
if (opt_check_repair) {
ret = try_repair_inode(root, rec);
if (ret == 0 && can_free_inode_rec(rec)) {
free_inode_rec(rec);
continue;
}
}
if (!(opt_check_repair && ret == 0))
error++;
print_inode_error(root, rec);
list_for_each_entry(backref, &rec->backrefs, list) {
if (!backref->found_dir_item)
backref->errors |= REF_ERR_NO_DIR_ITEM;
if (!backref->found_dir_index)
backref->errors |= REF_ERR_NO_DIR_INDEX;
if (!backref->found_inode_ref)
backref->errors |= REF_ERR_NO_INODE_REF;
fprintf(stderr, "\tunresolved ref dir %llu index %llu"
" namelen %u name %s filetype %d errors %x",
(unsigned long long)backref->dir,
(unsigned long long)backref->index,
backref->namelen, backref->name,
backref->filetype, backref->errors);
print_ref_error(backref->errors);
}
free_inode_rec(rec);
}
return (error > 0) ? -1 : 0;
}
static struct root_record *get_root_rec(struct cache_tree *root_cache,
u64 objectid)
{
struct cache_extent *cache;
struct root_record *rec = NULL;
int ret;
cache = lookup_cache_extent(root_cache, objectid, 1);
if (cache) {
rec = container_of(cache, struct root_record, cache);
} else {
rec = calloc(1, sizeof(*rec));
if (!rec)
return ERR_PTR(-ENOMEM);
rec->objectid = objectid;
INIT_LIST_HEAD(&rec->backrefs);
rec->cache.start = objectid;
rec->cache.size = 1;
ret = insert_cache_extent(root_cache, &rec->cache);
if (ret)
return ERR_PTR(-EEXIST);
}
return rec;
}
static struct root_backref *get_root_backref(struct root_record *rec,
u64 ref_root, u64 dir, u64 index,
const char *name, int namelen)
{
struct root_backref *backref;
list_for_each_entry(backref, &rec->backrefs, list) {
if (backref->ref_root != ref_root || backref->dir != dir ||
backref->namelen != namelen)
continue;
if (memcmp(name, backref->name, namelen))
continue;
return backref;
}
backref = calloc(1, sizeof(*backref) + namelen + 1);
if (!backref)
return NULL;
backref->ref_root = ref_root;
backref->dir = dir;
backref->index = index;
backref->namelen = namelen;
memcpy(backref->name, name, namelen);
backref->name[namelen] = '\0';
list_add_tail(&backref->list, &rec->backrefs);
return backref;
}
static void free_root_record(struct cache_extent *cache)
{
struct root_record *rec;
struct root_backref *backref;
rec = container_of(cache, struct root_record, cache);
while (!list_empty(&rec->backrefs)) {
backref = to_root_backref(rec->backrefs.next);
list_del(&backref->list);
free(backref);
}
free(rec);
}
FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
static int add_root_backref(struct cache_tree *root_cache,
u64 root_id, u64 ref_root, u64 dir, u64 index,
const char *name, int namelen,
int item_type, int errors)
{
struct root_record *rec;
struct root_backref *backref;
rec = get_root_rec(root_cache, root_id);
BUG_ON(IS_ERR(rec));
backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
BUG_ON(!backref);
backref->errors |= errors;
if (item_type != BTRFS_DIR_ITEM_KEY) {
if (backref->found_dir_index || backref->found_back_ref ||
backref->found_forward_ref) {
if (backref->index != index)
backref->errors |= REF_ERR_INDEX_UNMATCH;
} else {
backref->index = index;
}
}
if (item_type == BTRFS_DIR_ITEM_KEY) {
if (backref->found_forward_ref)
rec->found_ref++;
backref->found_dir_item = 1;
} else if (item_type == BTRFS_DIR_INDEX_KEY) {
backref->found_dir_index = 1;
} else if (item_type == BTRFS_ROOT_REF_KEY) {
if (backref->found_forward_ref)
backref->errors |= REF_ERR_DUP_ROOT_REF;
else if (backref->found_dir_item)
rec->found_ref++;
backref->found_forward_ref = 1;
} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
if (backref->found_back_ref)
backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
backref->found_back_ref = 1;
} else {
BUG_ON(1);
}
if (backref->found_forward_ref && backref->found_dir_item)
backref->reachable = 1;
return 0;
}
static int merge_root_recs(struct btrfs_root *root,
struct cache_tree *src_cache,
struct cache_tree *dst_cache)
{
struct cache_extent *cache;
struct ptr_node *node;
struct inode_record *rec;
struct inode_backref *backref;
int ret = 0;
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
free_inode_recs_tree(src_cache);
return 0;
}
while (1) {
cache = search_cache_extent(src_cache, 0);
if (!cache)
break;
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
remove_cache_extent(src_cache, &node->cache);
free(node);
ret = is_child_root(root, root->objectid, rec->ino);
if (ret < 0)
break;
else if (ret == 0)
goto skip;
list_for_each_entry(backref, &rec->backrefs, list) {
BUG_ON(backref->found_inode_ref);
if (backref->found_dir_item)
add_root_backref(dst_cache, rec->ino,
root->root_key.objectid, backref->dir,
backref->index, backref->name,
backref->namelen, BTRFS_DIR_ITEM_KEY,
backref->errors);
if (backref->found_dir_index)
add_root_backref(dst_cache, rec->ino,
root->root_key.objectid, backref->dir,
backref->index, backref->name,
backref->namelen, BTRFS_DIR_INDEX_KEY,
backref->errors);
}
skip:
free_inode_rec(rec);
}
if (ret < 0)
return ret;
return 0;
}
static int check_root_refs(struct btrfs_root *root,
struct cache_tree *root_cache)
{
struct root_record *rec;
struct root_record *ref_root;
struct root_backref *backref;
struct cache_extent *cache;
int loop = 1;
int ret;
int error;
int errors = 0;
rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
BUG_ON(IS_ERR(rec));
rec->found_ref = 1;
/* fixme: this can not detect circular references */
while (loop) {
loop = 0;
cache = search_cache_extent(root_cache, 0);
while (1) {
g_task_ctx.item_count++;
if (!cache)
break;
rec = container_of(cache, struct root_record, cache);
cache = next_cache_extent(cache);
if (rec->found_ref == 0)
continue;
list_for_each_entry(backref, &rec->backrefs, list) {
if (!backref->reachable)
continue;
ref_root = get_root_rec(root_cache,
backref->ref_root);
BUG_ON(IS_ERR(ref_root));
if (ref_root->found_ref > 0)
continue;
backref->reachable = 0;
rec->found_ref--;
if (rec->found_ref == 0)
loop = 1;
}
}
}
cache = search_cache_extent(root_cache, 0);
while (1) {
if (!cache)
break;
rec = container_of(cache, struct root_record, cache);
cache = next_cache_extent(cache);
if (rec->found_ref == 0 &&
rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
ret = check_orphan_item(gfs_info->tree_root, rec->objectid);
if (ret == 0)
continue;
/*
* If we don't have a root item then we likely just have
* a dir item in a snapshot for this root but no actual
* ref key or anything so it's meaningless.
*/
if (!rec->found_root_item)
continue;
errors++;
fprintf(stderr, "fs tree %llu not referenced\n",
(unsigned long long)rec->objectid);
}
error = 0;
if (rec->found_ref > 0 && !rec->found_root_item)
error = 1;
list_for_each_entry(backref, &rec->backrefs, list) {
if (!backref->found_dir_item)
backref->errors |= REF_ERR_NO_DIR_ITEM;
if (!backref->found_dir_index)
backref->errors |= REF_ERR_NO_DIR_INDEX;
if (!backref->found_back_ref)
backref->errors |= REF_ERR_NO_ROOT_BACKREF;
if (!backref->found_forward_ref)
backref->errors |= REF_ERR_NO_ROOT_REF;
if (backref->reachable && backref->errors)
error = 1;
}
if (!error)
continue;
errors++;
fprintf(stderr, "fs tree %llu refs %u %s\n",
(unsigned long long)rec->objectid, rec->found_ref,
rec->found_root_item ? "" : "not found");
list_for_each_entry(backref, &rec->backrefs, list) {
if (!backref->reachable)
continue;
if (!backref->errors && rec->found_root_item)
continue;
fprintf(stderr, "\tunresolved ref root %llu dir %llu"
" index %llu namelen %u name %s errors %x\n",
(unsigned long long)backref->ref_root,
(unsigned long long)backref->dir,
(unsigned long long)backref->index,
backref->namelen, backref->name,
backref->errors);
print_ref_error(backref->errors);
}
}
return errors > 0 ? 1 : 0;
}
static int process_root_ref(struct extent_buffer *eb, int slot,
struct btrfs_key *key,
struct cache_tree *root_cache)
{
u64 dirid;
u64 index;
u32 len;
u32 name_len;
struct btrfs_root_ref *ref;
char namebuf[BTRFS_NAME_LEN];
int error;
ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
dirid = btrfs_root_ref_dirid(eb, ref);
index = btrfs_root_ref_sequence(eb, ref);
name_len = btrfs_root_ref_name_len(eb, ref);
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
error = 0;
} else {
len = BTRFS_NAME_LEN;
error = REF_ERR_NAME_TOO_LONG;
}
read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
if (key->type == BTRFS_ROOT_REF_KEY) {
add_root_backref(root_cache, key->offset, key->objectid, dirid,
index, namebuf, len, key->type, error);
} else {
add_root_backref(root_cache, key->objectid, key->offset, dirid,
index, namebuf, len, key->type, error);
}
return 0;
}
static void free_corrupt_block(struct cache_extent *cache)
{
struct btrfs_corrupt_block *corrupt;
corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
free(corrupt);
}
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
* Repair the btree of the given root.
*
* The fix is to remove the node key in corrupt_blocks cache_tree.
* and rebalance the tree.
* After the fix, the btree should be writeable.
*/
static int repair_btree(struct btrfs_root *root,
struct cache_tree *corrupt_blocks)
{
struct btrfs_trans_handle *trans;
struct btrfs_path path;
struct btrfs_corrupt_block *corrupt;
struct cache_extent *cache;
struct btrfs_key key;
u64 offset;
int level;
int ret = 0;
if (cache_tree_empty(corrupt_blocks))
return 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
fprintf(stderr, "Error starting transaction: %m\n");
return ret;
}
btrfs_init_path(&path);
cache = first_cache_extent(corrupt_blocks);
while (cache) {
corrupt = container_of(cache, struct btrfs_corrupt_block,
cache);
level = corrupt->level;
path.lowest_level = level;
key.objectid = corrupt->key.objectid;
key.type = corrupt->key.type;
key.offset = corrupt->key.offset;
/*
* Here we don't want to do any tree balance, since it may
* cause a balance with corrupted brother leaf/node,
* so ins_len set to 0 here.
* Balance will be done after all corrupt node/leaf is deleted.
*/
ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
if (ret < 0)
goto out;
offset = btrfs_node_blockptr(path.nodes[level],
path.slots[level]);
/* Remove the ptr */
ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
if (ret < 0)
goto out;
/*
* Remove the corresponding extent
* return value is not concerned.
*/
btrfs_release_path(&path);
ret = btrfs_free_extent(trans, root, offset,
gfs_info->nodesize, 0,
root->root_key.objectid, level - 1, 0);
cache = next_cache_extent(cache);
}
/* Balance the btree using btrfs_search_slot() */
cache = first_cache_extent(corrupt_blocks);
while (cache) {
corrupt = container_of(cache, struct btrfs_corrupt_block,
cache);
memcpy(&key, &corrupt->key, sizeof(key));
ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
if (ret < 0)
goto out;
/* return will always >0 since it won't find the item */
ret = 0;
btrfs_release_path(&path);
cache = next_cache_extent(cache);
}
out:
btrfs_commit_transaction(trans, root);
btrfs_release_path(&path);
return ret;
}
static int check_fs_root(struct btrfs_root *root,
struct cache_tree *root_cache,
struct walk_control *wc)
{
int ret = 0;
int err = 0;
bool generation_err = false;
int wret;
int level;
u64 super_generation;
struct btrfs_path path;
struct shared_node root_node;
struct root_record *rec;
struct btrfs_root_item *root_item = &root->root_item;
struct cache_tree corrupt_blocks;
enum btrfs_tree_block_status status;
struct node_refs nrefs;
struct unaligned_extent_rec_t *urec;
struct unaligned_extent_rec_t *tmp;
super_generation = btrfs_super_generation(gfs_info->super_copy);
if (btrfs_root_generation(root_item) > super_generation + 1) {
error(
"invalid generation for root %llu, have %llu expect (0, %llu]",
root->root_key.objectid, btrfs_root_generation(root_item),
super_generation + 1);
generation_err = true;
if (opt_check_repair) {
root->node->flags |= EXTENT_BUFFER_BAD_TRANSID;
ret = recow_extent_buffer(root, root->node);
if (!ret) {
printf("Reset generation for root %llu\n",
root->root_key.objectid);
generation_err = false;
}
}
}
/*
* Reuse the corrupt_block cache tree to record corrupted tree block
*
* Unlike the usage in extent tree check, here we do it in a per
* fs/subvol tree base.
*/
cache_tree_init(&corrupt_blocks);
gfs_info->corrupt_blocks = &corrupt_blocks;
if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
rec = get_root_rec(root_cache, root->root_key.objectid);
BUG_ON(IS_ERR(rec));
if (btrfs_root_refs(root_item) > 0)
rec->found_root_item = 1;
}
btrfs_init_path(&path);
memset(&root_node, 0, sizeof(root_node));
cache_tree_init(&root_node.root_cache);
cache_tree_init(&root_node.inode_cache);
memset(&nrefs, 0, sizeof(nrefs));
/* Mode unaligned extent recs to corresponding inode record */
list_for_each_entry_safe(urec, tmp,
&root->unaligned_extent_recs, list) {
struct inode_record *inode;
inode = get_inode_rec(&root_node.inode_cache, urec->owner, 1);
if (IS_ERR_OR_NULL(inode)) {
fprintf(stderr,
"fail to get inode rec on [%llu,%llu]\n",
urec->objectid, urec->owner);
list_del(&urec->list);
free(urec);
continue;
}
inode->errors |= I_ERR_UNALIGNED_EXTENT_REC;
list_move(&urec->list, &inode->unaligned_extent_recs);
}
level = btrfs_header_level(root->node);
memset(wc->nodes, 0, sizeof(wc->nodes));
wc->nodes[level] = &root_node;
wc->active_node = level;
wc->root_level = level;
/* We may not have checked the root block, lets do that now */
if (btrfs_is_leaf(root->node))
status = btrfs_check_leaf(gfs_info, NULL, root->node);
else
status = btrfs_check_node(gfs_info, NULL, root->node);
if (status != BTRFS_TREE_BLOCK_CLEAN)
return -EIO;
if (btrfs_root_refs(root_item) > 0 ||
btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
path.nodes[level] = root->node;
extent_buffer_get(root->node);
path.slots[level] = 0;
} else {
struct btrfs_key key;
struct btrfs_disk_key found_key;
btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
level = root_item->drop_level;
path.lowest_level = level;
if (level > btrfs_header_level(root->node) ||
level >= BTRFS_MAX_LEVEL) {
error("ignoring invalid drop level: %u", level);
goto skip_walking;
}
wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (wret < 0)
goto skip_walking;
btrfs_node_key(path.nodes[level], &found_key,
path.slots[level]);
WARN_ON(memcmp(&found_key, &root_item->drop_progress,
sizeof(found_key)));
}
while (1) {
g_task_ctx.item_count++;
wret = walk_down_tree(root, &path, wc, &level, &nrefs);
if (wret < 0)
ret = wret;
if (wret != 0)
break;
wret = walk_up_tree(root, &path, wc, &level);
if (wret < 0)
ret = wret;
if (wret != 0)
break;
}
skip_walking:
btrfs_release_path(&path);
if (!cache_tree_empty(&corrupt_blocks)) {
struct cache_extent *cache;
struct btrfs_corrupt_block *corrupt;
printf("The following tree block(s) is corrupted in tree %llu:\n",
root->root_key.objectid);
cache = first_cache_extent(&corrupt_blocks);
while (cache) {
corrupt = container_of(cache,
struct btrfs_corrupt_block,
cache);
printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
cache->start, corrupt->level,
corrupt->key.objectid, corrupt->key.type,
corrupt->key.offset);
cache = next_cache_extent(cache);
}
if (opt_check_repair) {
printf("Try to repair the btree for root %llu\n",
root->root_key.objectid);
ret = repair_btree(root, &corrupt_blocks);
if (ret < 0) {
errno = -ret;
fprintf(stderr, "Failed to repair btree: %m\n");
}
if (!ret)
printf("Btree for root %llu is fixed\n",
root->root_key.objectid);
}
}
err = merge_root_recs(root, &root_node.root_cache, root_cache);
if (err < 0)
ret = err;
if (root_node.current) {
root_node.current->checked = 1;
maybe_free_inode_rec(&root_node.inode_cache,
root_node.current);
}
err = check_inode_recs(root, &root_node.inode_cache);
if (!ret)
ret = err;
free_corrupt_blocks_tree(&corrupt_blocks);
gfs_info->corrupt_blocks = NULL;
if (!ret && generation_err)
ret = -1;
return ret;
}
static int check_fs_roots(struct cache_tree *root_cache)
{
struct btrfs_path path;
struct btrfs_key key;
struct walk_control wc;
struct extent_buffer *leaf, *tree_node;
struct btrfs_root *tmp_root;
struct btrfs_root *tree_root = gfs_info->tree_root;
u64 skip_root = 0;
int ret;
int err = 0;
/*
* Just in case we made any changes to the extent tree that weren't
* reflected into the free space cache yet.
*/
if (opt_check_repair)
reset_cached_block_groups();
memset(&wc, 0, sizeof(wc));
cache_tree_init(&wc.shared);
btrfs_init_path(&path);
again:
key.offset = 0;
if (skip_root)
key.objectid = skip_root + 1;
else
key.objectid = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
if (ret < 0) {
err = 1;
goto out;
}
tree_node = tree_root->node;
while (1) {
if (tree_node != tree_root->node) {
free_root_recs_tree(root_cache);
btrfs_release_path(&path);
goto again;
}
leaf = path.nodes[0];
if (path.slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(tree_root, &path);
if (ret) {
if (ret < 0)
err = 1;
break;
}
leaf = path.nodes[0];
}
btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
if (key.type == BTRFS_ROOT_ITEM_KEY &&
fs_root_objectid(key.objectid)) {
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
tmp_root = btrfs_read_fs_root_no_cache(
gfs_info, &key);
} else {
key.offset = (u64)-1;
tmp_root = btrfs_read_fs_root(
gfs_info, &key);
}
if (IS_ERR(tmp_root)) {
err = 1;
goto next;
}
ret = check_fs_root(tmp_root, root_cache, &wc);
if (ret == -EAGAIN) {
free_root_recs_tree(root_cache);
btrfs_release_path(&path);
goto again;
}
if (ret) {
err = 1;
/*
* We failed to repair this root but modified
* tree root, after again: label we will still
* hit this root and fail to repair, so we must
* skip it to avoid infinite loop.
*/
if (opt_check_repair)
skip_root = key.objectid;
}
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
btrfs_free_fs_root(tmp_root);
} else if (key.type == BTRFS_ROOT_REF_KEY ||
key.type == BTRFS_ROOT_BACKREF_KEY) {
process_root_ref(leaf, path.slots[0], &key,
root_cache);
} else if (key.type == BTRFS_INODE_ITEM_KEY &&
is_fstree(key.objectid)) {
ret = check_repair_free_space_inode(&path);
if (ret < 0 && !path.nodes[0]) {
err = 1;
goto out;
}
if (ret < 0 && path.nodes[0]) {
err = 1;
goto next;
}
}
next:
path.slots[0]++;
}
out:
btrfs_release_path(&path);
if (err)
free_extent_cache_tree(&wc.shared);
if (!cache_tree_empty(&wc.shared))
fprintf(stderr, "warning line %d\n", __LINE__);
return err;
}
static struct tree_backref *find_tree_backref(struct extent_record *rec,
u64 parent, u64 root)
{
struct rb_node *node;
struct tree_backref *back = NULL;
struct tree_backref match = {
.node = {
.is_data = 0,
},
};
if (parent) {
match.parent = parent;
match.node.full_backref = 1;
} else {
match.root = root;
}
node = rb_search(&rec->backref_tree, &match.node.node,
(rb_compare_keys)compare_extent_backref, NULL);
if (node)
back = to_tree_backref(rb_node_to_extent_backref(node));
return back;
}
static struct data_backref *find_data_backref(struct extent_record *rec,
u64 parent, u64 root,
u64 owner, u64 offset,
int found_ref,
u64 disk_bytenr, u64 bytes)
{
struct rb_node *node;
struct data_backref *back = NULL;
struct data_backref match = {
.node = {
.is_data = 1,
},
.owner = owner,
.offset = offset,
.bytes = bytes,
.found_ref = found_ref,
.disk_bytenr = disk_bytenr,
};
if (parent) {
match.parent = parent;
match.node.full_backref = 1;
} else {
match.root = root;
}
node = rb_search(&rec->backref_tree, &match.node.node,
(rb_compare_keys)compare_extent_backref, NULL);
if (node)
back = to_data_backref(rb_node_to_extent_backref(node));
return back;
}
static int do_check_fs_roots(struct cache_tree *root_cache)
{
int ret;
if (check_mode == CHECK_MODE_LOWMEM)
ret = check_fs_roots_lowmem();
else
ret = check_fs_roots(root_cache);
return ret;
}
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
struct extent_backref *back, *tmp;
struct tree_backref *tback;
struct data_backref *dback;
u64 found = 0;
int err = 0;
rbtree_postorder_for_each_entry_safe(back, tmp,
&rec->backref_tree, node) {
if (!back->found_extent_tree) {
err = 1;
if (!print_errs)
goto out;
if (back->is_data) {
dback = to_data_backref(back);
fprintf(stderr,
"data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
(unsigned long long)rec->start,
back->full_backref ?
"parent" : "root",
back->full_backref ?
(unsigned long long)dback->parent :
(unsigned long long)dback->root,
(unsigned long long)dback->owner,
(unsigned long long)dback->offset,
(unsigned long)dback->num_refs);
} else {
tback = to_tree_backref(back);
fprintf(stderr,
"tree backref %llu %s %llu not found in extent tree\n",
(unsigned long long)rec->start,
back->full_backref ? "parent" : "root",
back->full_backref ?
(unsigned long long)tback->parent :
(unsigned long long)tback->root);
}
}
if (!back->is_data && !back->found_ref) {
err = 1;
if (!print_errs)
goto out;
tback = to_tree_backref(back);
fprintf(stderr,
"backref %llu %s %llu not referenced back %p\n",
(unsigned long long)rec->start,
back->full_backref ? "parent" : "root",
back->full_backref ?
(unsigned long long)tback->parent :
(unsigned long long)tback->root, back);
}
if (back->is_data) {
dback = to_data_backref(back);
if (dback->found_ref != dback->num_refs) {
err = 1;
if (!print_errs)
goto out;
fprintf(stderr,
"incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
(unsigned long long)rec->start,
back->full_backref ?
"parent" : "root",
back->full_backref ?
(unsigned long long)dback->parent :
(unsigned long long)dback->root,
(unsigned long long)dback->owner,
(unsigned long long)dback->offset,
dback->found_ref, dback->num_refs,
back);
}
if (dback->disk_bytenr != rec->start) {
err = 1;
if (!print_errs)
goto out;
fprintf(stderr,
"backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
(unsigned long long)rec->start,
(unsigned long long)dback->disk_bytenr);
}
if (dback->bytes != rec->nr) {
err = 1;
if (!print_errs)
goto out;
fprintf(stderr,
"backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
(unsigned long long)rec->start,
(unsigned long long)rec->nr,
(unsigned long long)dback->bytes);
}
}
if (!back->is_data) {
found += 1;
} else {
dback = to_data_backref(back);
found += dback->found_ref;
}
}
if (found != rec->refs) {
err = 1;
if (!print_errs)
goto out;
fprintf(stderr,
"incorrect global backref count on %llu found %llu wanted %llu\n",
(unsigned long long)rec->start,
(unsigned long long)found,
(unsigned long long)rec->refs);
}
out:
return err;
}
static void __free_one_backref(struct rb_node *node)
{
struct extent_backref *back = rb_node_to_extent_backref(node);
free(back);
}
static void free_all_extent_backrefs(struct extent_record *rec)
{
rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
static void free_extent_record_cache(struct cache_tree *extent_cache)
{
struct cache_extent *cache;
struct extent_record *rec;
while (1) {
cache = first_cache_extent(extent_cache);
if (!cache)
break;
rec = container_of(cache, struct extent_record, cache);
remove_cache_extent(extent_cache, cache);
free_all_extent_backrefs(rec);
free(rec);
}
}
static int maybe_free_extent_rec(struct cache_tree *extent_cache,
struct extent_record *rec)
{
u64 super_gen = btrfs_super_generation(gfs_info->super_copy);
if (rec->content_checked && rec->owner_ref_checked &&
rec->extent_item_refs == rec->refs && rec->refs > 0 &&
rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
!rec->bad_full_backref && !rec->crossing_stripes &&
rec->generation <= super_gen + 1 &&
!rec->wrong_chunk_type) {
remove_cache_extent(extent_cache, &rec->cache);
free_all_extent_backrefs(rec);
list_del_init(&rec->list);
free(rec);
}
return 0;
}
static int check_owner_ref(struct btrfs_root *root,
struct extent_record *rec,
struct extent_buffer *buf)
{
struct extent_backref *node, *tmp;
struct tree_backref *back;
struct btrfs_root *ref_root;
struct btrfs_key key;
struct btrfs_path path;
struct extent_buffer *parent;
int level;
int found = 0;
int ret;
rbtree_postorder_for_each_entry_safe(node, tmp,
&rec->backref_tree, node) {
if (node->is_data)
continue;
if (!node->found_ref)
continue;
if (node->full_backref)
continue;
back = to_tree_backref(node);
if (btrfs_header_owner(buf) == back->root)
return 0;
}
/*
* Some unexpected root item referring to this one, return 1 to
* indicate owner not found
*/
if (rec->is_root)
return 1;
/* try to find the block by search corresponding fs tree */
key.objectid = btrfs_header_owner(buf);
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
ref_root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(ref_root))
return 1;
level = btrfs_header_level(buf);
if (level == 0)
btrfs_item_key_to_cpu(buf, &key, 0);
else
btrfs_node_key_to_cpu(buf, &key, 0);
btrfs_init_path(&path);
path.lowest_level = level + 1;
ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
if (ret < 0)
return 0;
parent = path.nodes[level + 1];
if (parent && buf->start == btrfs_node_blockptr(parent,
path.slots[level + 1]))
found = 1;
btrfs_release_path(&path);
return found ? 0 : 1;
}
static int is_extent_tree_record(struct extent_record *rec)
{
struct extent_backref *node, *tmp;
struct tree_backref *back;
int is_extent = 0;
rbtree_postorder_for_each_entry_safe(node, tmp,
&rec->backref_tree, node) {
if (node->is_data)
return 0;
back = to_tree_backref(node);
if (node->full_backref)
return 0;
if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
is_extent = 1;
}
return is_extent;
}
static int record_bad_block_io(struct cache_tree *extent_cache,
u64 start, u64 len)
{
struct extent_record *rec;
struct cache_extent *cache;
cache = lookup_cache_extent(extent_cache, start, len);
if (!cache)
return 0;
rec = container_of(cache, struct extent_record, cache);
if (!is_extent_tree_record(rec))
return 0;
return btrfs_add_corrupt_extent_record(gfs_info, &rec->parent_key,
start, len, 0);
}
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
struct extent_buffer *buf, int slot)
{
if (btrfs_header_level(buf)) {
struct btrfs_key_ptr ptr1, ptr2;
read_extent_buffer(buf, &ptr1,
btrfs_node_key_ptr_offset(buf, slot),
sizeof(struct btrfs_key_ptr));
read_extent_buffer(buf, &ptr2,
btrfs_node_key_ptr_offset(buf, slot + 1),
sizeof(struct btrfs_key_ptr));
write_extent_buffer(buf, &ptr1,
btrfs_node_key_ptr_offset(buf, slot + 1),
sizeof(struct btrfs_key_ptr));
write_extent_buffer(buf, &ptr2,
btrfs_node_key_ptr_offset(buf, slot),
sizeof(struct btrfs_key_ptr));
if (slot == 0) {
struct btrfs_disk_key key;
btrfs_node_key(buf, &key, 0);
btrfs_fixup_low_keys(path, &key, btrfs_header_level(buf) + 1);
}
} else {
struct btrfs_key k1, k2;
char *item1_data, *item2_data;
u32 item1_offset, item2_offset, item1_size, item2_size;
btrfs_item_key_to_cpu(buf, &k1, slot);
btrfs_item_key_to_cpu(buf, &k2, slot + 1);
item1_offset = btrfs_item_offset(buf, slot);
item2_offset = btrfs_item_offset(buf, slot + 1);
item1_size = btrfs_item_size(buf, slot);
item2_size = btrfs_item_size(buf, slot + 1);
item1_data = malloc(item1_size);
if (!item1_data)
return -ENOMEM;
item2_data = malloc(item2_size);
if (!item2_data) {
free(item1_data);
return -ENOMEM;
}
read_extent_buffer(buf, item1_data, item1_offset, item1_size);
read_extent_buffer(buf, item2_data, item2_offset, item2_size);
btrfs-progs: check: Avoid reading beyond item boundary for inode_ref When reading out name from inode_ref, it's possible that corrupted name_len can lead to read beyond boundary of item or even extent buffer. This happens when checking fuzzed image /tmp/bko-161811.raw, for both lowmem mode and original mode. ERROR: root 5 INODE REF[256 256] doesn't have related DIR_INDEX[256 504403158265495680] namelen 0 filename filetype 0 ERROR: root 5 INODE REF[256 256] doesn't have related DIR_ITEM[256 4294967294] namelen 0 filename filetype 0 WARNING: root 5 INODE_REF[256 256] name too long ==13022== Invalid read of size 8 ==13022== at 0x4C319BE: memmove (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==13022== by 0x431518: read_extent_buffer (extent_io.c:863) ==13022== by 0x474730: check_inode_ref (cmds-check.c:4307) ==13022== by 0x475D65: check_inode_item (cmds-check.c:4890) ==13022== by 0x476200: check_fs_first_inode (cmds-check.c:5011) ==13022== by 0x476276: check_fs_root_v2 (cmds-check.c:5044) ==13022== by 0x4769FB: check_fs_roots_v2 (cmds-check.c:5242) ==13022== by 0x488B5B: cmd_check (cmds-check.c:13033) ==13022== by 0x40A8C5: main (btrfs.c:246) ==13022== Address 0x5c96780 is 0 bytes after a block of size 4,224 alloc'd ==13022== at 0x4C2CF35: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==13022== by 0x4307E0: __alloc_extent_buffer (extent_io.c:538) ==13022== by 0x430C37: alloc_extent_buffer (extent_io.c:642) ==13022== by 0x413DFE: btrfs_find_create_tree_block (disk-io.c:193) ==13022== by 0x414370: read_tree_block_fs_info (disk-io.c:340) ==13022== by 0x40B5D5: read_tree_block (disk-io.h:125) ==13022== by 0x40CFD2: read_node_slot (ctree.c:652) ==13022== by 0x40E5EB: btrfs_search_slot (ctree.c:1172) ==13022== by 0x4761A8: check_fs_first_inode (cmds-check.c:5001) ==13022== by 0x476276: check_fs_root_v2 (cmds-check.c:5044) ==13022== by 0x4769FB: check_fs_roots_v2 (cmds-check.c:5242) ==13022== by 0x488B5B: cmd_check (cmds-check.c:13033) = Fix it by double checking inode_ref, name_len against item boundary before trying to read out name from extent buffer, for both original mode and lowmem mode. Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.com>
2017-05-03 16:42:39 +08:00
write_extent_buffer(buf, item1_data, item2_offset, item2_size);
write_extent_buffer(buf, item2_data, item1_offset, item1_size);
free(item1_data);
free(item2_data);
btrfs_set_item_offset(buf, slot, item2_offset);
btrfs_set_item_offset(buf, slot + 1, item1_offset);
btrfs_set_item_size(buf, slot, item2_size);
btrfs_set_item_size(buf, slot + 1, item1_size);
path->slots[0] = slot;
btrfs_set_item_key_unsafe(root, path, &k2);
path->slots[0] = slot + 1;
btrfs_set_item_key_unsafe(root, path, &k1);
}
return 0;
}
static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
{
struct extent_buffer *buf;
struct btrfs_key k1, k2;
int i;
int level = path->lowest_level;
int ret = -EIO;
buf = path->nodes[level];
for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
if (level) {
btrfs_node_key_to_cpu(buf, &k1, i);
btrfs_node_key_to_cpu(buf, &k2, i + 1);
} else {
btrfs_item_key_to_cpu(buf, &k1, i);
btrfs_item_key_to_cpu(buf, &k2, i + 1);
}
if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
continue;
ret = swap_values(root, path, buf, i);
if (ret)
break;
btrfs_mark_buffer_dirty(buf);
i = 0;
}
return ret;
}
static int delete_bogus_item(struct btrfs_root *root,
struct btrfs_path *path,
struct extent_buffer *buf, int slot)
{
struct btrfs_key key;
int nritems = btrfs_header_nritems(buf);
btrfs_item_key_to_cpu(buf, &key, slot);
/* These are all the keys we can deal with missing. */
if (key.type != BTRFS_DIR_INDEX_KEY &&
key.type != BTRFS_EXTENT_ITEM_KEY &&
key.type != BTRFS_METADATA_ITEM_KEY &&
key.type != BTRFS_TREE_BLOCK_REF_KEY &&
key.type != BTRFS_EXTENT_DATA_REF_KEY)
return -1;
printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
(unsigned long long)key.objectid, key.type,
(unsigned long long)key.offset, slot, buf->start);
memmove_extent_buffer(buf, btrfs_item_nr_offset(buf, slot),
btrfs_item_nr_offset(buf, slot + 1),
sizeof(struct btrfs_item) *
(nritems - slot - 1));
btrfs_set_header_nritems(buf, nritems - 1);
if (slot == 0) {
struct btrfs_disk_key disk_key;
btrfs_item_key(buf, &disk_key, 0);
btrfs_fixup_low_keys(path, &disk_key, 1);
}
btrfs_mark_buffer_dirty(buf);
return 0;
}
static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
struct extent_buffer *buf;
int i;
int ret = 0;
/* We should only get this for leaves */
BUG_ON(path->lowest_level);
buf = path->nodes[0];
again:
for (i = 0; i < btrfs_header_nritems(buf); i++) {
unsigned int shift = 0, offset;
if (i == 0 && btrfs_item_data_end(buf, i) !=
BTRFS_LEAF_DATA_SIZE(gfs_info)) {
if (btrfs_item_data_end(buf, i) >
BTRFS_LEAF_DATA_SIZE(gfs_info)) {
ret = delete_bogus_item(root, path, buf, i);
if (!ret)
goto again;
fprintf(stderr,
"item is off the end of the leaf, can't fix\n");
ret = -EIO;
break;
}
shift = BTRFS_LEAF_DATA_SIZE(gfs_info) -
btrfs_item_data_end(buf, i);
} else if (i > 0 && btrfs_item_data_end(buf, i) !=
btrfs_item_offset(buf, i - 1)) {
if (btrfs_item_data_end(buf, i) >
btrfs_item_offset(buf, i - 1)) {
ret = delete_bogus_item(root, path, buf, i);
if (!ret)
goto again;
fprintf(stderr, "items overlap, can't fix\n");
ret = -EIO;
break;
}
shift = btrfs_item_offset(buf, i - 1) -
btrfs_item_data_end(buf, i);
}
if (!shift)
continue;
printf("Shifting item nr %d by %u bytes in block %llu\n",
i, shift, (unsigned long long)buf->start);
offset = btrfs_item_offset(buf, i);
memmove_extent_buffer(buf,
btrfs_leaf_data(buf) + offset + shift,
btrfs_leaf_data(buf) + offset,
btrfs_item_size(buf, i));
btrfs_set_item_offset(buf, i, offset + shift);
btrfs_mark_buffer_dirty(buf);
}
/*
* We may have moved things, in which case we want to exit so we don't
* write those changes out. Once we have proper abort functionality in
* progs this can be changed to something nicer.
*/
BUG_ON(ret);
return ret;
}
/*
* Attempt to fix basic block failures. If we can't fix it for whatever reason
* then just return -EIO.
*/
static int try_to_fix_bad_block(struct btrfs_root *root,
struct extent_buffer *buf,
enum btrfs_tree_block_status status)
{
struct btrfs_trans_handle *trans;
struct ulist *roots;
struct ulist_node *node;
struct btrfs_root *search_root;
struct btrfs_path path;
struct ulist_iterator iter;
struct btrfs_key root_key, key;
int ret;
if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
return -EIO;
ret = btrfs_find_all_roots(NULL, gfs_info, buf->start, 0, &roots);
if (ret)
return -EIO;
btrfs_init_path(&path);
ULIST_ITER_INIT(&iter);
/*
* If we found no roots referencing to this tree block, there is no
* chance to fix. So our default ret is -EIO.
*/
ret = -EIO;
while ((node = ulist_next(roots, &iter))) {
root_key.objectid = node->val;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
search_root = btrfs_read_fs_root(gfs_info, &root_key);
if (IS_ERR(root)) {
ret = -EIO;
break;
}
trans = btrfs_start_transaction(search_root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
break;
}
path.lowest_level = btrfs_header_level(buf);
path.skip_check_block = 1;
if (path.lowest_level)
btrfs_node_key_to_cpu(buf, &key, 0);
else
btrfs_item_key_to_cpu(buf, &key, 0);
ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
if (ret) {
ret = -EIO;
btrfs_commit_transaction(trans, search_root);
break;
}
if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
ret = fix_key_order(search_root, &path);
else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
ret = fix_item_offset(search_root, &path);
if (ret) {
btrfs_commit_transaction(trans, search_root);
break;
}
btrfs_release_path(&path);
btrfs_commit_transaction(trans, search_root);
}
ulist_free(roots);
btrfs_release_path(&path);
return ret;
}
static int check_block(struct btrfs_root *root,
struct cache_tree *extent_cache,
struct extent_buffer *buf, u64 flags)
{
struct extent_record *rec;
struct cache_extent *cache;
struct btrfs_key key;
enum btrfs_tree_block_status status;
int ret = 0;
int level;
cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
if (!cache)
return 1;
rec = container_of(cache, struct extent_record, cache);
if (rec->generation < btrfs_header_generation(buf))
rec->generation = btrfs_header_generation(buf);
level = btrfs_header_level(buf);
if (btrfs_header_nritems(buf) > 0) {
if (level == 0)
btrfs_item_key_to_cpu(buf, &key, 0);
else
btrfs_node_key_to_cpu(buf, &key, 0);
rec->info_objectid = key.objectid;
}
rec->info_level = level;
if (btrfs_is_leaf(buf))
status = btrfs_check_leaf(gfs_info, &rec->parent_key, buf);
else
status = btrfs_check_node(gfs_info, &rec->parent_key, buf);
if (status != BTRFS_TREE_BLOCK_CLEAN) {
if (opt_check_repair)
status = try_to_fix_bad_block(root, buf, status);
if (status != BTRFS_TREE_BLOCK_CLEAN) {
ret = -EIO;
fprintf(stderr, "bad block %llu\n",
(unsigned long long)buf->start);
} else {
/*
* Signal to callers we need to start the scan over
* again since we'll have cowed blocks.
*/
ret = -EAGAIN;
}
} else {
rec->content_checked = 1;
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
rec->owner_ref_checked = 1;
else {
ret = check_owner_ref(root, rec, buf);
if (!ret)
rec->owner_ref_checked = 1;
}
}
if (!ret)
maybe_free_extent_rec(extent_cache, rec);
return ret;
}
static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
u64 parent, u64 root)
{
struct tree_backref *ref = malloc(sizeof(*ref));
if (!ref)
return NULL;
memset(&ref->node, 0, sizeof(ref->node));
if (parent > 0) {
ref->parent = parent;
ref->node.full_backref = 1;
} else {
ref->root = root;
ref->node.full_backref = 0;
}
return ref;
}
static struct data_backref *alloc_data_backref(struct extent_record *rec,
u64 parent, u64 root,
u64 owner, u64 offset,
u64 max_size)
{
struct data_backref *ref = malloc(sizeof(*ref));
if (!ref)
return NULL;
memset(ref, 0, sizeof(*ref));
ref->node.is_data = 1;
if (parent > 0) {
ref->parent = parent;
ref->owner = 0;
ref->offset = 0;
ref->node.full_backref = 1;
} else {
ref->root = root;
ref->owner = owner;
ref->offset = offset;
ref->node.full_backref = 0;
}
ref->bytes = max_size;
ref->found_ref = 0;
ref->num_refs = 0;
if (max_size > rec->max_size)
rec->max_size = max_size;
return ref;
}
/* Check if the type of extent matches with its chunk */
static void check_extent_type(struct extent_record *rec)
{
struct btrfs_block_group *bg_cache;
bg_cache = btrfs_lookup_first_block_group(gfs_info, rec->start);
if (!bg_cache)
return;
/* data extent, check chunk directly*/
if (!rec->metadata) {
if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
rec->wrong_chunk_type = 1;
return;
}
/* metadata extent, check the obvious case first */
if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
BTRFS_BLOCK_GROUP_METADATA))) {
rec->wrong_chunk_type = 1;
return;
}
/*
* Check SYSTEM extent, as it's also marked as metadata, we can only
* make sure it's a SYSTEM extent by its backref
*/
if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
struct extent_backref *node;
struct tree_backref *tback;
u64 bg_type;
node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
if (node->is_data) {
/* tree block shouldn't have data backref */
rec->wrong_chunk_type = 1;
return;
}
tback = container_of(node, struct tree_backref, node);
if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
else
bg_type = BTRFS_BLOCK_GROUP_METADATA;
if (!(bg_cache->flags & bg_type))
rec->wrong_chunk_type = 1;
}
}
/*
* Allocate a new extent record, fill default values from @tmpl and insert int
* @extent_cache. Caller is supposed to make sure the [start,nr) is not in
* the cache, otherwise it fails.
*/
static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
struct extent_record *tmpl)
{
struct extent_record *rec;
int ret = 0;
BUG_ON(tmpl->max_size == 0);
rec = malloc(sizeof(*rec));
if (!rec)
return -ENOMEM;
rec->start = tmpl->start;
rec->max_size = tmpl->max_size;
rec->nr = max(tmpl->nr, tmpl->max_size);
rec->found_rec = tmpl->found_rec;
rec->content_checked = tmpl->content_checked;
rec->owner_ref_checked = tmpl->owner_ref_checked;
rec->num_duplicates = 0;
rec->metadata = tmpl->metadata;
rec->flag_block_full_backref = FLAG_UNSET;
rec->bad_full_backref = 0;
rec->crossing_stripes = 0;
rec->wrong_chunk_type = 0;
rec->is_root = tmpl->is_root;
rec->refs = tmpl->refs;
rec->extent_item_refs = tmpl->extent_item_refs;
rec->parent_generation = tmpl->parent_generation;
btrfs-progs: check: Initialize extent_record::generation member [BUG] When using `btrfs check --init-extent-tree`, there is a pretty high chance that the result fs can't pass tree-checker: BTRFS critical (device dm-3): corrupt leaf: block=5390336 slot=149 extent bytenr=20115456 len=4096 invalid generation, have 16384 expect (0, 360] BTRFS error (device dm-3): block=5390336 read time tree block corruption detected BTRFS error (device dm-3): failed to read block groups: -5 BTRFS error (device dm-3): open_ctree failed [CAUSE] The result fs has a pretty screwed up EXTENT_ITEMs for data extents: item 148 key (20111360 EXTENT_ITEM 4096) itemoff 8777 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 841 offset 0 count 1 item 149 key (20115456 EXTENT_ITEM 4096) itemoff 8724 itemsize 53 refs 1 gen 16384 flags DATA extent data backref root FS_TREE objectid 906 offset 0 count 1 Kernel tree-checker will accept 0 generation, but that 16384 generation is definitely going to trigger the alarm. Looking into the code, it's add_extent_rec_nolookup() allocating a new extent_rec, but not copying all members from parameter @tmpl, resulting generation not properly initialized. [FIX] Just copy tmpl->generation in add_extent_rec_nolookup(). And since all call sites have set all members of @tmpl to 0 before add_extent_rec_nolookup(), we shouldn't get garbage values. For the 0 generation problem, it will be solved in another patch. Issue: #225 (Not the initial report, but extent tree rebuild result) Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:16 +08:00
rec->generation = tmpl->generation;
INIT_LIST_HEAD(&rec->backrefs);
INIT_LIST_HEAD(&rec->dups);
INIT_LIST_HEAD(&rec->list);
rec->backref_tree = RB_ROOT;
memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
rec->cache.start = tmpl->start;
rec->cache.size = tmpl->nr;
ret = insert_cache_extent(extent_cache, &rec->cache);
if (ret) {
free(rec);
return ret;
}
bytes_used += rec->nr;
if (tmpl->metadata)
rec->crossing_stripes = check_crossing_stripes(gfs_info,
rec->start, gfs_info->nodesize);
check_extent_type(rec);
return ret;
}
/*
* Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
* some are hints:
* - refs - if found, increase refs
* - is_root - if found, set
* - content_checked - if found, set
* - owner_ref_checked - if found, set
*
* If not found, create a new one, initialize and insert.
*/
static int add_extent_rec(struct cache_tree *extent_cache,
struct extent_record *tmpl)
{
struct extent_record *rec;
struct cache_extent *cache;
int ret = 0;
int dup = 0;
cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
if (cache) {
rec = container_of(cache, struct extent_record, cache);
if (tmpl->refs)
rec->refs++;
if (rec->nr == 1)
rec->nr = max(tmpl->nr, tmpl->max_size);
/*
* We need to make sure to reset nr to whatever the extent
* record says was the real size, this way we can compare it to
* the backrefs.
*/
if (tmpl->found_rec) {
if (tmpl->start != rec->start || rec->found_rec) {
struct extent_record *tmp;
dup = 1;
if (list_empty(&rec->list))
list_add_tail(&rec->list,
&duplicate_extents);
/*
* We have to do this song and dance in case we
* find an extent record that falls inside of
* our current extent record but does not have
* the same objectid.
*/
tmp = malloc(sizeof(*tmp));
if (!tmp)
return -ENOMEM;
tmp->start = tmpl->start;
tmp->max_size = tmpl->max_size;
tmp->nr = tmpl->nr;
tmp->found_rec = 1;
tmp->metadata = tmpl->metadata;
tmp->extent_item_refs = tmpl->extent_item_refs;
INIT_LIST_HEAD(&tmp->list);
list_add_tail(&tmp->list, &rec->dups);
rec->num_duplicates++;
} else {
rec->nr = tmpl->nr;
rec->found_rec = 1;
}
}
if (tmpl->extent_item_refs && !dup) {
if (rec->extent_item_refs) {
fprintf(stderr,
"block %llu rec extent_item_refs %llu, passed %llu\n",
(unsigned long long)tmpl->start,
(unsigned long long)
rec->extent_item_refs,
(unsigned long long)
tmpl->extent_item_refs);
}
rec->extent_item_refs = tmpl->extent_item_refs;
}
if (tmpl->is_root)
rec->is_root = 1;
if (tmpl->content_checked)
rec->content_checked = 1;
if (tmpl->owner_ref_checked)
rec->owner_ref_checked = 1;
memcpy(&rec->parent_key, &tmpl->parent_key,
sizeof(tmpl->parent_key));
if (tmpl->parent_generation)
rec->parent_generation = tmpl->parent_generation;
if (rec->max_size < tmpl->max_size)
rec->max_size = tmpl->max_size;
/*
* A metadata extent can't cross stripe_len boundary, otherwise
* kernel scrub won't be able to handle it.
* As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
* it.
*/
if (tmpl->metadata)
rec->crossing_stripes = check_crossing_stripes(
gfs_info, rec->start,
gfs_info->nodesize);
check_extent_type(rec);
maybe_free_extent_rec(extent_cache, rec);
return ret;
}
ret = add_extent_rec_nolookup(extent_cache, tmpl);
return ret;
}
static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
u64 parent, u64 root, int found_ref)
{
struct extent_record *rec;
struct tree_backref *back;
struct cache_extent *cache;
int ret;
bool insert = false;
cache = lookup_cache_extent(extent_cache, bytenr, 1);
if (!cache) {
struct extent_record tmpl;
memset(&tmpl, 0, sizeof(tmpl));
tmpl.start = bytenr;
tmpl.nr = 1;
tmpl.metadata = 1;
tmpl.max_size = 1;
ret = add_extent_rec_nolookup(extent_cache, &tmpl);
if (ret)
return ret;
/* really a bug in cache_extent implement now */
cache = lookup_cache_extent(extent_cache, bytenr, 1);
if (!cache)
return -ENOENT;
}
rec = container_of(cache, struct extent_record, cache);
if (rec->start != bytenr) {
/*
* Several cause, from unaligned bytenr to over lapping extents
*/
return -EEXIST;
}
back = find_tree_backref(rec, parent, root);
if (!back) {
back = alloc_tree_backref(rec, parent, root);
if (!back)
return -ENOMEM;
insert = true;
}
if (found_ref) {
if (back->node.found_ref) {
fprintf(stderr,
"Extent back ref already exists for %llu parent %llu root %llu\n",
(unsigned long long)bytenr,
(unsigned long long)parent,
(unsigned long long)root);
}
back->node.found_ref = 1;
} else {
if (back->node.found_extent_tree) {
fprintf(stderr,
"extent back ref already exists for %llu parent %llu root %llu\n",
(unsigned long long)bytenr,
(unsigned long long)parent,
(unsigned long long)root);
}
back->node.found_extent_tree = 1;
}
if (insert)
WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
compare_extent_backref));
check_extent_type(rec);
maybe_free_extent_rec(extent_cache, rec);
return 0;
}
static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
u64 parent, u64 root, u64 owner, u64 offset,
btrfs-progs: check: Populate extent generation correctly for data extents [BUG] When using `btrfs check --init-extent-tree`, we will create incorrect generation number for data extents in extent tree: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 [CAUSE] Since data extent generation is not as obvious as tree blocks, which has header containing its generations, so for data extents, its extent_record::generation is not really updated, resulting such 0 generation. [FIX] To get generation of a data extent, there are two sources we can rely: - EXTENT_ITEM There is always a btrfs_extent_item::generation can be utilized. Although this is not possible for --init-extent-tree use case. - EXTENT_DATA We have btrfs_file_extent_item::generation for regular and preallocated data extents. Since --init-extent-tree will go through subvolume trees, this would be the main source for extent data generation. Then we only need to make add_data_backref() to accept @gen parameter, and pass it down to extent_record structure. And for the final extent item generation update, here we add extra fallback values, if we can't find FILE_EXTENT items. In that case, we just fall back to current transid. With this modification, recreated data EXTENT_ITEM now has correct generation number: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 6 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:17 +08:00
u32 num_refs, u64 gen, int found_ref, u64 max_size)
{
struct extent_record *rec;
struct data_backref *back;
struct cache_extent *cache;
int ret;
bool insert = false;
cache = lookup_cache_extent(extent_cache, bytenr, 1);
if (!cache) {
struct extent_record tmpl;
memset(&tmpl, 0, sizeof(tmpl));
tmpl.start = bytenr;
tmpl.nr = 1;
tmpl.max_size = max_size;
btrfs-progs: check: Populate extent generation correctly for data extents [BUG] When using `btrfs check --init-extent-tree`, we will create incorrect generation number for data extents in extent tree: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 [CAUSE] Since data extent generation is not as obvious as tree blocks, which has header containing its generations, so for data extents, its extent_record::generation is not really updated, resulting such 0 generation. [FIX] To get generation of a data extent, there are two sources we can rely: - EXTENT_ITEM There is always a btrfs_extent_item::generation can be utilized. Although this is not possible for --init-extent-tree use case. - EXTENT_DATA We have btrfs_file_extent_item::generation for regular and preallocated data extents. Since --init-extent-tree will go through subvolume trees, this would be the main source for extent data generation. Then we only need to make add_data_backref() to accept @gen parameter, and pass it down to extent_record structure. And for the final extent item generation update, here we add extra fallback values, if we can't find FILE_EXTENT items. In that case, we just fall back to current transid. With this modification, recreated data EXTENT_ITEM now has correct generation number: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 6 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:17 +08:00
tmpl.generation = gen;
ret = add_extent_rec_nolookup(extent_cache, &tmpl);
if (ret)
return ret;
cache = lookup_cache_extent(extent_cache, bytenr, 1);
if (!cache)
abort();
}
rec = container_of(cache, struct extent_record, cache);
if (rec->max_size < max_size)
rec->max_size = max_size;
btrfs-progs: check: Populate extent generation correctly for data extents [BUG] When using `btrfs check --init-extent-tree`, we will create incorrect generation number for data extents in extent tree: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 [CAUSE] Since data extent generation is not as obvious as tree blocks, which has header containing its generations, so for data extents, its extent_record::generation is not really updated, resulting such 0 generation. [FIX] To get generation of a data extent, there are two sources we can rely: - EXTENT_ITEM There is always a btrfs_extent_item::generation can be utilized. Although this is not possible for --init-extent-tree use case. - EXTENT_DATA We have btrfs_file_extent_item::generation for regular and preallocated data extents. Since --init-extent-tree will go through subvolume trees, this would be the main source for extent data generation. Then we only need to make add_data_backref() to accept @gen parameter, and pass it down to extent_record structure. And for the final extent item generation update, here we add extra fallback values, if we can't find FILE_EXTENT items. In that case, we just fall back to current transid. With this modification, recreated data EXTENT_ITEM now has correct generation number: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 6 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:17 +08:00
if (rec->generation < gen)
rec->generation = gen;
/*
* If found_ref is set then max_size is the real size and must match the
* existing refs. So if we have already found a ref then we need to
* make sure that this ref matches the existing one, otherwise we need
* to add a new backref so we can notice that the backrefs don't match
* and we need to figure out who is telling the truth. This is to
* account for that awful fsync bug I introduced where we'd end up with
* a btrfs_file_extent_item that would have its length include multiple
* prealloc extents or point inside of a prealloc extent.
*/
back = find_data_backref(rec, parent, root, owner, offset, found_ref,
bytenr, max_size);
if (!back) {
back = alloc_data_backref(rec, parent, root, owner, offset,
max_size);
BUG_ON(!back);
insert = true;
}
if (found_ref) {
BUG_ON(num_refs != 1);
if (back->node.found_ref)
BUG_ON(back->bytes != max_size);
back->node.found_ref = 1;
back->found_ref += 1;
if (back->bytes != max_size || back->disk_bytenr != bytenr) {
back->bytes = max_size;
back->disk_bytenr = bytenr;
/* Need to reinsert if not already in the tree */
if (!insert) {
rb_erase(&back->node.node, &rec->backref_tree);
insert = true;
}
}
rec->refs += 1;
rec->content_checked = 1;
rec->owner_ref_checked = 1;
} else {
if (back->node.found_extent_tree) {
fprintf(stderr,
"Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
(unsigned long long)bytenr,
(unsigned long long)parent,
(unsigned long long)root,
(unsigned long long)owner,
(unsigned long long)offset,
(unsigned long)num_refs);
}
back->num_refs = num_refs;
back->node.found_extent_tree = 1;
}
if (insert)
WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
compare_extent_backref));
maybe_free_extent_rec(extent_cache, rec);
return 0;
}
static int add_pending(struct cache_tree *pending,
struct cache_tree *seen, u64 bytenr, u32 size)
{
int ret;
ret = add_cache_extent(seen, bytenr, size);
if (ret)
return ret;
ret = add_cache_extent(pending, bytenr, size);
if (ret) {
struct cache_extent *entry;
entry = lookup_cache_extent(seen, bytenr, size);
if (entry && entry->start == bytenr && entry->size == size) {
remove_cache_extent(seen, entry);
free(entry);
}
return ret;
}
return 0;
}
static int pick_next_pending(struct cache_tree *pending,
struct cache_tree *reada,
struct cache_tree *nodes,
u64 last, struct block_info *bits, int bits_nr,
int *reada_bits)
{
unsigned long node_start = last;
struct cache_extent *cache;
int ret;
cache = search_cache_extent(reada, 0);
if (cache) {
bits[0].start = cache->start;
bits[0].size = cache->size;
*reada_bits = 1;
return 1;
}
*reada_bits = 0;
if (node_start > 32768)
node_start -= 32768;
cache = search_cache_extent(nodes, node_start);
if (!cache)
cache = search_cache_extent(nodes, 0);
if (!cache) {
cache = search_cache_extent(pending, 0);
if (!cache)
return 0;
ret = 0;
do {
bits[ret].start = cache->start;
bits[ret].size = cache->size;
cache = next_cache_extent(cache);
ret++;
} while (cache && ret < bits_nr);
return ret;
}
ret = 0;
do {
bits[ret].start = cache->start;
bits[ret].size = cache->size;
cache = next_cache_extent(cache);
ret++;
} while (cache && ret < bits_nr);
if (bits_nr - ret > 8) {
u64 lookup = bits[0].start + bits[0].size;
struct cache_extent *next;
next = search_cache_extent(pending, lookup);
while (next) {
if (next->start - lookup > 32768)
break;
bits[ret].start = next->start;
bits[ret].size = next->size;
lookup = next->start + next->size;
ret++;
if (ret == bits_nr)
break;
next = next_cache_extent(next);
if (!next)
break;
}
}
return ret;
}
static void free_chunk_record(struct cache_extent *cache)
{
struct chunk_record *rec;
rec = container_of(cache, struct chunk_record, cache);
list_del_init(&rec->list);
list_del_init(&rec->dextents);
free(rec);
}
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
cache_tree_free_extents(chunk_cache, free_chunk_record);
}
static void free_device_record(struct rb_node *node)
{
struct device_record *rec;
rec = container_of(node, struct device_record, node);
free(rec);
}
FREE_RB_BASED_TREE(device_cache, free_device_record);
int insert_block_group_record(struct block_group_tree *tree,
struct block_group_record *bg_rec)
{
int ret;
ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
if (ret)
return ret;
list_add_tail(&bg_rec->list, &tree->block_groups);
return 0;
}
static void free_block_group_record(struct cache_extent *cache)
{
struct block_group_record *rec;
rec = container_of(cache, struct block_group_record, cache);
list_del_init(&rec->list);
free(rec);
}
void free_block_group_tree(struct block_group_tree *tree)
{
extent_io_tree_cleanup(&tree->pending_extents);
cache_tree_free_extents(&tree->tree, free_block_group_record);
}
static void update_block_group_used(struct block_group_tree *tree,
u64 bytenr, u64 num_bytes)
{
struct cache_extent *bg_item;
struct block_group_record *bg_rec;
bg_item = lookup_cache_extent(&tree->tree, bytenr, num_bytes);
if (!bg_item) {
set_extent_dirty(&tree->pending_extents, bytenr,
bytenr + num_bytes - 1);
return;
}
bg_rec = container_of(bg_item, struct block_group_record, cache);
bg_rec->actual_used += num_bytes;
}
int insert_device_extent_record(struct device_extent_tree *tree,
struct device_extent_record *de_rec)
{
int ret;
/*
* Device extent is a bit different from the other extents, because
* the extents which belong to the different devices may have the
* same start and size, so we need use the special extent cache
* search/insert functions.
*/
ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
if (ret)
return ret;
list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
return 0;
}
static void free_device_extent_record(struct cache_extent *cache)
{
struct device_extent_record *rec;
rec = container_of(cache, struct device_extent_record, cache);
if (!list_empty(&rec->chunk_list))
list_del_init(&rec->chunk_list);
if (!list_empty(&rec->device_list))
list_del_init(&rec->device_list);
free(rec);
}
void free_device_extent_tree(struct device_extent_tree *tree)
{
cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
struct btrfs_key *key,
int slot)
{
struct btrfs_chunk *ptr;
struct chunk_record *rec;
int num_stripes, i;
ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
rec = calloc(1, btrfs_chunk_record_size(num_stripes));
if (!rec) {
error_msg(ERROR_MSG_MEMORY, NULL);
exit(-1);
}
INIT_LIST_HEAD(&rec->list);
INIT_LIST_HEAD(&rec->dextents);
rec->bg_rec = NULL;
rec->cache.start = key->offset;
rec->cache.size = btrfs_chunk_length(leaf, ptr);
rec->generation = btrfs_header_generation(leaf);
rec->objectid = key->objectid;
rec->type = key->type;
rec->offset = key->offset;
rec->length = rec->cache.size;
rec->owner = btrfs_chunk_owner(leaf, ptr);
rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
rec->type_flags = btrfs_chunk_type(leaf, ptr);
rec->io_width = btrfs_chunk_io_width(leaf, ptr);
rec->io_align = btrfs_chunk_io_align(leaf, ptr);
rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
rec->num_stripes = num_stripes;
rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
for (i = 0; i < rec->num_stripes; ++i) {
rec->stripes[i].devid =
btrfs_stripe_devid_nr(leaf, ptr, i);
rec->stripes[i].offset =
btrfs_stripe_offset_nr(leaf, ptr, i);
read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
(unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
BTRFS_UUID_SIZE);
}
return rec;
}
static int process_chunk_item(struct cache_tree *chunk_cache,
struct btrfs_key *key, struct extent_buffer *eb,
int slot)
{
struct chunk_record *rec;
struct btrfs_chunk *chunk;
int ret = 0;
chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
/*
* Do extra check for this chunk item,
*
* It's still possible one can craft a leaf with CHUNK_ITEM, with
* wrong onwer(3) out of chunk tree, to pass both chunk tree check
* and owner<->key_type check.
*/
ret = btrfs_check_chunk_valid(gfs_info, eb, chunk, slot,
key->offset);
if (ret < 0) {
error("chunk(%llu, %llu) is not valid, ignore it",
key->offset, btrfs_chunk_length(eb, chunk));
return 0;
}
rec = btrfs_new_chunk_record(eb, key, slot);
ret = insert_cache_extent(chunk_cache, &rec->cache);
if (ret) {
fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
rec->offset, rec->length);
free(rec);
}
return ret;
}
static int process_device_item(struct rb_root *dev_cache,
struct btrfs_key *key, struct extent_buffer *eb, int slot)
{
btrfs-progs: check: verify the underlying block device size is valid [BUG] There is a bug report that, one btrfs got its underlying device shrunk accidentally. Fortunately the user has no data at the truncated range. However kernel will reject such filesystem, while btrfs-check reports nothing wrong with it. This can be easily reproduced by: # truncate -s 1G test.img # mkfs.btrfs test.img # truncate -s 996M test.img # btrfs check test.img Opening filesystem to check... Checking filesystem on test.img UUID: dbf0a16d-f158-4383-9025-29d7f4c43f17 [1/7] checking root items [2/7] checking extents [3/7] checking free space tree [4/7] checking fs roots [5/7] checking only csums items (without verifying data) [6/7] checking root refs [7/7] checking quota groups skipped (not enabled on this FS) found 16527360 bytes used, no error found ^^^^^^^^^^^^^^ total csum bytes: 13836 total tree bytes: 2359296 total fs tree bytes: 2162688 total extent tree bytes: 65536 btree space waste bytes: 503569 file data blocks allocated: 14168064 referenced 14168064 [CAUSE] Btrfs check really only checks the metadata cross references, not really bothering if the underlying device has correct size. Thus we completely ignored such size mismatch. [FIX] For both regular and lowmem mode, add extra check against the underlying block device size. If the block device size is smaller than its total_bytes, gives a error message and error out. Now the check looks like this for both modes: ... [2/7] checking extents ERROR: block device size is smaller than total_bytes in device item, has 1046478848 expect >= 1073741824 ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space tree ... found 16527360 bytes used, error(s) found Issue: #504 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2022-08-30 14:49:42 +08:00
struct btrfs_device *device;
struct btrfs_dev_item *ptr;
struct device_record *rec;
int ret = 0;
ptr = btrfs_item_ptr(eb,
slot, struct btrfs_dev_item);
rec = malloc(sizeof(*rec));
if (!rec) {
error_msg(ERROR_MSG_MEMORY, NULL);
return -ENOMEM;
}
rec->devid = key->offset;
rec->generation = btrfs_header_generation(eb);
rec->objectid = key->objectid;
rec->type = key->type;
rec->offset = key->offset;
rec->devid = btrfs_device_id(eb, ptr);
rec->total_byte = btrfs_device_total_bytes(eb, ptr);
rec->byte_used = btrfs_device_bytes_used(eb, ptr);
btrfs-progs: check: verify the underlying block device size is valid [BUG] There is a bug report that, one btrfs got its underlying device shrunk accidentally. Fortunately the user has no data at the truncated range. However kernel will reject such filesystem, while btrfs-check reports nothing wrong with it. This can be easily reproduced by: # truncate -s 1G test.img # mkfs.btrfs test.img # truncate -s 996M test.img # btrfs check test.img Opening filesystem to check... Checking filesystem on test.img UUID: dbf0a16d-f158-4383-9025-29d7f4c43f17 [1/7] checking root items [2/7] checking extents [3/7] checking free space tree [4/7] checking fs roots [5/7] checking only csums items (without verifying data) [6/7] checking root refs [7/7] checking quota groups skipped (not enabled on this FS) found 16527360 bytes used, no error found ^^^^^^^^^^^^^^ total csum bytes: 13836 total tree bytes: 2359296 total fs tree bytes: 2162688 total extent tree bytes: 65536 btree space waste bytes: 503569 file data blocks allocated: 14168064 referenced 14168064 [CAUSE] Btrfs check really only checks the metadata cross references, not really bothering if the underlying device has correct size. Thus we completely ignored such size mismatch. [FIX] For both regular and lowmem mode, add extra check against the underlying block device size. If the block device size is smaller than its total_bytes, gives a error message and error out. Now the check looks like this for both modes: ... [2/7] checking extents ERROR: block device size is smaller than total_bytes in device item, has 1046478848 expect >= 1073741824 ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space tree ... found 16527360 bytes used, error(s) found Issue: #504 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2022-08-30 14:49:42 +08:00
rec->bad_block_dev_size = false;
device = btrfs_find_device_by_devid(gfs_info->fs_devices, rec->devid, 0);
if (device && device->fd >= 0) {
struct stat st;
u64 block_dev_size;
btrfs-progs: check: verify the underlying block device size is valid [BUG] There is a bug report that, one btrfs got its underlying device shrunk accidentally. Fortunately the user has no data at the truncated range. However kernel will reject such filesystem, while btrfs-check reports nothing wrong with it. This can be easily reproduced by: # truncate -s 1G test.img # mkfs.btrfs test.img # truncate -s 996M test.img # btrfs check test.img Opening filesystem to check... Checking filesystem on test.img UUID: dbf0a16d-f158-4383-9025-29d7f4c43f17 [1/7] checking root items [2/7] checking extents [3/7] checking free space tree [4/7] checking fs roots [5/7] checking only csums items (without verifying data) [6/7] checking root refs [7/7] checking quota groups skipped (not enabled on this FS) found 16527360 bytes used, no error found ^^^^^^^^^^^^^^ total csum bytes: 13836 total tree bytes: 2359296 total fs tree bytes: 2162688 total extent tree bytes: 65536 btree space waste bytes: 503569 file data blocks allocated: 14168064 referenced 14168064 [CAUSE] Btrfs check really only checks the metadata cross references, not really bothering if the underlying device has correct size. Thus we completely ignored such size mismatch. [FIX] For both regular and lowmem mode, add extra check against the underlying block device size. If the block device size is smaller than its total_bytes, gives a error message and error out. Now the check looks like this for both modes: ... [2/7] checking extents ERROR: block device size is smaller than total_bytes in device item, has 1046478848 expect >= 1073741824 ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space tree ... found 16527360 bytes used, error(s) found Issue: #504 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2022-08-30 14:49:42 +08:00
ret = fstat(device->fd, &st);
if (ret < 0) {
warning(
"unable to open devid %llu, skipping its block device size check",
device->devid);
goto skip;
}
block_dev_size = device_get_partition_size_fd_stat(device->fd, &st);
btrfs-progs: check: verify the underlying block device size is valid [BUG] There is a bug report that, one btrfs got its underlying device shrunk accidentally. Fortunately the user has no data at the truncated range. However kernel will reject such filesystem, while btrfs-check reports nothing wrong with it. This can be easily reproduced by: # truncate -s 1G test.img # mkfs.btrfs test.img # truncate -s 996M test.img # btrfs check test.img Opening filesystem to check... Checking filesystem on test.img UUID: dbf0a16d-f158-4383-9025-29d7f4c43f17 [1/7] checking root items [2/7] checking extents [3/7] checking free space tree [4/7] checking fs roots [5/7] checking only csums items (without verifying data) [6/7] checking root refs [7/7] checking quota groups skipped (not enabled on this FS) found 16527360 bytes used, no error found ^^^^^^^^^^^^^^ total csum bytes: 13836 total tree bytes: 2359296 total fs tree bytes: 2162688 total extent tree bytes: 65536 btree space waste bytes: 503569 file data blocks allocated: 14168064 referenced 14168064 [CAUSE] Btrfs check really only checks the metadata cross references, not really bothering if the underlying device has correct size. Thus we completely ignored such size mismatch. [FIX] For both regular and lowmem mode, add extra check against the underlying block device size. If the block device size is smaller than its total_bytes, gives a error message and error out. Now the check looks like this for both modes: ... [2/7] checking extents ERROR: block device size is smaller than total_bytes in device item, has 1046478848 expect >= 1073741824 ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space tree ... found 16527360 bytes used, error(s) found Issue: #504 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2022-08-30 14:49:42 +08:00
if (block_dev_size < rec->total_byte) {
error(
"block device size is smaller than total_bytes in device item, has %llu expect >= %llu",
block_dev_size, rec->total_byte);
rec->bad_block_dev_size = true;
}
}
skip:
ret = rb_insert(dev_cache, &rec->node, device_record_compare);
if (ret) {
fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
free(rec);
}
return ret;
}
struct block_group_record *
btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
int slot)
{
struct btrfs_block_group_item *ptr;
struct block_group_record *rec;
rec = calloc(1, sizeof(*rec));
if (!rec) {
error_msg(ERROR_MSG_MEMORY, NULL);
exit(-1);
}
rec->cache.start = key->objectid;
rec->cache.size = key->offset;
rec->generation = btrfs_header_generation(leaf);
rec->objectid = key->objectid;
rec->type = key->type;
rec->offset = key->offset;
ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
rec->flags = btrfs_block_group_flags(leaf, ptr);
rec->disk_used = btrfs_block_group_used(leaf, ptr);
INIT_LIST_HEAD(&rec->list);
return rec;
}
static int process_block_group_item(struct block_group_tree *block_group_cache,
struct btrfs_key *key,
struct extent_buffer *eb, int slot)
{
struct block_group_record *rec;
u64 start, end;
int ret = 0;
rec = btrfs_new_block_group_record(eb, key, slot);
ret = insert_block_group_record(block_group_cache, rec);
if (ret) {
fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
rec->objectid, rec->offset);
free(rec);
return ret;
}
while (!find_first_extent_bit(&block_group_cache->pending_extents,
rec->objectid, &start, &end, EXTENT_DIRTY)) {
u64 len;
if (start >= rec->objectid + rec->offset)
break;
start = max(start, rec->objectid);
len = min(end - start + 1, rec->objectid + rec->offset - start);
rec->actual_used += len;
clear_extent_dirty(&block_group_cache->pending_extents, start,
start + len - 1);
}
return ret;
}
struct device_extent_record *
btrfs_new_device_extent_record(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct device_extent_record *rec;
struct btrfs_dev_extent *ptr;
rec = calloc(1, sizeof(*rec));
if (!rec) {
error_msg(ERROR_MSG_MEMORY, NULL);
exit(-1);
}
rec->cache.objectid = key->objectid;
rec->cache.start = key->offset;
rec->generation = btrfs_header_generation(leaf);
rec->objectid = key->objectid;
rec->type = key->type;
rec->offset = key->offset;
ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
rec->chunk_objectid =
btrfs_dev_extent_chunk_objectid(leaf, ptr);
rec->chunk_offset =
btrfs_dev_extent_chunk_offset(leaf, ptr);
rec->length = btrfs_dev_extent_length(leaf, ptr);
rec->cache.size = rec->length;
INIT_LIST_HEAD(&rec->chunk_list);
INIT_LIST_HEAD(&rec->device_list);
return rec;
}
static int
process_device_extent_item(struct device_extent_tree *dev_extent_cache,
struct btrfs_key *key, struct extent_buffer *eb,
int slot)
2007-05-22 22:07:10 +08:00
{
struct device_extent_record *rec;
int ret;
rec = btrfs_new_device_extent_record(eb, key, slot);
ret = insert_device_extent_record(dev_extent_cache, rec);
if (ret) {
fprintf(stderr,
"Device extent[%llu, %llu, %llu] existed.\n",
rec->objectid, rec->offset, rec->length);
2007-05-22 22:07:10 +08:00
free(rec);
}
return ret;
2007-05-22 22:07:10 +08:00
}
static int process_extent_item(struct btrfs_root *root,
struct cache_tree *extent_cache,
struct block_group_tree *block_group_cache,
struct extent_buffer *eb, int slot)
{
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
struct btrfs_extent_data_ref *dref;
struct btrfs_shared_data_ref *sref;
struct btrfs_key key;
struct extent_record tmpl;
unsigned long end;
unsigned long ptr;
int ret;
int type;
u32 item_size = btrfs_item_size(eb, slot);
u64 refs = 0;
u64 offset;
u64 num_bytes;
btrfs-progs: check: Populate extent generation correctly for data extents [BUG] When using `btrfs check --init-extent-tree`, we will create incorrect generation number for data extents in extent tree: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 [CAUSE] Since data extent generation is not as obvious as tree blocks, which has header containing its generations, so for data extents, its extent_record::generation is not really updated, resulting such 0 generation. [FIX] To get generation of a data extent, there are two sources we can rely: - EXTENT_ITEM There is always a btrfs_extent_item::generation can be utilized. Although this is not possible for --init-extent-tree use case. - EXTENT_DATA We have btrfs_file_extent_item::generation for regular and preallocated data extents. Since --init-extent-tree will go through subvolume trees, this would be the main source for extent data generation. Then we only need to make add_data_backref() to accept @gen parameter, and pass it down to extent_record structure. And for the final extent item generation update, here we add extra fallback values, if we can't find FILE_EXTENT items. In that case, we just fall back to current transid. With this modification, recreated data EXTENT_ITEM now has correct generation number: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 6 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:17 +08:00
u64 gen;
int metadata = 0;
btrfs_item_key_to_cpu(eb, &key, slot);
if (key.type == BTRFS_METADATA_ITEM_KEY) {
metadata = 1;
num_bytes = gfs_info->nodesize;
} else {
num_bytes = key.offset;
}
update_block_group_used(block_group_cache, key.objectid, num_bytes);
if (!IS_ALIGNED(key.objectid, gfs_info->sectorsize)) {
error("ignoring invalid extent, bytenr %llu is not aligned to %u",
key.objectid, gfs_info->sectorsize);
return -EIO;
}
if (item_size < sizeof(*ei)) {
error(
"corrupted or unsupported extent item found, item size=%u expect minimal size=%zu",
item_size, sizeof(*ei));
return -EIO;
}
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
refs = btrfs_extent_refs(eb, ei);
btrfs-progs: check: Populate extent generation correctly for data extents [BUG] When using `btrfs check --init-extent-tree`, we will create incorrect generation number for data extents in extent tree: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 [CAUSE] Since data extent generation is not as obvious as tree blocks, which has header containing its generations, so for data extents, its extent_record::generation is not really updated, resulting such 0 generation. [FIX] To get generation of a data extent, there are two sources we can rely: - EXTENT_ITEM There is always a btrfs_extent_item::generation can be utilized. Although this is not possible for --init-extent-tree use case. - EXTENT_DATA We have btrfs_file_extent_item::generation for regular and preallocated data extents. Since --init-extent-tree will go through subvolume trees, this would be the main source for extent data generation. Then we only need to make add_data_backref() to accept @gen parameter, and pass it down to extent_record structure. And for the final extent item generation update, here we add extra fallback values, if we can't find FILE_EXTENT items. In that case, we just fall back to current transid. With this modification, recreated data EXTENT_ITEM now has correct generation number: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 6 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:17 +08:00
gen = btrfs_extent_generation(eb, ei);
if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
metadata = 1;
else
metadata = 0;
if (metadata && num_bytes != gfs_info->nodesize) {
error("ignore invalid metadata extent, length %llu does not equal to %u",
num_bytes, gfs_info->nodesize);
return -EIO;
}
if (!metadata && !IS_ALIGNED(num_bytes, gfs_info->sectorsize)) {
error("ignore invalid data extent, length %llu is not aligned to %u",
num_bytes, gfs_info->sectorsize);
return -EIO;
}
if (metadata)
btrfs_check_subpage_eb_alignment(gfs_info, key.objectid, num_bytes);
ptr = (unsigned long)(ei + 1);
if (metadata) {
u64 level;
if (key.type == BTRFS_EXTENT_ITEM_KEY) {
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)ptr;
level = btrfs_tree_block_level(eb, info);
} else {
level = key.offset;
}
if (level >= BTRFS_MAX_LEVEL) {
error(
"tree block %llu has bad backref level, has %llu expect [0, %u]",
key.objectid, level, BTRFS_MAX_LEVEL - 1);
return -EUCLEAN;
}
}
memset(&tmpl, 0, sizeof(tmpl));
tmpl.start = key.objectid;
tmpl.nr = num_bytes;
tmpl.extent_item_refs = refs;
tmpl.metadata = metadata;
tmpl.found_rec = 1;
tmpl.max_size = num_bytes;
btrfs-progs: check: Populate extent generation correctly for data extents [BUG] When using `btrfs check --init-extent-tree`, we will create incorrect generation number for data extents in extent tree: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 [CAUSE] Since data extent generation is not as obvious as tree blocks, which has header containing its generations, so for data extents, its extent_record::generation is not really updated, resulting such 0 generation. [FIX] To get generation of a data extent, there are two sources we can rely: - EXTENT_ITEM There is always a btrfs_extent_item::generation can be utilized. Although this is not possible for --init-extent-tree use case. - EXTENT_DATA We have btrfs_file_extent_item::generation for regular and preallocated data extents. Since --init-extent-tree will go through subvolume trees, this would be the main source for extent data generation. Then we only need to make add_data_backref() to accept @gen parameter, and pass it down to extent_record structure. And for the final extent item generation update, here we add extra fallback values, if we can't find FILE_EXTENT items. In that case, we just fall back to current transid. With this modification, recreated data EXTENT_ITEM now has correct generation number: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 6 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:17 +08:00
tmpl.generation = gen;
add_extent_rec(extent_cache, &tmpl);
ptr = (unsigned long)(ei + 1);
if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
key.type == BTRFS_EXTENT_ITEM_KEY)
ptr += sizeof(struct btrfs_tree_block_info);
end = (unsigned long)ei + item_size;
while (ptr < end) {
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_extent_inline_ref_type(eb, iref);
offset = btrfs_extent_inline_ref_offset(eb, iref);
switch (type) {
case BTRFS_TREE_BLOCK_REF_KEY:
ret = add_tree_backref(extent_cache, key.objectid,
0, offset, 0);
if (ret < 0) {
errno = -ret;
error(
"add_tree_backref failed (extent items tree block): %m");
}
break;
case BTRFS_SHARED_BLOCK_REF_KEY:
ret = add_tree_backref(extent_cache, key.objectid,
offset, 0, 0);
if (ret < 0) {
errno = -ret;
error(
"add_tree_backref failed (extent items shared block): %m");
}
break;
case BTRFS_EXTENT_DATA_REF_KEY:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
add_data_backref(extent_cache, key.objectid, 0,
btrfs_extent_data_ref_root(eb, dref),
btrfs_extent_data_ref_objectid(eb,
dref),
btrfs_extent_data_ref_offset(eb, dref),
btrfs_extent_data_ref_count(eb, dref),
btrfs-progs: check: Populate extent generation correctly for data extents [BUG] When using `btrfs check --init-extent-tree`, we will create incorrect generation number for data extents in extent tree: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 [CAUSE] Since data extent generation is not as obvious as tree blocks, which has header containing its generations, so for data extents, its extent_record::generation is not really updated, resulting such 0 generation. [FIX] To get generation of a data extent, there are two sources we can rely: - EXTENT_ITEM There is always a btrfs_extent_item::generation can be utilized. Although this is not possible for --init-extent-tree use case. - EXTENT_DATA We have btrfs_file_extent_item::generation for regular and preallocated data extents. Since --init-extent-tree will go through subvolume trees, this would be the main source for extent data generation. Then we only need to make add_data_backref() to accept @gen parameter, and pass it down to extent_record structure. And for the final extent item generation update, here we add extra fallback values, if we can't find FILE_EXTENT items. In that case, we just fall back to current transid. With this modification, recreated data EXTENT_ITEM now has correct generation number: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 6 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:17 +08:00
gen, 0, num_bytes);
break;
case BTRFS_SHARED_DATA_REF_KEY:
sref = (struct btrfs_shared_data_ref *)(iref + 1);
add_data_backref(extent_cache, key.objectid, offset,
0, 0, 0,
btrfs_shared_data_ref_count(eb, sref),
btrfs-progs: check: Populate extent generation correctly for data extents [BUG] When using `btrfs check --init-extent-tree`, we will create incorrect generation number for data extents in extent tree: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 [CAUSE] Since data extent generation is not as obvious as tree blocks, which has header containing its generations, so for data extents, its extent_record::generation is not really updated, resulting such 0 generation. [FIX] To get generation of a data extent, there are two sources we can rely: - EXTENT_ITEM There is always a btrfs_extent_item::generation can be utilized. Although this is not possible for --init-extent-tree use case. - EXTENT_DATA We have btrfs_file_extent_item::generation for regular and preallocated data extents. Since --init-extent-tree will go through subvolume trees, this would be the main source for extent data generation. Then we only need to make add_data_backref() to accept @gen parameter, and pass it down to extent_record structure. And for the final extent item generation update, here we add extra fallback values, if we can't find FILE_EXTENT items. In that case, we just fall back to current transid. With this modification, recreated data EXTENT_ITEM now has correct generation number: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 6 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:17 +08:00
gen, 0, num_bytes);
break;
default:
fprintf(stderr,
"corrupt extent record: key [%llu,%u,%llu]\n",
key.objectid, key.type, num_bytes);
goto out;
}
ptr += btrfs_extent_inline_ref_size(type);
}
WARN_ON(ptr > end);
out:
return 0;
}
/*
* Check data checksum for [@bytenr, @bytenr + @num_bytes).
*
* Return <0 for fatal error (fails to read checksum/data or allocate memory).
* Return >0 for csum mismatch for any copy.
* Return 0 if everything is OK.
*/
static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
u64 num_bytes, unsigned long leaf_offset,
struct extent_buffer *eb)
{
u64 offset = 0;
u16 csum_size = gfs_info->csum_size;
u16 csum_type = gfs_info->csum_type;
u8 *data;
unsigned long csum_offset;
u8 result[BTRFS_CSUM_SIZE];
u8 csum_expected[BTRFS_CSUM_SIZE];
u64 read_len;
u64 data_checked = 0;
u64 tmp;
int ret = 0;
int mirror;
int num_copies;
bool csum_mismatch = false;
if (num_bytes % gfs_info->sectorsize)
return -EINVAL;
data = malloc(num_bytes);
if (!data)
return -ENOMEM;
num_copies = btrfs_num_copies(gfs_info, bytenr, num_bytes);
while (offset < num_bytes) {
/*
* Mirror 0 means 'read from any valid copy', so it's skipped.
* The indexes 1-N represent the n-th copy for levels with
* redundancy.
*/
for (mirror = 1; mirror <= num_copies; mirror++) {
read_len = num_bytes - offset;
/* read as much space once a time */
ret = read_data_from_disk(gfs_info, (char *)data + offset,
bytenr + offset, &read_len, mirror);
if (ret)
goto out;
data_checked = 0;
/* verify every 4k data's checksum */
while (data_checked < read_len) {
tmp = offset + data_checked;
btrfs_csum_data(gfs_info, csum_type, data + tmp,
result, gfs_info->sectorsize);
csum_offset = leaf_offset +
tmp / gfs_info->sectorsize * csum_size;
read_extent_buffer(eb, (char *)&csum_expected,
csum_offset, csum_size);
if (memcmp(result, csum_expected, csum_size) != 0) {
char found[BTRFS_CSUM_STRING_LEN];
char want[BTRFS_CSUM_STRING_LEN];
csum_mismatch = true;
btrfs_format_csum(csum_type, result, found);
btrfs_format_csum(csum_type,
csum_expected, want);
fprintf(stderr,
"mirror %d bytenr %llu csum %s expected csum %s\n",
mirror, bytenr + tmp, found, want);
}
data_checked += gfs_info->sectorsize;
}
}
offset += read_len;
}
out:
free(data);
if (!ret && csum_mismatch)
ret = 1;
2007-05-22 21:17:08 +08:00
return ret;
2007-04-24 03:56:27 +08:00
}
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
u64 num_bytes)
{
struct btrfs_root *extent_root;
struct btrfs_path path;
struct extent_buffer *leaf;
struct btrfs_key key;
int ret;
btrfs_init_path(&path);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = (u64)-1;
again:
extent_root = btrfs_extent_root(gfs_info, key.objectid);
ret = btrfs_search_slot(NULL, extent_root, &key, &path,
0, 0);
if (ret < 0) {
fprintf(stderr, "Error looking up extent record %d\n", ret);
btrfs_release_path(&path);
return ret;
} else if (ret) {
if (path.slots[0] > 0) {
path.slots[0]--;
} else {
ret = btrfs_prev_leaf(extent_root, &path);
if (ret < 0) {
goto out;
} else if (ret > 0) {
ret = 0;
goto out;
}
}
}
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/*
* Block group items come before extent items if they have the same
* bytenr, so walk back one more just in case. Dear future traveller,
* first congrats on mastering time travel. Now if it's not too much
* trouble could you go back to 2006 and tell Chris to make the
* BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
* EXTENT_ITEM_KEY please?
*/
while (key.type > BTRFS_EXTENT_ITEM_KEY) {
if (path.slots[0] > 0) {
path.slots[0]--;
} else {
ret = btrfs_prev_leaf(extent_root, &path);
if (ret < 0) {
goto out;
} else if (ret > 0) {
ret = 0;
goto out;
}
}
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
}
while (num_bytes) {
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
ret = btrfs_next_leaf(extent_root, &path);
if (ret < 0) {
fprintf(stderr, "Error going to next leaf "
"%d\n", ret);
btrfs_release_path(&path);
return ret;
} else if (ret) {
break;
}
}
leaf = path.nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
if (key.type != BTRFS_EXTENT_ITEM_KEY) {
path.slots[0]++;
continue;
}
if (key.objectid + key.offset < bytenr) {
path.slots[0]++;
continue;
}
if (key.objectid > bytenr + num_bytes)
break;
if (key.objectid == bytenr) {
if (key.offset >= num_bytes) {
num_bytes = 0;
break;
}
num_bytes -= key.offset;
bytenr += key.offset;
} else if (key.objectid < bytenr) {
if (key.objectid + key.offset >= bytenr + num_bytes) {
num_bytes = 0;
break;
}
num_bytes = (bytenr + num_bytes) -
(key.objectid + key.offset);
bytenr = key.objectid + key.offset;
} else {
if (key.objectid + key.offset < bytenr + num_bytes) {
u64 new_start = key.objectid + key.offset;
u64 new_bytes = bytenr + num_bytes - new_start;
/*
* Weird case, the extent is in the middle of
* our range, we'll have to search one side
* and then the other. Not sure if this happens
* in real life, but no harm in coding it up
* anyway just in case.
*/
btrfs_release_path(&path);
ret = check_extent_exists(extent_root,
new_start,
new_bytes);
if (ret) {
fprintf(stderr, "Right section didn't "
"have a record\n");
break;
}
num_bytes = key.objectid - bytenr;
goto again;
}
num_bytes = key.objectid - bytenr;
}
path.slots[0]++;
}
ret = 0;
out:
if (num_bytes && !ret) {
fprintf(stderr,
"there are no extents for csum range %llu-%llu\n",
bytenr, bytenr+num_bytes);
ret = 1;
}
btrfs_release_path(&path);
return ret;
}
static int check_csum_root(struct btrfs_root *root)
{
struct btrfs_path path;
struct extent_buffer *leaf;
struct btrfs_key key;
u64 last_data_end = 0;
u64 offset = 0, num_bytes = 0;
u16 csum_size = gfs_info->csum_size;
int errors = 0;
int ret;
u64 data_len;
unsigned long leaf_offset;
bool verify_csum = !!check_data_csum;
u16 num_entries, max_entries;
max_entries = ((BTRFS_LEAF_DATA_SIZE(gfs_info) -
(sizeof(struct btrfs_item) * 2)) / csum_size) - 1;
if (!extent_buffer_uptodate(root->node)) {
fprintf(stderr, "No valid csum tree found\n");
return -ENOENT;
}
btrfs_init_path(&path);
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key.type = BTRFS_EXTENT_CSUM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
fprintf(stderr, "Error searching csum tree %d\n", ret);
btrfs_release_path(&path);
return ret;
}
if (ret > 0 && path.slots[0])
path.slots[0]--;
ret = 0;
/*
* For metadata dump (btrfs-image) all data is wiped so verifying data
* csum is meaningless and will always report csum error.
*/
if (check_data_csum && (btrfs_super_flags(gfs_info->super_copy) &
(BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))) {
printf("skip data csum verification for metadata dump\n");
verify_csum = false;
}
while (1) {
g_task_ctx.item_count++;
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
ret = btrfs_next_leaf(root, &path);
if (ret < 0) {
fprintf(stderr, "Error going to next leaf "
"%d\n", ret);
break;
}
if (ret)
break;
}
leaf = path.nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
if (key.type != BTRFS_EXTENT_CSUM_KEY) {
path.slots[0]++;
continue;
}
if (key.offset < last_data_end) {
error(
"csum overlap, current bytenr=%llu prev_end=%llu, eb=%llu slot=%u",
key.offset, last_data_end, leaf->start,
path.slots[0]);
errors++;
}
num_entries = btrfs_item_size(leaf, path.slots[0]) / csum_size;
data_len = num_entries * gfs_info->sectorsize;
if (num_entries > max_entries) {
error(
"csum too large, current bytenr=%llu eb=%llu slot=%u (%u entries, max %u)",
key.offset, leaf->start, path.slots[0],
num_entries, max_entries);
errors++;
}
if (!verify_csum)
goto skip_csum_check;
leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
ret = check_extent_csums(root, key.offset, data_len,
leaf_offset, leaf);
/*
* Only break for fatal errors, if mismatch is found, continue
* checking until all extents are checked.
*/
if (ret < 0)
break;
if (ret > 0)
errors++;
skip_csum_check:
if (!num_bytes) {
offset = key.offset;
} else if (key.offset != offset + num_bytes) {
ret = check_extent_exists(root, offset, num_bytes);
if (ret) {
fprintf(stderr,
"csum exists for %llu-%llu but there is no extent record\n",
offset, offset+num_bytes);
errors++;
}
offset = key.offset;
num_bytes = 0;
}
num_bytes += data_len;
last_data_end = key.offset + data_len;
path.slots[0]++;
}
btrfs_release_path(&path);
return errors;
}
static int check_csums(void)
{
struct rb_node *n;
struct btrfs_root *root;
int ret;
root = btrfs_csum_root(gfs_info, 0);
while (1) {
ret = check_csum_root(root);
if (ret)
break;
n = rb_next(&root->rb_node);
if (!n)
break;
root = rb_entry(n, struct btrfs_root, rb_node);
if (root->root_key.objectid != BTRFS_CSUM_TREE_OBJECTID)
break;
}
return ret;
}
static int is_dropped_key(struct btrfs_key *key,
struct btrfs_key *drop_key)
{
if (key->objectid < drop_key->objectid)
return 1;
else if (key->objectid == drop_key->objectid) {
if (key->type < drop_key->type)
return 1;
else if (key->type == drop_key->type) {
if (key->offset < drop_key->offset)
return 1;
}
}
return 0;
}
/*
* Here are the rules for FULL_BACKREF.
*
* 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
* 2) If btrfs_header_owner(buf) no longer points to buf then we have
* FULL_BACKREF set.
* 3) We cowed the block walking down a reloc tree. This is impossible to tell
* if it happened after the relocation occurred since we'll have dropped the
* reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
* have no real way to know for sure.
*
* We process the blocks one root at a time, and we start from the lowest root
* objectid and go to the highest. So we can just lookup the owner backref for
* the record and if we don't find it then we know it doesn't exist and we have
* a FULL BACKREF.
*
* FIXME: if we ever start reclaiming root objectid's then we need to fix this
* assumption and simply indicate that we _think_ that the FULL BACKREF needs to
* be set or not and then we can check later once we've gathered all the refs.
*/
static int calc_extent_flag(struct cache_tree *extent_cache,
struct extent_buffer *buf,
struct root_item_record *ri,
u64 *flags)
{
struct extent_record *rec;
struct cache_extent *cache;
struct tree_backref *tback;
u64 owner = 0;
cache = lookup_cache_extent(extent_cache, buf->start, 1);
/* we have added this extent before */
if (!cache)
return -ENOENT;
rec = container_of(cache, struct extent_record, cache);
/*
* Except file/reloc tree, we can not have
* FULL BACKREF MODE
*/
if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
goto normal;
/*
* root node
*/
if (buf->start == ri->bytenr)
goto normal;
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
goto full_backref;
owner = btrfs_header_owner(buf);
if (owner == ri->objectid)
goto normal;
tback = find_tree_backref(rec, 0, owner);
if (!tback)
goto full_backref;
normal:
*flags = 0;
if (rec->flag_block_full_backref != FLAG_UNSET &&
rec->flag_block_full_backref != 0)
rec->bad_full_backref = 1;
return 0;
full_backref:
*flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
if (rec->flag_block_full_backref != FLAG_UNSET &&
rec->flag_block_full_backref != 1)
rec->bad_full_backref = 1;
return 0;
}
static void report_mismatch_key_root(u8 key_type, u64 rootid)
{
fprintf(stderr, "Invalid key type(");
print_key_type(stderr, 0, key_type);
fprintf(stderr, ") found in root(");
print_objectid(stderr, rootid, 0);
fprintf(stderr, ")\n");
}
/*
* Check if the key is valid with its extent buffer.
*
* This is a early check in case invalid key exists in a extent buffer
* This is not comprehensive yet, but should prevent wrong key/item passed
* further
*/
static int check_type_with_root(u64 rootid, u8 key_type)
{
switch (key_type) {
/* Only valid in chunk tree */
case BTRFS_DEV_ITEM_KEY:
case BTRFS_CHUNK_ITEM_KEY:
if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
goto err;
break;
/* valid in csum and log tree */
case BTRFS_CSUM_TREE_OBJECTID:
if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
is_fstree(rootid)))
goto err;
break;
case BTRFS_EXTENT_ITEM_KEY:
case BTRFS_METADATA_ITEM_KEY:
if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
goto err;
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
if (btrfs_fs_compat_ro(gfs_info, BLOCK_GROUP_TREE)) {
if (rootid != BTRFS_BLOCK_GROUP_TREE_OBJECTID)
goto err;
} else if (rootid != BTRFS_EXTENT_TREE_OBJECTID) {
goto err;
}
break;
case BTRFS_ROOT_ITEM_KEY:
if (rootid != BTRFS_ROOT_TREE_OBJECTID)
goto err;
break;
case BTRFS_DEV_EXTENT_KEY:
if (rootid != BTRFS_DEV_TREE_OBJECTID)
goto err;
break;
}
return 0;
err:
report_mismatch_key_root(key_type, rootid);
return -EINVAL;
}
static int run_next_block(struct btrfs_root *root,
struct block_info *bits,
int bits_nr,
u64 *last,
struct cache_tree *pending,
struct cache_tree *seen,
struct cache_tree *reada,
struct cache_tree *nodes,
struct cache_tree *extent_cache,
struct cache_tree *chunk_cache,
struct rb_root *dev_cache,
struct block_group_tree *block_group_cache,
struct device_extent_tree *dev_extent_cache,
struct root_item_record *ri)
{
struct extent_buffer *buf;
struct extent_record *rec = NULL;
u64 bytenr;
u32 size;
u64 parent;
u64 owner;
u64 flags;
u64 ptr;
u64 gen = 0;
int ret = 0;
int i;
int nritems;
struct btrfs_key key;
struct cache_extent *cache;
int reada_bits;
nritems = pick_next_pending(pending, reada, nodes, *last, bits,
bits_nr, &reada_bits);
if (nritems == 0)
return 1;
if (!reada_bits) {
for (i = 0; i < nritems; i++) {
ret = add_cache_extent(reada, bits[i].start,
bits[i].size);
if (ret == -EEXIST)
continue;
/* fixme, get the parent transid */
readahead_tree_block(gfs_info, bits[i].start, 0);
}
}
*last = bits[0].start;
bytenr = bits[0].start;
size = bits[0].size;
cache = lookup_cache_extent(pending, bytenr, size);
if (cache) {
remove_cache_extent(pending, cache);
free(cache);
}
cache = lookup_cache_extent(reada, bytenr, size);
if (cache) {
remove_cache_extent(reada, cache);
free(cache);
}
cache = lookup_cache_extent(nodes, bytenr, size);
if (cache) {
remove_cache_extent(nodes, cache);
free(cache);
}
cache = lookup_cache_extent(extent_cache, bytenr, size);
if (cache) {
rec = container_of(cache, struct extent_record, cache);
gen = rec->parent_generation;
}
/* fixme, get the real parent transid */
buf = read_tree_block(gfs_info, bytenr, gen);
if (!extent_buffer_uptodate(buf)) {
record_bad_block_io(extent_cache, bytenr, size);
goto out;
}
nritems = btrfs_header_nritems(buf);
flags = 0;
if (!init_extent_tree) {
ret = btrfs_lookup_extent_info(NULL, gfs_info, bytenr,
btrfs_header_level(buf), 1, NULL,
&flags);
if (ret < 0) {
ret = calc_extent_flag(extent_cache, buf, ri, &flags);
if (ret < 0) {
fprintf(stderr, "Couldn't calc extent flags\n");
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
}
}
} else {
flags = 0;
ret = calc_extent_flag(extent_cache, buf, ri, &flags);
if (ret < 0) {
fprintf(stderr, "Couldn't calc extent flags\n");
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
}
}
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
if (ri != NULL &&
ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
ri->objectid == btrfs_header_owner(buf)) {
/*
* Ok we got to this block from it's original owner and
* we have FULL_BACKREF set. Relocation can leave
* converted blocks over so this is altogether possible,
* however it's not possible if the generation > the
* last snapshot, so check for this case.
*/
if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
btrfs_header_generation(buf) > ri->last_snapshot) {
flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
rec->bad_full_backref = 1;
}
}
} else {
if (ri != NULL &&
(ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
rec->bad_full_backref = 1;
}
}
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
rec->flag_block_full_backref = 1;
parent = bytenr;
owner = 0;
} else {
rec->flag_block_full_backref = 0;
parent = 0;
owner = btrfs_header_owner(buf);
}
ret = check_block(root, extent_cache, buf, flags);
if (ret)
goto out;
if (btrfs_is_leaf(buf)) {
btree_space_waste += btrfs_leaf_free_space(buf);
for (i = 0; i < nritems; i++) {
struct btrfs_file_extent_item *fi;
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
unsigned long inline_offset;
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
inline_offset = offsetof(struct btrfs_file_extent_item,
disk_bytenr);
btrfs_item_key_to_cpu(buf, &key, i);
/*
* Check key type against the leaf owner.
* Could filter quite a lot of early error if
* owner is correct
*/
if (check_type_with_root(btrfs_header_owner(buf),
key.type)) {
fprintf(stderr, "ignoring invalid key\n");
continue;
}
if (key.type == BTRFS_EXTENT_ITEM_KEY) {
process_extent_item(root, extent_cache,
block_group_cache, buf, i);
continue;
}
if (key.type == BTRFS_METADATA_ITEM_KEY) {
process_extent_item(root, extent_cache,
block_group_cache, buf, i);
continue;
}
if (key.type == BTRFS_EXTENT_CSUM_KEY) {
total_csum_bytes +=
btrfs_item_size(buf, i);
continue;
}
if (key.type == BTRFS_CHUNK_ITEM_KEY) {
process_chunk_item(chunk_cache, &key, buf, i);
continue;
}
if (key.type == BTRFS_DEV_ITEM_KEY) {
process_device_item(dev_cache, &key, buf, i);
continue;
}
if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
process_block_group_item(block_group_cache,
&key, buf, i);
continue;
}
if (key.type == BTRFS_DEV_EXTENT_KEY) {
process_device_extent_item(dev_extent_cache,
&key, buf, i);
continue;
}
/* Skip deprecated extent ref */
if (key.type == BTRFS_EXTENT_REF_V0_KEY)
continue;
if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
ret = add_tree_backref(extent_cache,
key.objectid, 0, key.offset, 0);
if (ret < 0) {
errno = -ret;
error(
"add_tree_backref failed (leaf tree block): %m");
}
continue;
}
if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
ret = add_tree_backref(extent_cache,
key.objectid, key.offset, 0, 0);
if (ret < 0) {
errno = -ret;
error(
"add_tree_backref failed (leaf shared block): %m");
}
continue;
}
if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
struct btrfs_extent_data_ref *ref;
ref = btrfs_item_ptr(buf, i,
struct btrfs_extent_data_ref);
add_data_backref(extent_cache,
key.objectid, 0,
btrfs_extent_data_ref_root(buf, ref),
btrfs_extent_data_ref_objectid(buf,
ref),
btrfs_extent_data_ref_offset(buf, ref),
btrfs_extent_data_ref_count(buf, ref),
0, 0, gfs_info->sectorsize);
continue;
}
if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
struct btrfs_shared_data_ref *ref;
ref = btrfs_item_ptr(buf, i,
struct btrfs_shared_data_ref);
add_data_backref(extent_cache,
key.objectid, key.offset, 0, 0, 0,
btrfs_shared_data_ref_count(buf, ref),
0, 0, gfs_info->sectorsize);
continue;
}
if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
struct bad_item *bad;
if (key.objectid == BTRFS_ORPHAN_OBJECTID)
continue;
if (!owner)
continue;
bad = malloc(sizeof(struct bad_item));
if (!bad)
continue;
INIT_LIST_HEAD(&bad->list);
memcpy(&bad->key, &key,
sizeof(struct btrfs_key));
bad->root_id = owner;
list_add_tail(&bad->list, &delete_items);
continue;
}
if (key.type != BTRFS_EXTENT_DATA_KEY)
continue;
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
/* Check itemsize before we continue */
if (btrfs_item_size(buf, i) < inline_offset) {
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
ret = -EUCLEAN;
error(
btrfs-progs: fix printf formats on 32bit x86 When compiling btrfs-progs on 32bit x86 using GCC 11.1.0, there are several warnings: In file included from ./common/utils.h:30, from check/main.c:36: check/main.c: In function 'run_next_block': ./common/messages.h:42:31: warning: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'u32' {aka 'unsigned int'} [-Wformat=] 42 | __btrfs_error((fmt), ##__VA_ARGS__); \ | ^~~~~ check/main.c:6496:33: note: in expansion of macro 'error' 6496 | error( | ^~~~~ In file included from ./common/utils.h:30, from kernel-shared/volumes.c:32: kernel-shared/volumes.c: In function 'btrfs_check_chunk_valid': ./common/messages.h:42:31: warning: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'u32' {aka 'unsigned int'} [-Wformat=] 42 | __btrfs_error((fmt), ##__VA_ARGS__); \ | ^~~~~ kernel-shared/volumes.c:2052:17: note: in expansion of macro 'error' 2052 | error("invalid chunk item size, have %u expect [%zu, %lu)", | ^~~~~ image/main.c: In function 'search_for_chunk_blocks': ./common/messages.h:42:31: warning: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t' {aka 'unsigned int'} [-Wformat=] 42 | __btrfs_error((fmt), ##__VA_ARGS__); \ | ^~~~~ image/main.c:2122:33: note: in expansion of macro 'error' 2122 | error( | ^~~~~ There are two types of problems: - __BTRFS_LEAF_DATA_SIZE() This macro has no type definition, making it behaves differently on different arches. Fix this by following kernel to use inline function to make its return value fixed to u32. - size_t related output For x86_64 %lu is OK but not for x86. Fix this by using %zu. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2021-10-27 20:48:46 +08:00
"invalid file extent item size, have %u expect (%lu, %u]",
btrfs_item_size(buf, i),
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
inline_offset,
BTRFS_LEAF_DATA_SIZE(gfs_info));
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
continue;
}
fi = btrfs_item_ptr(buf, i,
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(buf, fi) ==
BTRFS_FILE_EXTENT_INLINE)
continue;
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
/* Prealloc/regular extent must have fixed item size */
if (btrfs_item_size(buf, i) !=
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
sizeof(struct btrfs_file_extent_item)) {
ret = -EUCLEAN;
error(
"invalid file extent item size, have %u expect %zu",
btrfs_item_size(buf, i),
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
sizeof(struct btrfs_file_extent_item));
continue;
}
/* key.offset (file offset) must be aligned */
if (!IS_ALIGNED(key.offset, gfs_info->sectorsize)) {
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
ret = -EUCLEAN;
error(
"invalid file offset, have %llu expect aligned to %u",
key.offset, gfs_info->sectorsize);
btrfs-progs: check/original: Do extra verification on file extent item [BUG] For certain fuzzed image, `btrfs check` will fail with the following call trace: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents Program received signal SIGABRT, Aborted. 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 (gdb) bt #0 0x00007ffff7c88f25 in raise () from /usr/lib/libc.so.6 #1 0x00007ffff7c72897 in abort () from /usr/lib/libc.so.6 #2 0x00005555555abc3e in run_next_block (...) at check/main.c:6398 #3 0x00005555555b0f36 in deal_root_from_list (...) at check/main.c:8408 #4 0x00005555555b1a3d in check_chunks_and_extents (fs_info=0x5555556a1e30) at check/main.c:8690 #5 0x00005555555b1e3e in do_check_chunks_and_extents (fs_info=0x5555556a1e30) a #6 0x00005555555b5710 in cmd_check (cmd=0x555555696920 <cmd_struct_check>, argc #7 0x0000555555568dc7 in cmd_execute (cmd=0x555555696920 <cmd_struct_check>, ar #8 0x0000555555569713 in main (argc=2, argv=0x7fffffffde70) at btrfs.c:386 [CAUSE] This fuzzed images has a corrupted EXTENT_DATA item in data reloc tree: item 1 key (256 EXTENT_DATA 256) itemoff 16111 itemsize 12 generation 0 type 2 (prealloc) prealloc data disk byte 16777216 nr 0 prealloc data offset 0 nr 0 There are several problems with the item: - Bad item size 12 is too small. - Bad key offset offset of EXTENT_DATA type key represents file offset, which should always be aligned to sector size (4K in this particular case). [FIX] Do extra item size and key offset check for original mode, and remove the abort() call in run_next_block(). And to show off how robust lowmem mode is, lowmem can handle it without any hiccup. With this fix, original mode can detect the problem properly: Checking filesystem on issue_213.raw UUID: 99e50868-0bda-4d89-b0e4-7e8560312ef9 [1/7] checking root items [2/7] checking extents ERROR: invalid file extent item size, have 12 expect (21, 16283] ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space cache [4/7] checking fs roots root 18446744073709551607 root dir 256 error root 18446744073709551607 inode 256 errors 62, no orphan item, odd file extent, bad file extent ERROR: errors found in fs roots found 131072 bytes used, error(s) found total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124774 file data blocks allocated: 0 referenced 0 Issue: #213 Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-18 09:19:38 +08:00
continue;
}
if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
continue;
data_bytes_allocated +=
btrfs_file_extent_disk_num_bytes(buf, fi);
data_bytes_referenced +=
btrfs_file_extent_num_bytes(buf, fi);
add_data_backref(extent_cache,
btrfs_file_extent_disk_bytenr(buf, fi),
parent, owner, key.objectid, key.offset -
btrfs-progs: check: Populate extent generation correctly for data extents [BUG] When using `btrfs check --init-extent-tree`, we will create incorrect generation number for data extents in extent tree: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 [CAUSE] Since data extent generation is not as obvious as tree blocks, which has header containing its generations, so for data extents, its extent_record::generation is not really updated, resulting such 0 generation. [FIX] To get generation of a data extent, there are two sources we can rely: - EXTENT_ITEM There is always a btrfs_extent_item::generation can be utilized. Although this is not possible for --init-extent-tree use case. - EXTENT_DATA We have btrfs_file_extent_item::generation for regular and preallocated data extents. Since --init-extent-tree will go through subvolume trees, this would be the main source for extent data generation. Then we only need to make add_data_backref() to accept @gen parameter, and pass it down to extent_record structure. And for the final extent item generation update, here we add extra fallback values, if we can't find FILE_EXTENT items. In that case, we just fall back to current transid. With this modification, recreated data EXTENT_ITEM now has correct generation number: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 6 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:17 +08:00
btrfs_file_extent_offset(buf, fi), 1,
btrfs_file_extent_generation(buf, fi), 1,
btrfs_file_extent_disk_num_bytes(buf, fi));
}
} else {
int level;
level = btrfs_header_level(buf);
i = 0;
/*
* If we have a drop key we need to not walk down any slots we
* would have ignored when mounting the fs. These blocks are
* technically unreferenced and don't need to be worried about.
*/
if (ri != NULL && ri->drop_level && level > ri->drop_level) {
ret = btrfs_bin_search(buf, &ri->drop_key, &i);
if (ret && i > 0)
i--;
}
for (; i < nritems; i++) {
struct extent_record tmpl;
ptr = btrfs_node_blockptr(buf, i);
size = gfs_info->nodesize;
btrfs_node_key_to_cpu(buf, &key, i);
if (ri != NULL) {
if ((level == ri->drop_level) &&
is_dropped_key(&key, &ri->drop_key)) {
continue;
}
}
memset(&tmpl, 0, sizeof(tmpl));
tmpl.parent_key.objectid = key.objectid;
tmpl.parent_key.type = key.type;
tmpl.parent_key.offset = key.offset;
tmpl.parent_generation =
btrfs_node_ptr_generation(buf, i);
tmpl.start = ptr;
tmpl.nr = size;
tmpl.refs = 1;
tmpl.metadata = 1;
tmpl.max_size = size;
ret = add_extent_rec(extent_cache, &tmpl);
if (ret < 0)
goto out;
ret = add_tree_backref(extent_cache, ptr, parent,
owner, 1);
if (ret < 0) {
errno = -ret;
error(
"add_tree_backref failed (non-leaf block): %m");
continue;
}
if (level > 1)
add_pending(nodes, seen, ptr, size);
else
add_pending(pending, seen, ptr, size);
}
btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(gfs_info) -
nritems) * sizeof(struct btrfs_key_ptr);
}
total_btree_bytes += buf->len;
if (fs_root_objectid(btrfs_header_owner(buf)))
total_fs_tree_bytes += buf->len;
if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
total_extent_tree_bytes += buf->len;
out:
free_extent_buffer(buf);
return ret;
}
static int add_root_to_pending(struct extent_buffer *buf,
struct cache_tree *extent_cache,
struct cache_tree *pending,
struct cache_tree *seen,
struct cache_tree *nodes,
u64 objectid)
{
struct extent_record tmpl;
int ret;
if (btrfs_header_level(buf) > 0)
add_pending(nodes, seen, buf->start, buf->len);
else
add_pending(pending, seen, buf->start, buf->len);
memset(&tmpl, 0, sizeof(tmpl));
tmpl.start = buf->start;
tmpl.nr = buf->len;
tmpl.is_root = 1;
tmpl.refs = 1;
tmpl.metadata = 1;
tmpl.max_size = buf->len;
add_extent_rec(extent_cache, &tmpl);
if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
ret = add_tree_backref(extent_cache, buf->start, buf->start,
0, 1);
else
ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
1);
return ret;
}
/* as we fix the tree, we might be deleting blocks that
* we're tracking for repair. This hook makes sure we
* remove any backrefs for blocks as we are fixing them.
*/
static int free_extent_hook(u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset,
int refs_to_drop)
{
struct extent_record *rec;
struct cache_extent *cache;
int is_data;
struct cache_tree *extent_cache = gfs_info->fsck_extent_cache;
is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
if (!cache)
return 0;
rec = container_of(cache, struct extent_record, cache);
if (is_data) {
struct data_backref *back;
back = find_data_backref(rec, parent, root_objectid, owner,
offset, 1, bytenr, num_bytes);
if (!back)
goto out;
if (back->node.found_ref) {
back->found_ref -= refs_to_drop;
if (rec->refs)
rec->refs -= refs_to_drop;
}
if (back->node.found_extent_tree) {
back->num_refs -= refs_to_drop;
if (rec->extent_item_refs)
rec->extent_item_refs -= refs_to_drop;
}
if (back->found_ref == 0)
back->node.found_ref = 0;
if (back->num_refs == 0)
back->node.found_extent_tree = 0;
if (!back->node.found_extent_tree && back->node.found_ref) {
rb_erase(&back->node.node, &rec->backref_tree);
free(back);
}
} else {
struct tree_backref *back;
back = find_tree_backref(rec, parent, root_objectid);
if (!back)
goto out;
if (back->node.found_ref) {
if (rec->refs)
rec->refs--;
back->node.found_ref = 0;
}
if (back->node.found_extent_tree) {
if (rec->extent_item_refs)
rec->extent_item_refs--;
back->node.found_extent_tree = 0;
}
if (!back->node.found_extent_tree && back->node.found_ref) {
rb_erase(&back->node.node, &rec->backref_tree);
free(back);
}
}
maybe_free_extent_rec(extent_cache, rec);
out:
return 0;
}
static int delete_extent_records(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
u64 bytenr)
{
struct btrfs_root *extent_root = btrfs_extent_root(gfs_info, bytenr);
struct btrfs_key key;
struct btrfs_key found_key;
struct extent_buffer *leaf;
int ret;
int slot;
key.objectid = bytenr;
key.type = (u8)-1;
key.offset = (u64)-1;
while (1) {
ret = btrfs_search_slot(trans, extent_root, &key,
path, 0, 1);
if (ret < 0)
break;
if (ret > 0) {
ret = 0;
if (path->slots[0] == 0)
break;
path->slots[0]--;
}
ret = 0;
leaf = path->nodes[0];
slot = path->slots[0];
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.objectid != bytenr)
break;
if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
found_key.type != BTRFS_METADATA_ITEM_KEY &&
found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
btrfs_release_path(path);
if (found_key.type == 0) {
if (found_key.offset == 0)
break;
key.offset = found_key.offset - 1;
key.type = found_key.type;
}
key.type = found_key.type - 1;
key.offset = (u64)-1;
continue;
}
fprintf(stderr,
"repair deleting extent record: key [%llu,%u,%llu]\n",
found_key.objectid, found_key.type, found_key.offset);
ret = btrfs_del_item(trans, extent_root, path);
if (ret)
break;
btrfs_release_path(path);
if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
found_key.type == BTRFS_METADATA_ITEM_KEY) {
u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
found_key.offset : gfs_info->nodesize;
ret = btrfs_update_block_group(trans, bytenr, bytes,
0, 0);
if (ret)
break;
}
}
btrfs_release_path(path);
return ret;
}
/*
* for a single backref, this will allocate a new extent
* and add the backref to it.
*/
static int record_extent(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct extent_record *rec,
struct extent_backref *back,
int allocated, u64 flags)
{
int ret = 0;
struct btrfs_root *extent_root = btrfs_extent_root(gfs_info,
rec->start);
struct extent_buffer *leaf;
struct btrfs_key ins_key;
struct btrfs_extent_item *ei;
struct data_backref *dback;
struct btrfs_tree_block_info *bi;
if (!back->is_data)
rec->max_size = max_t(u64, rec->max_size, gfs_info->nodesize);
if (!allocated) {
u32 item_size = sizeof(*ei);
if (!back->is_data)
item_size += sizeof(*bi);
ins_key.objectid = rec->start;
ins_key.offset = rec->max_size;
ins_key.type = BTRFS_EXTENT_ITEM_KEY;
ret = btrfs_insert_empty_item(trans, extent_root, path,
&ins_key, item_size);
if (ret)
goto fail;
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item);
btrfs_set_extent_refs(leaf, ei, 0);
btrfs-progs: check: Populate extent generation correctly for data extents [BUG] When using `btrfs check --init-extent-tree`, we will create incorrect generation number for data extents in extent tree: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 0 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 [CAUSE] Since data extent generation is not as obvious as tree blocks, which has header containing its generations, so for data extents, its extent_record::generation is not really updated, resulting such 0 generation. [FIX] To get generation of a data extent, there are two sources we can rely: - EXTENT_ITEM There is always a btrfs_extent_item::generation can be utilized. Although this is not possible for --init-extent-tree use case. - EXTENT_DATA We have btrfs_file_extent_item::generation for regular and preallocated data extents. Since --init-extent-tree will go through subvolume trees, this would be the main source for extent data generation. Then we only need to make add_data_backref() to accept @gen parameter, and pass it down to extent_record structure. And for the final extent item generation update, here we add extra fallback values, if we can't find FILE_EXTENT items. In that case, we just fall back to current transid. With this modification, recreated data EXTENT_ITEM now has correct generation number: item 10 key (13631488 EXTENT_ITEM 1048576) itemoff 15828 itemsize 53 refs 1 gen 6 flags DATA extent data backref root FS_TREE objectid 257 offset 0 count 1 Reviewed-by: Su Yue <Damenly_Su@gmx.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2019-12-31 15:12:17 +08:00
if (rec->generation)
btrfs_set_extent_generation(leaf, ei, rec->generation);
else
btrfs_set_extent_generation(leaf, ei, trans->transid);
if (back->is_data) {
btrfs_set_extent_flags(leaf, ei,
BTRFS_EXTENT_FLAG_DATA);
} else {
struct btrfs_disk_key copy_key;
bi = (struct btrfs_tree_block_info *)(ei + 1);
memset_extent_buffer(leaf, 0, (unsigned long)bi,
sizeof(*bi));
btrfs_set_disk_key_objectid(&copy_key,
rec->info_objectid);
btrfs_set_disk_key_type(&copy_key, 0);
btrfs_set_disk_key_offset(&copy_key, 0);
btrfs_set_tree_block_level(leaf, bi, rec->info_level);
btrfs_set_tree_block_key(leaf, bi, &copy_key);
btrfs_set_extent_flags(leaf, ei,
flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
}
btrfs_mark_buffer_dirty(leaf);
ret = btrfs_update_block_group(trans, rec->start,
rec->max_size, 1, 0);
if (ret)
goto fail;
btrfs_release_path(path);
}
if (back->is_data) {
u64 parent;
int i;
dback = to_data_backref(back);
if (back->full_backref)
parent = dback->parent;
else
parent = 0;
for (i = 0; i < dback->found_ref; i++) {
/* if parent != 0, we're doing a full backref
* passing BTRFS_FIRST_FREE_OBJECTID as the owner
* just makes the backref allocator create a data
* backref
*/
ret = btrfs_inc_extent_ref(trans, extent_root,
rec->start, rec->max_size,
parent,
dback->root,
parent ?
BTRFS_FIRST_FREE_OBJECTID :
dback->owner,
dback->offset);
if (ret)
break;
}
fprintf(stderr,
"adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
(unsigned long long)rec->start,
back->full_backref ? "parent" : "root",
back->full_backref ? (unsigned long long)parent :
(unsigned long long)dback->root,
(unsigned long long)dback->owner,
(unsigned long long)dback->offset, dback->found_ref);
} else {
u64 parent;
struct tree_backref *tback;
tback = to_tree_backref(back);
if (back->full_backref)
parent = tback->parent;
else
parent = 0;
ret = btrfs_inc_extent_ref(trans, extent_root,
rec->start, rec->max_size,
parent, tback->root, 0, 0);
fprintf(stderr,
"adding new tree backref on start %llu len %llu parent %llu root %llu\n",
rec->start, rec->max_size, parent, tback->root);
}
fail:
btrfs_release_path(path);
return ret;
}
static struct extent_entry *find_entry(struct list_head *entries,
u64 bytenr, u64 bytes)
{
struct extent_entry *entry = NULL;
list_for_each_entry(entry, entries, list) {
if (entry->bytenr == bytenr && entry->bytes == bytes)
return entry;
}
return NULL;
}
static struct extent_entry *find_most_right_entry(struct list_head *entries)
{
struct extent_entry *entry, *best = NULL, *prev = NULL;
list_for_each_entry(entry, entries, list) {
/*
* If there are as many broken entries as entries then we know
* not to trust this particular entry.
*/
if (entry->broken == entry->count)
continue;
/*
* Special case, when there are only two entries and 'best' is
* the first one
*/
if (!prev) {
best = entry;
prev = entry;
continue;
}
/*
* If our current entry == best then we can't be sure our best
* is really the best, so we need to keep searching.
*/
if (best && best->count == entry->count) {
prev = entry;
best = NULL;
continue;
}
/* Prev == entry, not good enough, have to keep searching */
if (!prev->broken && prev->count == entry->count)
continue;
if (!best)
best = (prev->count > entry->count) ? prev : entry;
else if (best->count < entry->count)
best = entry;
prev = entry;
}
return best;
}
static int repair_ref(struct btrfs_path *path, struct data_backref *dback,
struct extent_entry *entry)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
struct btrfs_key key;
u64 bytenr, bytes;
int ret, err;
key.objectid = dback->root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(root)) {
fprintf(stderr, "Couldn't find root for our ref\n");
return -EINVAL;
}
/*
* The backref points to the original offset of the extent if it was
* split, so we need to search down to the offset we have and then walk
* forward until we find the backref we're looking for.
*/
key.objectid = dback->owner;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = dback->offset;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
fprintf(stderr, "Error looking up ref %d\n", ret);
return ret;
}
while (1) {
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
ret = btrfs_next_leaf(root, path);
if (ret) {
fprintf(stderr, "Couldn't find our ref, next\n");
return -EINVAL;
}
}
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != dback->owner ||
key.type != BTRFS_EXTENT_DATA_KEY) {
fprintf(stderr, "Couldn't find our ref, search\n");
return -EINVAL;
}
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
break;
path->slots[0]++;
}
btrfs_release_path(path);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
/*
* Ok we have the key of the file extent we want to fix, now we can cow
* down to the thing and fix it.
*/
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0) {
fprintf(stderr, "error cowing down to ref [%llu,%u,%llu]: %d\n",
key.objectid, key.type, key.offset, ret);
goto out;
}
if (ret > 0) {
fprintf(stderr,
"well that's odd, we just found this key [%llu,%u,%llu]\n",
key.objectid, key.type, key.offset);
ret = -EINVAL;
goto out;
}
leaf = path->nodes[0];
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
if (btrfs_file_extent_compression(leaf, fi) &&
dback->disk_bytenr != entry->bytenr) {
fprintf(stderr,
"ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
dback->disk_bytenr);
ret = -EINVAL;
goto out;
}
if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
} else if (dback->disk_bytenr > entry->bytenr) {
u64 off_diff, offset;
off_diff = dback->disk_bytenr - entry->bytenr;
offset = btrfs_file_extent_offset(leaf, fi);
if (dback->disk_bytenr + offset +
btrfs_file_extent_num_bytes(leaf, fi) >
entry->bytenr + entry->bytes) {
fprintf(stderr,
"ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
dback->disk_bytenr);
ret = -EINVAL;
goto out;
}
offset += off_diff;
btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
btrfs_set_file_extent_offset(leaf, fi, offset);
} else if (dback->disk_bytenr < entry->bytenr) {
u64 offset;
offset = btrfs_file_extent_offset(leaf, fi);
if (dback->disk_bytenr + offset < entry->bytenr) {
fprintf(stderr,
"ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
dback->disk_bytenr);
ret = -EINVAL;
goto out;
}
offset += dback->disk_bytenr;
offset -= entry->bytenr;
btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
btrfs_set_file_extent_offset(leaf, fi, offset);
}
btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
/*
* Chances are if disk_num_bytes were wrong then so is ram_bytes, but
* only do this if we aren't using compression, otherwise it's a
* trickier case.
*/
if (!btrfs_file_extent_compression(leaf, fi))
btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
else
printf("ram bytes may be wrong?\n");
btrfs_mark_buffer_dirty(leaf);
out:
err = btrfs_commit_transaction(trans, root);
btrfs_release_path(path);
return ret ? ret : err;
}
static int verify_backrefs(struct btrfs_path *path, struct extent_record *rec)
{
struct extent_backref *back, *tmp;
struct data_backref *dback;
struct extent_entry *entry, *best = NULL;
LIST_HEAD(entries);
int nr_entries = 0;
int broken_entries = 0;
int ret = 0;
short mismatch = 0;
/*
* Metadata is easy and the backrefs should always agree on bytenr and
* size, if not we've got bigger issues.
*/
if (rec->metadata)
return 0;
rbtree_postorder_for_each_entry_safe(back, tmp,
&rec->backref_tree, node) {
if (back->full_backref || !back->is_data)
continue;
dback = to_data_backref(back);
/*
* We only pay attention to backrefs that we found a real
* backref for.
*/
if (dback->found_ref == 0)
continue;
/*
* For now we only catch when the bytes don't match, not the
* bytenr. We can easily do this at the same time, but I want
* to have a fs image to test on before we just add repair
* functionality willy-nilly so we know we won't screw up the
* repair.
*/
entry = find_entry(&entries, dback->disk_bytenr,
dback->bytes);
if (!entry) {
entry = malloc(sizeof(struct extent_entry));
if (!entry) {
ret = -ENOMEM;
goto out;
}
memset(entry, 0, sizeof(*entry));
entry->bytenr = dback->disk_bytenr;
entry->bytes = dback->bytes;
list_add_tail(&entry->list, &entries);
nr_entries++;
}
2007-04-24 03:56:27 +08:00
/*
* If we only have on entry we may think the entries agree when
* in reality they don't so we have to do some extra checking.
*/
if (dback->disk_bytenr != rec->start ||
dback->bytes != rec->nr || back->broken)
mismatch = 1;
if (back->broken) {
entry->broken++;
broken_entries++;
}
entry->count++;
}
/* Yay all the backrefs agree, carry on good sir */
if (nr_entries <= 1 && !mismatch)
goto out;
fprintf(stderr,
"attempting to repair backref discrepancy for bytenr %llu\n",
rec->start);
/*
* First we want to see if the backrefs can agree amongst themselves who
* is right, so figure out which one of the entries has the highest
* count.
*/
best = find_most_right_entry(&entries);
/*
* Ok so we may have an even split between what the backrefs think, so
* this is where we use the extent ref to see what it thinks.
*/
if (!best) {
entry = find_entry(&entries, rec->start, rec->nr);
if (!entry && (!broken_entries || !rec->found_rec)) {
fprintf(stderr,
"backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
rec->start, rec->nr);
ret = -EINVAL;
goto out;
} else if (!entry) {
/*
* Ok our backrefs were broken, we'll assume this is the
* correct value and add an entry for this range.
*/
entry = malloc(sizeof(struct extent_entry));
if (!entry) {
ret = -ENOMEM;
goto out;
}
memset(entry, 0, sizeof(*entry));
entry->bytenr = rec->start;
entry->bytes = rec->nr;
list_add_tail(&entry->list, &entries);
nr_entries++;
}
entry->count++;
best = find_most_right_entry(&entries);
if (!best) {
fprintf(stderr,
"backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
rec->start, rec->nr);
ret = -EINVAL;
goto out;
2007-04-25 00:44:05 +08:00
}
}
/*
* I don't think this can happen currently as we'll abort() if we catch
* this case higher up, but in case somebody removes that we still can't
* deal with it properly here yet, so just bail out of that's the case.
*/
if (best->bytenr != rec->start) {
fprintf(stderr,
"extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case. bytenr is %llu, bytes is %llu\n",
rec->start, rec->nr);
ret = -EINVAL;
goto out;
}
/*
* Ok great we all agreed on an extent record, let's go find the real
* references and fix up the ones that don't match.
*/
rbtree_postorder_for_each_entry_safe(back, tmp,
&rec->backref_tree, node) {
if (back->full_backref || !back->is_data)
continue;
dback = to_data_backref(back);
/*
* Still ignoring backrefs that don't have a real ref attached
* to them.
*/
if (dback->found_ref == 0)
continue;
if (dback->bytes == best->bytes &&
dback->disk_bytenr == best->bytenr)
continue;
ret = repair_ref(path, dback, best);
if (ret)
goto out;
}
/*
* Ok we messed with the actual refs, which means we need to drop our
* entire cache and go back and rescan. I know this is a huge pain and
* adds a lot of extra work, but it's the only way to be safe. Once all
* the backrefs agree we may not need to do anything to the extent
* record itself.
*/
ret = -EAGAIN;
out:
while (!list_empty(&entries)) {
entry = list_entry(entries.next, struct extent_entry, list);
list_del_init(&entry->list);
free(entry);
}
return ret;
}
static int process_duplicates(struct cache_tree *extent_cache,
struct extent_record *rec)
{
struct extent_record *good, *tmp;
struct cache_extent *cache;
int ret;
/*
* If we found a extent record for this extent then return, or if we
* have more than one duplicate we are likely going to need to delete
* something.
*/
if (rec->found_rec || rec->num_duplicates > 1)
return 0;
/* Shouldn't happen but just in case */
BUG_ON(!rec->num_duplicates);
/*
* So this happens if we end up with a backref that doesn't match the
* actual extent entry. So either the backref is bad or the extent
* entry is bad. Either way we want to have the extent_record actually
* reflect what we found in the extent_tree, so we need to take the
* duplicate out and use that as the extent_record since the only way we
* get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
*/
remove_cache_extent(extent_cache, &rec->cache);
good = to_extent_record(rec->dups.next);
list_del_init(&good->list);
INIT_LIST_HEAD(&good->backrefs);
INIT_LIST_HEAD(&good->dups);
good->cache.start = good->start;
good->cache.size = good->nr;
good->content_checked = 0;
good->owner_ref_checked = 0;
good->num_duplicates = 0;
good->refs = rec->refs;
list_splice_init(&rec->backrefs, &good->backrefs);
while (1) {
cache = lookup_cache_extent(extent_cache, good->start,
good->nr);
if (!cache)
break;
tmp = container_of(cache, struct extent_record, cache);
/*
* If we find another overlapping extent and it's found_rec is
* set then it's a duplicate and we need to try and delete
* something.
*/
if (tmp->found_rec || tmp->num_duplicates > 0) {
if (list_empty(&good->list))
list_add_tail(&good->list,
&duplicate_extents);
good->num_duplicates += tmp->num_duplicates + 1;
list_splice_init(&tmp->dups, &good->dups);
list_del_init(&tmp->list);
list_add_tail(&tmp->list, &good->dups);
remove_cache_extent(extent_cache, &tmp->cache);
continue;
}
/*
* Ok we have another non extent item backed extent rec, so lets
* just add it to this extent and carry on like we did above.
*/
good->refs += tmp->refs;
list_splice_init(&tmp->backrefs, &good->backrefs);
remove_cache_extent(extent_cache, &tmp->cache);
free(tmp);
}
ret = insert_cache_extent(extent_cache, &good->cache);
BUG_ON(ret);
free(rec);
return good->num_duplicates ? 0 : 1;
}
static int delete_duplicate_records(struct btrfs_root *root,
struct extent_record *rec)
{
struct btrfs_trans_handle *trans;
LIST_HEAD(delete_list);
struct btrfs_path path;
struct extent_record *tmp, *good, *n;
int nr_del = 0;
int ret = 0, err;
struct btrfs_key key;
btrfs_init_path(&path);
good = rec;
/* Find the record that covers all of the duplicates. */
list_for_each_entry(tmp, &rec->dups, list) {
if (good->start < tmp->start)
continue;
if (good->nr > tmp->nr)
continue;
if (tmp->start + tmp->nr < good->start + good->nr) {
fprintf(stderr,
"Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought. The extents are [%llu-%llu] and [%llu-%llu]\n",
tmp->start, tmp->nr, good->start, good->nr);
abort();
}
good = tmp;
}
if (good != rec)
list_add_tail(&rec->list, &delete_list);
list_for_each_entry_safe(tmp, n, &rec->dups, list) {
if (tmp == good)
continue;
list_move_tail(&tmp->list, &delete_list);
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
list_for_each_entry(tmp, &delete_list, list) {
if (tmp->found_rec == 0)
continue;
key.objectid = tmp->start;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = tmp->nr;
/* Shouldn't happen but just in case */
if (tmp->metadata) {
fprintf(stderr,
"well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
tmp->start, tmp->nr);
abort();
}
root = btrfs_extent_root(gfs_info, key.objectid);
ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
if (ret) {
if (ret > 0)
ret = -EINVAL;
break;
}
ret = btrfs_del_item(trans, root, &path);
if (ret)
break;
btrfs_release_path(&path);
nr_del++;
}
err = btrfs_commit_transaction(trans, root);
if (err && !ret)
ret = err;
out:
while (!list_empty(&delete_list)) {
tmp = to_extent_record(delete_list.next);
list_del_init(&tmp->list);
if (tmp == rec)
continue;
free(tmp);
}
while (!list_empty(&rec->dups)) {
tmp = to_extent_record(rec->dups.next);
list_del_init(&tmp->list);
free(tmp);
}
btrfs_release_path(&path);
if (!ret && !nr_del)
rec->num_duplicates = 0;
return ret ? ret : nr_del;
}
/*
* Based extent backref item, we find all file extent items in the fs tree. By
* the info we can rebuild the extent backref item
*/
static int __find_possible_backrefs(struct btrfs_root *root,
u64 owner, u64 offset, u64 bytenr, u64 *refs_ret,
u64 *bytes_ret)
{
int ret = 0;
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
u64 backref_offset, disk_bytenr;
int slot;
btrfs_init_path(&path);
key.objectid = owner;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret > 0)
ret = -ENOENT;
if (ret) {
btrfs_release_path(&path);
return ret;
}
btrfs_release_path(&path);
key.objectid = owner;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
btrfs_release_path(&path);
return ret;
}
while (1) {
leaf = path.nodes[0];
slot = path.slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, &path);
if (ret) {
if (ret > 0)
ret = 0;
break;
}
leaf = path.nodes[0];
slot = path.slots[0];
}
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if ((found_key.objectid != owner) ||
(found_key.type != BTRFS_EXTENT_DATA_KEY))
break;
fi = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
backref_offset = found_key.offset -
btrfs_file_extent_offset(leaf, fi);
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
*bytes_ret = btrfs_file_extent_disk_num_bytes(leaf,
fi);
if ((disk_bytenr == bytenr) &&
(backref_offset == offset)) {
(*refs_ret)++;
}
path.slots[0]++;
}
btrfs_release_path(&path);
return ret;
}
static int find_possible_backrefs(struct btrfs_path *path,
struct cache_tree *extent_cache,
struct extent_record *rec)
{
struct btrfs_root *root;
struct extent_backref *back, *tmp;
struct data_backref *dback;
struct cache_extent *cache;
struct btrfs_key key;
u64 bytenr, bytes;
u64 refs;
int ret;
rbtree_postorder_for_each_entry_safe(back, tmp,
&rec->backref_tree, node) {
/* Don't care about full backrefs (poor unloved backrefs) */
if (back->full_backref || !back->is_data)
continue;
dback = to_data_backref(back);
/* We found this one, we don't need to do a lookup */
if (dback->found_ref)
continue;
key.objectid = dback->root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_read_fs_root(gfs_info, &key);
/* No root, definitely a bad ref, skip */
if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
continue;
/* Other err, exit */
if (IS_ERR(root))
return PTR_ERR(root);
refs = 0;
bytes = 0;
ret = __find_possible_backrefs(root, dback->owner,
dback->offset, rec->start, &refs, &bytes);
if (ret)
continue;
bytenr = rec->start;
cache = lookup_cache_extent(extent_cache, bytenr, 1);
if (cache) {
struct extent_record *tmp;
tmp = container_of(cache, struct extent_record, cache);
/*
* If we found an extent record for the bytenr for this
* particular backref then we can't add it to our
* current extent record. We only want to add backrefs
* that don't have a corresponding extent item in the
* extent tree since they likely belong to this record
* and we need to fix it if it doesn't match bytenrs.
*/
if (tmp->found_rec)
continue;
}
dback->found_ref += refs;
dback->disk_bytenr = bytenr;
dback->bytes = bytes;
/*
* Set this so the verify backref code knows not to trust the
* values in this backref.
*/
back->broken = 1;
}
return 0;
}
/*
* when an incorrect extent item is found, this will delete
* all of the existing entries for it and recreate them
* based on what the tree scan found.
*/
static int fixup_extent_refs(struct cache_tree *extent_cache,
struct extent_record *rec)
{
struct btrfs_trans_handle *trans = NULL;
int ret;
struct btrfs_path path;
struct cache_extent *cache;
struct extent_backref *back, *tmp;
int allocated = 0;
u64 flags = 0;
if (rec->flag_block_full_backref)
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
btrfs_init_path(&path);
if (rec->refs != rec->extent_item_refs && !rec->metadata) {
/*
* Sometimes the backrefs themselves are so broken they don't
* get attached to any meaningful rec, so first go back and
* check any of our backrefs that we couldn't find and throw
* them into the list if we find the backref so that
* verify_backrefs can figure out what to do.
*/
ret = find_possible_backrefs(&path, extent_cache, rec);
if (ret < 0)
goto out;
}
/* step one, make sure all of the backrefs agree */
ret = verify_backrefs(&path, rec);
if (ret < 0)
goto out;
trans = btrfs_start_transaction(gfs_info->tree_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
/* step two, delete all the existing records */
ret = delete_extent_records(trans, &path, rec->start);
if (ret < 0)
goto out;
/* was this block corrupt? If so, don't add references to it */
cache = lookup_cache_extent(gfs_info->corrupt_blocks,
rec->start, rec->max_size);
if (cache) {
ret = 0;
goto out;
}
/* step three, recreate all the refs we did find */
rbtree_postorder_for_each_entry_safe(back, tmp,
&rec->backref_tree, node) {
/*
* if we didn't find any references, don't create a
* new extent record
*/
if (!back->found_ref)
continue;
rec->bad_full_backref = 0;
ret = record_extent(trans, &path, rec, back, allocated, flags);
allocated = 1;
if (ret)
goto out;
}
out:
if (!ret && !IS_ERR(trans)) {
int err = btrfs_commit_transaction(trans, gfs_info->tree_root);
if (!ret)
ret = err;
}
if (!ret)
fprintf(stderr, "Repaired extent references for %llu\n",
(unsigned long long)rec->start);
btrfs_release_path(&path);
return ret;
}
static int fixup_extent_flags(struct extent_record *rec)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = btrfs_extent_root(gfs_info, rec->start);
struct btrfs_path path;
struct btrfs_extent_item *ei;
struct btrfs_key key;
u64 flags;
int ret = 0;
bool metadata_item = rec->metadata;
if (!btrfs_fs_incompat(gfs_info, SKINNY_METADATA))
metadata_item = false;
retry:
key.objectid = rec->start;
if (metadata_item) {
key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = rec->info_level;
} else {
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = rec->max_size;
}
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans))
return PTR_ERR(trans);
btrfs_init_path(&path);
ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
if (ret < 0) {
btrfs_release_path(&path);
btrfs_commit_transaction(trans, root);
return ret;
} else if (ret) {
if (key.type == BTRFS_METADATA_ITEM_KEY) {
metadata_item = false;
goto retry;
}
fprintf(stderr, "Didn't find extent for %llu\n",
(unsigned long long)rec->start);
btrfs_release_path(&path);
btrfs_commit_transaction(trans, root);
return -ENOENT;
}
ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_extent_item);
flags = btrfs_extent_flags(path.nodes[0], ei);
if (rec->flag_block_full_backref) {
fprintf(stderr, "setting full backref on %llu\n",
(unsigned long long)key.objectid);
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
} else {
fprintf(stderr, "clearing full backref on %llu\n",
(unsigned long long)key.objectid);
flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
}
btrfs_set_extent_flags(path.nodes[0], ei, flags);
btrfs_mark_buffer_dirty(path.nodes[0]);
btrfs_release_path(&path);
ret = btrfs_commit_transaction(trans, root);
if (!ret)
fprintf(stderr, "Repaired extent flags for %llu\n",
(unsigned long long)rec->start);
return ret;
}
/* right now we only prune from the extent allocation tree */
static int prune_one_block(struct btrfs_trans_handle *trans,
struct btrfs_corrupt_block *corrupt)
{
struct btrfs_root *extent_root;
int ret;
struct btrfs_path path;
struct extent_buffer *eb;
u64 found;
int slot;
int nritems;
int level = corrupt->level + 1;
btrfs_init_path(&path);
again:
extent_root = btrfs_extent_root(gfs_info, corrupt->key.objectid);
/* we want to stop at the parent to our busted block */
path.lowest_level = level;
ret = btrfs_search_slot(trans, extent_root, &corrupt->key, &path,
-1, 1);
if (ret < 0)
goto out;
eb = path.nodes[level];
if (!eb) {
ret = -ENOENT;
goto out;
}
/*
* hopefully the search gave us the block we want to prune,
* lets try that first
*/
slot = path.slots[level];
found = btrfs_node_blockptr(eb, slot);
if (found == corrupt->cache.start)
goto del_ptr;
nritems = btrfs_header_nritems(eb);
/* the search failed, lets scan this node and hope we find it */
for (slot = 0; slot < nritems; slot++) {
found = btrfs_node_blockptr(eb, slot);
if (found == corrupt->cache.start)
goto del_ptr;
}
/*
* We couldn't find the bad block.
* TODO: search all the nodes for pointers to this block
*/
if (eb == extent_root->node) {
ret = -ENOENT;
goto out;
} else {
level++;
btrfs_release_path(&path);
goto again;
}
del_ptr:
printk("deleting pointer to block %llu\n", corrupt->cache.start);
ret = btrfs_del_ptr(extent_root, &path, level, slot);
out:
btrfs_release_path(&path);
return ret;
}
static int prune_corrupt_blocks(void)
{
struct btrfs_trans_handle *trans = NULL;
struct cache_extent *cache;
struct btrfs_corrupt_block *corrupt;
while (1) {
cache = search_cache_extent(gfs_info->corrupt_blocks, 0);
if (!cache)
break;
if (!trans) {
trans = btrfs_start_transaction(gfs_info->tree_root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
}
corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
prune_one_block(trans, corrupt);
remove_cache_extent(gfs_info->corrupt_blocks, cache);
}
if (trans)
return btrfs_commit_transaction(trans, gfs_info->tree_root);
return 0;
}
static int record_unaligned_extent_rec(struct extent_record *rec)
{
struct extent_backref *back, *tmp;
struct data_backref *dback;
struct btrfs_root *dest_root;
struct btrfs_key key;
struct unaligned_extent_rec_t *urec;
LIST_HEAD(entries);
int ret = 0;
fprintf(stderr, "record unaligned extent record on %llu %llu\n",
rec->start, rec->nr);
/*
* Metadata is easy and the backrefs should always agree on bytenr and
* size, if not we've got bigger issues.
*/
if (rec->metadata)
return 0;
rbtree_postorder_for_each_entry_safe(back, tmp,
&rec->backref_tree, node) {
bool skip = false;
if (back->full_backref || !back->is_data)
continue;
dback = to_data_backref(back);
key.objectid = dback->root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
dest_root = btrfs_read_fs_root(gfs_info, &key);
/* For non-exist root we just skip it */
if (IS_ERR_OR_NULL(dest_root))
continue;
/*
* If we repaired something and restarted we could potentially
* try to add this unaligned record multiple times, so check
* before we add a new one.
*/
list_for_each_entry(urec, &dest_root->unaligned_extent_recs, list) {
if (urec->objectid == dest_root->objectid &&
urec->owner == dback->owner &&
urec->bytenr == rec->start) {
skip = true;
break;
}
}
if (skip)
continue;
/*
* If we repaired something and restarted we could potentially
* try to add this unaligned record multiple times, so check
* before we add a new one.
*/
list_for_each_entry(urec, &dest_root->unaligned_extent_recs, list) {
if (urec->objectid == dest_root->objectid &&
urec->owner == dback->owner &&
urec->bytenr == rec->start) {
skip = true;
break;
}
}
if (skip)
continue;
urec = malloc(sizeof(struct unaligned_extent_rec_t));
if (!urec)
return -ENOMEM;
INIT_LIST_HEAD(&urec->list);
urec->objectid = dest_root->objectid;
urec->owner = dback->owner;
urec->offset = 0;
urec->bytenr = rec->start;
ret = find_file_extent_offset_by_bytenr(dest_root,
dback->owner, rec->start, &urec->offset);
if (ret) {
free(urec);
return ret;
}
list_add(&urec->list, &dest_root->unaligned_extent_recs);
}
return ret;
}
static int repair_extent_item_generation(struct extent_record *rec)
{
struct btrfs_trans_handle *trans;
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_extent_item *ei;
struct btrfs_root *extent_root = btrfs_extent_root(gfs_info,
rec->start);
u64 new_gen = 0;;
int ret;
key.objectid = rec->start;
key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = (u64)-1;
get_extent_item_generation(rec->start, &new_gen);
trans = btrfs_start_transaction(extent_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
return ret;
}
btrfs_init_path(&path);
ret = btrfs_search_slot(trans, extent_root, &key, &path, 0, 1);
/* Not possible */
if (ret == 0)
ret = -EUCLEAN;
if (ret < 0)
goto out;
ret = btrfs_previous_extent_item(extent_root, &path, rec->start);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
goto out;
if (!new_gen)
new_gen = trans->transid;
ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_extent_item);
btrfs_set_extent_generation(path.nodes[0], ei, new_gen);
ret = btrfs_commit_transaction(trans, extent_root);
if (ret < 0) {
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
goto out;
}
printf("Reset extent item (%llu) generation to %llu\n",
key.objectid, new_gen);
rec->generation = new_gen;
out:
btrfs_release_path(&path);
if (ret < 0)
btrfs_abort_transaction(trans, ret);
return ret;
}
static int check_extent_refs(struct btrfs_root *root,
struct cache_tree *extent_cache)
{
struct extent_record *rec;
struct cache_extent *cache;
u64 super_gen;
int ret = 0;
int had_dups = 0;
int err = 0;
if (opt_check_repair) {
/*
* if we're doing a repair, we have to make sure
* we don't allocate from the problem extents.
* In the worst case, this will be all the
* extents in the FS
*/
cache = search_cache_extent(extent_cache, 0);
while (cache) {
rec = container_of(cache, struct extent_record, cache);
set_extent_dirty(gfs_info->excluded_extents,
rec->start,
rec->start + rec->max_size - 1);
cache = next_cache_extent(cache);
}
/* pin down all the corrupted blocks too */
cache = search_cache_extent(gfs_info->corrupt_blocks, 0);
while (cache) {
set_extent_dirty(gfs_info->excluded_extents,
cache->start,
cache->start + cache->size - 1);
cache = next_cache_extent(cache);
}
prune_corrupt_blocks();
reset_cached_block_groups();
}
reset_cached_block_groups();
/*
* We need to delete any duplicate entries we find first otherwise we
* could mess up the extent tree when we have backrefs that actually
* belong to a different extent item and not the weird duplicate one.
*/
while (opt_check_repair && !list_empty(&duplicate_extents)) {
rec = to_extent_record(duplicate_extents.next);
list_del_init(&rec->list);
/* Sometimes we can find a backref before we find an actual
* extent, so we need to process it a little bit to see if there
* truly are multiple EXTENT_ITEM_KEY's for the same range, or
* if this is a backref screwup. If we need to delete stuff
* process_duplicates() will return 0, otherwise it will return
* 1 and we
*/
if (process_duplicates(extent_cache, rec))
continue;
ret = delete_duplicate_records(root, rec);
if (ret < 0)
return ret;
/*
* delete_duplicate_records will return the number of entries
* deleted, so if it's greater than 0 then we know we actually
* did something and we need to remove.
*/
if (ret)
had_dups = 1;
}
if (had_dups)
return -EAGAIN;
super_gen = btrfs_super_generation(gfs_info->super_copy);
while (1) {
int cur_err = 0;
int fix = 0;
cache = search_cache_extent(extent_cache, 0);
if (!cache)
break;
rec = container_of(cache, struct extent_record, cache);
if (rec->num_duplicates) {
fprintf(stderr,
"extent item %llu has multiple extent items\n",
(unsigned long long)rec->start);
cur_err = 1;
}
if (rec->generation > super_gen + 1) {
bool repaired = false;
if (opt_check_repair) {
ret = repair_extent_item_generation(rec);
if (ret == 0)
repaired = true;
}
if (!repaired) {
error(
"invalid generation for extent %llu, have %llu expect (0, %llu]",
rec->start, rec->generation,
super_gen + 1);
cur_err = 1;
}
}
if (rec->refs != rec->extent_item_refs) {
fprintf(stderr, "ref mismatch on [%llu %llu] ",
(unsigned long long)rec->start,
(unsigned long long)rec->nr);
fprintf(stderr, "extent item %llu, found %llu\n",
(unsigned long long)rec->extent_item_refs,
(unsigned long long)rec->refs);
fix = 1;
cur_err = 1;
}
if (!IS_ALIGNED(rec->start, gfs_info->sectorsize)) {
fprintf(stderr, "unaligned extent rec on [%llu %llu]\n",
(unsigned long long)rec->start,
(unsigned long long)rec->nr);
ret = record_unaligned_extent_rec(rec);
if (ret)
goto repair_abort;
/* No need to check backref */
goto next;
}
if (all_backpointers_checked(rec, 1)) {
fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
(unsigned long long)rec->start,
(unsigned long long)rec->nr);
fix = 1;
cur_err = 1;
}
if (!rec->owner_ref_checked) {
fprintf(stderr, "owner ref check failed [%llu %llu]\n",
(unsigned long long)rec->start,
(unsigned long long)rec->nr);
fix = 1;
cur_err = 1;
}
if (opt_check_repair && fix) {
ret = fixup_extent_refs(extent_cache, rec);
if (ret)
goto repair_abort;
}
if (rec->bad_full_backref) {
fprintf(stderr, "bad full backref, on [%llu]\n",
(unsigned long long)rec->start);
if (opt_check_repair) {
ret = fixup_extent_flags(rec);
if (ret)
goto repair_abort;
fix = 1;
}
cur_err = 1;
}
/*
* Although it's not a extent ref's problem, we reuse this
* routine for error reporting.
* No repair function yet.
*/
if (rec->crossing_stripes) {
fprintf(stderr,
"bad metadata [%llu, %llu) crossing stripe boundary\n",
rec->start, rec->start + rec->max_size);
cur_err = 1;
}
if (rec->wrong_chunk_type) {
fprintf(stderr,
"bad extent [%llu, %llu), type mismatch with chunk\n",
rec->start, rec->start + rec->max_size);
cur_err = 1;
}
next:
err = cur_err;
remove_cache_extent(extent_cache, cache);
free_all_extent_backrefs(rec);
if (!init_extent_tree && opt_check_repair && (!cur_err || fix))
clear_extent_dirty(gfs_info->excluded_extents,
rec->start,
rec->start + rec->max_size - 1);
free(rec);
}
repair_abort:
if (opt_check_repair) {
if (ret && ret != -EAGAIN) {
fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
exit(1);
} else if (!ret) {
struct btrfs_trans_handle *trans;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto repair_abort;
}
ret = btrfs_fix_block_accounting(trans);
if (ret)
goto repair_abort;
ret = btrfs_commit_transaction(trans, root);
if (ret)
goto repair_abort;
}
return ret;
}
if (err)
err = -EIO;
return err;
}
/*
* Check the chunk with its block group/dev list ref:
* Return 0 if all refs seems valid.
* Return 1 if part of refs seems valid, need later check for rebuild ref
* like missing block group and needs to search extent tree to rebuild them.
* Return -1 if essential refs are missing and unable to rebuild.
*/
static int check_chunk_refs(struct chunk_record *chunk_rec,
struct block_group_tree *block_group_cache,
struct device_extent_tree *dev_extent_cache,
int silent)
{
struct cache_extent *block_group_item;
struct block_group_record *block_group_rec;
struct cache_extent *dev_extent_item;
struct device_extent_record *dev_extent_rec;
u64 devid;
u64 offset;
u64 length;
int metadump_v2 = 0;
int i;
int ret = 0;
block_group_item = lookup_cache_extent(&block_group_cache->tree,
chunk_rec->offset,
chunk_rec->length);
if (block_group_item) {
block_group_rec = container_of(block_group_item,
struct block_group_record,
cache);
if (chunk_rec->length != block_group_rec->offset ||
chunk_rec->offset != block_group_rec->objectid ||
(!metadump_v2 &&
chunk_rec->type_flags != block_group_rec->flags)) {
if (!silent)
fprintf(stderr,
"Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
chunk_rec->objectid,
chunk_rec->type,
chunk_rec->offset,
chunk_rec->length,
chunk_rec->offset,
chunk_rec->type_flags,
block_group_rec->objectid,
block_group_rec->type,
block_group_rec->offset,
block_group_rec->offset,
block_group_rec->objectid,
block_group_rec->flags);
ret = -1;
} else {
list_del_init(&block_group_rec->list);
chunk_rec->bg_rec = block_group_rec;
}
} else {
if (!silent)
fprintf(stderr,
"Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
chunk_rec->objectid,
chunk_rec->type,
chunk_rec->offset,
chunk_rec->length,
chunk_rec->offset,
chunk_rec->type_flags);
ret = 1;
}
if (metadump_v2)
return ret;
length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
chunk_rec->num_stripes);
for (i = 0; i < chunk_rec->num_stripes; ++i) {
devid = chunk_rec->stripes[i].devid;
offset = chunk_rec->stripes[i].offset;
dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
devid, offset, length);
if (dev_extent_item) {
dev_extent_rec = container_of(dev_extent_item,
struct device_extent_record,
cache);
if (dev_extent_rec->objectid != devid ||
dev_extent_rec->offset != offset ||
dev_extent_rec->chunk_offset != chunk_rec->offset ||
dev_extent_rec->length != length) {
if (!silent)
fprintf(stderr,
"Chunk[%llu, %u, %llu] stripe[%llu, %llu] mismatch dev extent[%llu, %llu, %llu]\n",
chunk_rec->objectid,
chunk_rec->type,
chunk_rec->offset,
chunk_rec->stripes[i].devid,
chunk_rec->stripes[i].offset,
dev_extent_rec->objectid,
dev_extent_rec->offset,
dev_extent_rec->length);
ret = -1;
} else {
list_move(&dev_extent_rec->chunk_list,
&chunk_rec->dextents);
}
} else {
if (!silent)
fprintf(stderr,
"Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
chunk_rec->objectid,
chunk_rec->type,
chunk_rec->offset,
chunk_rec->stripes[i].devid,
chunk_rec->stripes[i].offset);
ret = -1;
}
}
return ret;
}
/* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
int check_chunks(struct cache_tree *chunk_cache,
struct block_group_tree *block_group_cache,
struct device_extent_tree *dev_extent_cache,
struct list_head *good, struct list_head *bad,
struct list_head *rebuild, int silent)
{
struct cache_extent *chunk_item;
struct chunk_record *chunk_rec;
struct block_group_record *bg_rec;
struct device_extent_record *dext_rec;
int err;
int ret = 0;
chunk_item = first_cache_extent(chunk_cache);
while (chunk_item) {
chunk_rec = container_of(chunk_item, struct chunk_record,
cache);
err = check_chunk_refs(chunk_rec, block_group_cache,
dev_extent_cache, silent);
if (err < 0)
ret = err;
if (err == 0 && good)
list_add_tail(&chunk_rec->list, good);
if (err > 0 && rebuild)
list_add_tail(&chunk_rec->list, rebuild);
if (err < 0 && bad)
list_add_tail(&chunk_rec->list, bad);
chunk_item = next_cache_extent(chunk_item);
}
list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
if (!silent)
fprintf(stderr,
"Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
bg_rec->objectid,
bg_rec->offset,
bg_rec->flags);
if (!ret)
ret = 1;
}
list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
chunk_list) {
if (!silent)
fprintf(stderr,
"Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
dext_rec->objectid,
dext_rec->offset,
dext_rec->length);
if (!ret)
ret = 1;
}
return ret;
}
btrfs-progs: check: lowmem: Fix several bugs related to afterward search Since btrfs_search_slot() can point to the slot which is beyond the leaves' capacity, we should pay extra attention when doing afterward search. While for lowmem check, several places uses afterward search: 1) Block group item used space check 2) Device item used space check 3) Data extent backref check. In the following case for block group item check, btrfs lowmem mode check will skip the block group and report false alert: leaf 29405184 items 37 free space 1273 generation 11 owner 2 ... item 36 key (77594624 EXTENT_ITEM 2097152) extent refs 1 gen 8 flags DATA extent data backref root 5 objectid 265 offset 0 count 1 leaf 29409280 items 43 free space 670 generation 11 owner 2 item 0 key (96468992 EXTENT_ITEM 2097152) extent refs 1 gen 8 flags DATA extent data backref root 5 objectid 274 offset 0 count 1 item 1 key (96468992 BLOCK_GROUP_ITEM 33554432) block group used 2265088 chunk_objectid 256 flags DATA When checking block group item, we will search key (96468992 0 0) to start from the first item in the block group. While search_slot() will point to leaf 29405184, slot 37 which is beyond leaf capacity. And when reading key from slot 37, uninitialized data can be read out and cause us to exit block group item check, leading to false alert. Fix it by checking path.slot[0] before reading out the key. Reported-by: Christoph Anton Mitterer <calestyo@scientia.net> Reported-by: Chris Murphy <chris@colorremedies.com> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.com>
2017-02-21 16:34:27 +08:00
static int check_device_used(struct device_record *dev_rec,
struct device_extent_tree *dext_cache)
{
struct cache_extent *cache;
struct device_extent_record *dev_extent_rec;
u64 total_byte = 0;
btrfs-progs: check: lowmem: Fix several bugs related to afterward search Since btrfs_search_slot() can point to the slot which is beyond the leaves' capacity, we should pay extra attention when doing afterward search. While for lowmem check, several places uses afterward search: 1) Block group item used space check 2) Device item used space check 3) Data extent backref check. In the following case for block group item check, btrfs lowmem mode check will skip the block group and report false alert: leaf 29405184 items 37 free space 1273 generation 11 owner 2 ... item 36 key (77594624 EXTENT_ITEM 2097152) extent refs 1 gen 8 flags DATA extent data backref root 5 objectid 265 offset 0 count 1 leaf 29409280 items 43 free space 670 generation 11 owner 2 item 0 key (96468992 EXTENT_ITEM 2097152) extent refs 1 gen 8 flags DATA extent data backref root 5 objectid 274 offset 0 count 1 item 1 key (96468992 BLOCK_GROUP_ITEM 33554432) block group used 2265088 chunk_objectid 256 flags DATA When checking block group item, we will search key (96468992 0 0) to start from the first item in the block group. While search_slot() will point to leaf 29405184, slot 37 which is beyond leaf capacity. And when reading key from slot 37, uninitialized data can be read out and cause us to exit block group item check, leading to false alert. Fix it by checking path.slot[0] before reading out the key. Reported-by: Christoph Anton Mitterer <calestyo@scientia.net> Reported-by: Chris Murphy <chris@colorremedies.com> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.com>
2017-02-21 16:34:27 +08:00
if (dev_rec->byte_used > dev_rec->total_byte) {
error(
"device %llu has incorrect used bytes %llu > total bytes %llu",
dev_rec->devid, dev_rec->byte_used, dev_rec->total_byte);
return -EUCLEAN;
}
cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
while (cache) {
dev_extent_rec = container_of(cache,
struct device_extent_record,
cache);
if (dev_extent_rec->objectid != dev_rec->devid)
break;
list_del_init(&dev_extent_rec->device_list);
total_byte += dev_extent_rec->length;
cache = next_cache_extent(cache);
}
if (total_byte != dev_rec->byte_used) {
int ret = -1;
fprintf(stderr,
"Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
total_byte, dev_rec->byte_used, dev_rec->objectid,
dev_rec->type, dev_rec->offset);
if (opt_check_repair) {
ret = repair_dev_item_bytes_used(gfs_info,
dev_rec->devid, total_byte);
}
return ret;
} else {
return 0;
}
}
/*
* Unlike device size alignment check above, some super total_bytes check
* failure can lead to mount failure for newer kernel.
*
* So this function will return the error for a fatal super total_bytes problem.
*/
static bool is_super_size_valid(void)
{
struct btrfs_fs_devices *fs_devices = gfs_info->fs_devices;
const u64 super_bytes = btrfs_super_total_bytes(gfs_info->super_copy);
u64 total_bytes = 0;
while (fs_devices) {
struct btrfs_device *dev;
list_for_each_entry(dev, &fs_devices->devices, dev_list)
total_bytes += dev->total_bytes;
fs_devices = fs_devices->seed;
}
/* Important check, which can cause unmountable fs */
if (super_bytes < total_bytes) {
error("super total bytes %llu smaller than real device(s) size %llu",
super_bytes, total_bytes);
error("mounting this fs may fail for newer kernels");
error("this can be fixed by 'btrfs rescue fix-device-size'");
return false;
}
/*
* Optional check, just to make everything aligned and match with each
* other.
*
* For a btrfs-image restored fs, we don't need to check it anyway.
*/
if (btrfs_super_flags(gfs_info->super_copy) &
(BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
return true;
if (!IS_ALIGNED(super_bytes, gfs_info->sectorsize) ||
!IS_ALIGNED(total_bytes, gfs_info->sectorsize) ||
super_bytes != total_bytes) {
warning("minor unaligned/mismatch device size detected:"
"\tsuper block total bytes=%llu found total bytes=%llu",
super_bytes, total_bytes);
warning(
"recommended to use 'btrfs rescue fix-device-size' to fix it");
}
return true;
}
/* check btrfs_dev_item -> btrfs_dev_extent */
static int check_devices(struct rb_root *dev_cache,
struct device_extent_tree *dev_extent_cache)
{
struct rb_node *dev_node;
struct device_record *dev_rec;
struct device_extent_record *dext_rec;
int err;
int ret = 0;
dev_node = rb_first(dev_cache);
while (dev_node) {
dev_rec = container_of(dev_node, struct device_record, node);
err = check_device_used(dev_rec, dev_extent_cache);
if (err)
ret = err;
check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
gfs_info->sectorsize);
btrfs-progs: check: verify the underlying block device size is valid [BUG] There is a bug report that, one btrfs got its underlying device shrunk accidentally. Fortunately the user has no data at the truncated range. However kernel will reject such filesystem, while btrfs-check reports nothing wrong with it. This can be easily reproduced by: # truncate -s 1G test.img # mkfs.btrfs test.img # truncate -s 996M test.img # btrfs check test.img Opening filesystem to check... Checking filesystem on test.img UUID: dbf0a16d-f158-4383-9025-29d7f4c43f17 [1/7] checking root items [2/7] checking extents [3/7] checking free space tree [4/7] checking fs roots [5/7] checking only csums items (without verifying data) [6/7] checking root refs [7/7] checking quota groups skipped (not enabled on this FS) found 16527360 bytes used, no error found ^^^^^^^^^^^^^^ total csum bytes: 13836 total tree bytes: 2359296 total fs tree bytes: 2162688 total extent tree bytes: 65536 btree space waste bytes: 503569 file data blocks allocated: 14168064 referenced 14168064 [CAUSE] Btrfs check really only checks the metadata cross references, not really bothering if the underlying device has correct size. Thus we completely ignored such size mismatch. [FIX] For both regular and lowmem mode, add extra check against the underlying block device size. If the block device size is smaller than its total_bytes, gives a error message and error out. Now the check looks like this for both modes: ... [2/7] checking extents ERROR: block device size is smaller than total_bytes in device item, has 1046478848 expect >= 1073741824 ERROR: errors found in extent allocation tree or chunk allocation [3/7] checking free space tree ... found 16527360 bytes used, error(s) found Issue: #504 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2022-08-30 14:49:42 +08:00
if (dev_rec->bad_block_dev_size && !ret)
ret = 1;
dev_node = rb_next(dev_node);
}
list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
device_list) {
fprintf(stderr,
"Device extent[%llu, %llu, %llu] didn't find its device.\n",
dext_rec->objectid, dext_rec->offset, dext_rec->length);
if (!ret)
ret = 1;
}
return ret;
}
static int add_root_item_to_list(struct list_head *head,
u64 objectid, u64 bytenr, u64 last_snapshot,
u8 level, u8 drop_level,
struct btrfs_key *drop_key)
{
struct root_item_record *ri_rec;
ri_rec = malloc(sizeof(*ri_rec));
if (!ri_rec)
return -ENOMEM;
ri_rec->bytenr = bytenr;
ri_rec->objectid = objectid;
ri_rec->level = level;
ri_rec->drop_level = drop_level;
ri_rec->last_snapshot = last_snapshot;
if (drop_key)
memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
list_add_tail(&ri_rec->list, head);
return 0;
}
static void free_root_item_list(struct list_head *list)
{
struct root_item_record *ri_rec;
while (!list_empty(list)) {
ri_rec = list_first_entry(list, struct root_item_record,
list);
list_del_init(&ri_rec->list);
free(ri_rec);
}
}
static int deal_root_from_list(struct list_head *list,
struct btrfs_root *root,
struct block_info *bits,
int bits_nr,
struct cache_tree *pending,
struct cache_tree *seen,
struct cache_tree *reada,
struct cache_tree *nodes,
struct cache_tree *extent_cache,
struct cache_tree *chunk_cache,
struct rb_root *dev_cache,
struct block_group_tree *block_group_cache,
struct device_extent_tree *dev_extent_cache)
{
int ret = 0;
u64 last = 0;
while (!list_empty(list)) {
struct root_item_record *rec;
struct extent_buffer *buf;
rec = list_entry(list->next,
struct root_item_record, list);
last = 0;
buf = read_tree_block(gfs_info, rec->bytenr, 0);
if (!extent_buffer_uptodate(buf)) {
free_extent_buffer(buf);
ret = -EIO;
break;
}
ret = add_root_to_pending(buf, extent_cache, pending,
seen, nodes, rec->objectid);
if (ret < 0)
break;
/*
* To rebuild extent tree, we need deal with snapshot
* one by one, otherwise we deal with node firstly which
* can maximize readahead.
*/
while (1) {
g_task_ctx.item_count++;
ret = run_next_block(root, bits, bits_nr, &last,
pending, seen, reada, nodes,
extent_cache, chunk_cache,
dev_cache, block_group_cache,
dev_extent_cache, rec);
if (ret != 0)
break;
}
free_extent_buffer(buf);
list_del(&rec->list);
free(rec);
if (ret < 0)
break;
}
while (ret >= 0) {
ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
reada, nodes, extent_cache, chunk_cache,
dev_cache, block_group_cache,
dev_extent_cache, NULL);
if (ret != 0) {
if (ret > 0)
ret = 0;
break;
}
}
return ret;
}
static int check_block_groups(struct block_group_tree *bg_cache)
{
struct btrfs_trans_handle *trans;
struct cache_extent *item;
struct block_group_record *bg_rec;
u64 used = 0;
int ret = 0;
for (item = first_cache_extent(&bg_cache->tree);
item;
item = next_cache_extent(item)) {
bg_rec = container_of(item, struct block_group_record, cache);
used += bg_rec->actual_used;
if (bg_rec->disk_used == bg_rec->actual_used)
continue;
fprintf(stderr,
"block group [%llu %llu] used %llu but extent items used %llu\n",
bg_rec->objectid, bg_rec->offset, bg_rec->disk_used,
bg_rec->actual_used);
ret = -1;
}
/*
* We check the super bytes_used here because it's the sum of all block
* groups used, and the repair actually happens in
* btrfs_fix_block_accounting, so we can kill both birds with the same
* stone here.
*/
if (used != btrfs_super_bytes_used(gfs_info->super_copy)) {
fprintf(stderr,
"super bytes used %llu mismatches actual used %llu\n",
btrfs_super_bytes_used(gfs_info->super_copy), used);
ret = -1;
}
if (!opt_check_repair || !ret)
return ret;
trans = btrfs_start_transaction(gfs_info->tree_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
fprintf(stderr, "Failed to start a transaction\n");
return ret;
}
ret = btrfs_fix_block_accounting(trans);
btrfs_commit_transaction(trans, gfs_info->tree_root);
return ret ? ret : -EAGAIN;
}
/**
* parse_tree_roots - Go over all roots in the tree root and add each one to
* a list.
*
* @normal_trees - list to contains all roots which don't have a drop
* operation in progress
*
* @dropping_trees - list containing all roots which have a drop operation
* pending
*
* Returns 0 on success or a negative value indicating an error.
*/
static int parse_tree_roots(struct list_head *normal_trees,
struct list_head *dropping_trees)
{
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_root_item ri;
struct extent_buffer *leaf;
int slot;
int ret = 0;
btrfs_init_path(&path);
key.offset = 0;
key.objectid = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
ret = btrfs_search_slot(NULL, gfs_info->tree_root, &key, &path, 0, 0);
if (ret < 0)
goto out;
while (1) {
leaf = path.nodes[0];
slot = path.slots[0];
if (slot >= btrfs_header_nritems(path.nodes[0])) {
ret = btrfs_next_leaf(gfs_info->tree_root, &path);
if (ret != 0)
break;
leaf = path.nodes[0];
slot = path.slots[0];
}
btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
unsigned long offset;
u64 last_snapshot;
u8 level;
offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
read_extent_buffer(leaf, &ri, offset, sizeof(ri));
last_snapshot = btrfs_root_last_snapshot(&ri);
level = btrfs_root_level(&ri);
if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
ret = add_root_item_to_list(normal_trees,
found_key.objectid,
btrfs_root_bytenr(&ri),
last_snapshot, level,
0, NULL);
if (ret < 0)
break;
} else {
u64 objectid = found_key.objectid;
btrfs_disk_key_to_cpu(&found_key,
&ri.drop_progress);
ret = add_root_item_to_list(dropping_trees,
objectid,
btrfs_root_bytenr(&ri),
last_snapshot, level,
ri.drop_level, &found_key);
if (ret < 0)
break;
}
}
path.slots[0]++;
}
out:
btrfs_release_path(&path);
return ret;
}
/*
* Check if all dev extents are valid (not overlapping nor beyond device
* boundary).
*
* Dev extents <-> chunk cross checking is already done in check_chunks().
*/
static int check_dev_extents(void)
{
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_root *dev_root = gfs_info->dev_root;
int ret;
u64 prev_devid = 0;
u64 prev_dev_ext_end = 0;
btrfs_init_path(&path);
key.objectid = 1;
key.type = BTRFS_DEV_EXTENT_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
if (ret < 0) {
errno = -ret;
error("failed to search device tree: %m");
goto out;
}
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
ret = btrfs_next_leaf(dev_root, &path);
if (ret < 0) {
errno = -ret;
error("failed to find next leaf: %m");
goto out;
}
if (ret > 0) {
ret = 0;
goto out;
}
}
while (1) {
struct btrfs_dev_extent *dev_ext;
struct btrfs_device *dev;
u64 devid;
u64 physical_offset;
u64 physical_len;
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.type != BTRFS_DEV_EXTENT_KEY)
break;
dev_ext = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_dev_extent);
devid = key.objectid;
physical_offset = key.offset;
physical_len = btrfs_dev_extent_length(path.nodes[0], dev_ext);
dev = btrfs_find_device(gfs_info, devid, NULL, NULL);
if (!dev) {
error("failed to find device with devid %llu", devid);
ret = -EUCLEAN;
goto out;
}
if (prev_devid == devid && prev_dev_ext_end > physical_offset) {
error(
"dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
devid, physical_offset, prev_dev_ext_end);
ret = -EUCLEAN;
goto out;
}
if (physical_offset + physical_len > dev->total_bytes) {
error(
"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
devid, physical_offset, physical_len,
dev->total_bytes);
ret = -EUCLEAN;
goto out;
}
prev_devid = devid;
prev_dev_ext_end = physical_offset + physical_len;
ret = btrfs_next_item(dev_root, &path);
if (ret < 0) {
errno = -ret;
error("failed to find next leaf: %m");
goto out;
}
if (ret > 0) {
ret = 0;
break;
}
}
out:
btrfs_release_path(&path);
return ret;
}
static int load_super_root(struct list_head *head, struct btrfs_root *root)
{
u8 level;
if (!root)
return 0;
level = btrfs_header_level(root->node);
return add_root_item_to_list(head, root->root_key.objectid,
root->node->start, 0, level, 0, NULL);
}
static int check_chunks_and_extents(void)
{
struct rb_root dev_cache;
struct cache_tree chunk_cache;
struct block_group_tree block_group_cache;
struct device_extent_tree dev_extent_cache;
struct cache_tree extent_cache;
struct cache_tree seen;
struct cache_tree pending;
struct cache_tree reada;
struct cache_tree nodes;
struct extent_io_tree excluded_extents;
struct cache_tree corrupt_blocks;
int ret, err = 0;
struct block_info *bits;
int bits_nr;
struct list_head dropping_trees;
struct list_head normal_trees;
struct btrfs_root *root;
root = gfs_info->fs_root;
dev_cache = RB_ROOT;
cache_tree_init(&chunk_cache);
block_group_tree_init(&block_group_cache);
device_extent_tree_init(&dev_extent_cache);
cache_tree_init(&extent_cache);
cache_tree_init(&seen);
cache_tree_init(&pending);
cache_tree_init(&nodes);
cache_tree_init(&reada);
cache_tree_init(&corrupt_blocks);
extent_io_tree_init(&excluded_extents);
INIT_LIST_HEAD(&dropping_trees);
INIT_LIST_HEAD(&normal_trees);
if (opt_check_repair) {
gfs_info->excluded_extents = &excluded_extents;
gfs_info->fsck_extent_cache = &extent_cache;
gfs_info->free_extent_hook = free_extent_hook;
gfs_info->corrupt_blocks = &corrupt_blocks;
}
bits_nr = 1024;
bits = malloc(bits_nr * sizeof(struct block_info));
if (!bits) {
error_msg(ERROR_MSG_MEMORY, NULL);
exit(1);
}
again:
ret = load_super_root(&normal_trees, gfs_info->tree_root);
if (ret < 0)
goto out;
ret = load_super_root(&normal_trees, gfs_info->chunk_root);
if (ret < 0)
goto out;
ret = parse_tree_roots(&normal_trees, &dropping_trees);
if (ret < 0)
goto out;
/*
* check_block can return -EAGAIN if it fixes something, please keep
* this in mind when dealing with return values from these functions, if
* we get -EAGAIN we want to fall through and restart the loop.
*/
ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
&seen, &reada, &nodes, &extent_cache,
&chunk_cache, &dev_cache, &block_group_cache,
&dev_extent_cache);
if (ret < 0) {
if (ret == -EAGAIN)
goto loop;
goto out;
}
ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
&pending, &seen, &reada, &nodes,
&extent_cache, &chunk_cache, &dev_cache,
&block_group_cache, &dev_extent_cache);
if (ret < 0) {
if (ret == -EAGAIN)
goto loop;
goto out;
}
ret = check_dev_extents();
if (ret < 0) {
err = ret;
goto out;
}
ret = check_chunks(&chunk_cache, &block_group_cache,
&dev_extent_cache, NULL, NULL, NULL, 0);
if (ret) {
if (ret == -EAGAIN)
goto loop;
err = ret;
}
ret = check_extent_refs(root, &extent_cache);
if (ret < 0) {
if (ret == -EAGAIN)
goto loop;
goto out;
}
ret = check_block_groups(&block_group_cache);
if (ret) {
if (ret == -EAGAIN)
goto loop;
goto out;
}
ret = check_devices(&dev_cache, &dev_extent_cache);
if (ret && err)
ret = err;
out:
if (opt_check_repair) {
free_corrupt_blocks_tree(gfs_info->corrupt_blocks);
extent_io_tree_cleanup(&excluded_extents);
gfs_info->fsck_extent_cache = NULL;
gfs_info->free_extent_hook = NULL;
gfs_info->corrupt_blocks = NULL;
gfs_info->excluded_extents = NULL;
}
free(bits);
free_chunk_cache_tree(&chunk_cache);
free_device_cache_tree(&dev_cache);
free_block_group_tree(&block_group_cache);
free_device_extent_tree(&dev_extent_cache);
free_extent_cache_tree(&seen);
free_extent_cache_tree(&pending);
free_extent_cache_tree(&reada);
free_extent_cache_tree(&nodes);
free_root_item_list(&normal_trees);
free_root_item_list(&dropping_trees);
return ret;
loop:
free_corrupt_blocks_tree(gfs_info->corrupt_blocks);
free_extent_cache_tree(&seen);
free_extent_cache_tree(&pending);
free_extent_cache_tree(&reada);
free_extent_cache_tree(&nodes);
free_chunk_cache_tree(&chunk_cache);
free_block_group_tree(&block_group_cache);
free_device_cache_tree(&dev_cache);
free_device_extent_tree(&dev_extent_cache);
free_extent_record_cache(&extent_cache);
free_root_item_list(&normal_trees);
free_root_item_list(&dropping_trees);
extent_io_tree_cleanup(&excluded_extents);
goto again;
}
static int do_check_chunks_and_extents(void)
{
int ret;
if (check_mode == CHECK_MODE_LOWMEM)
ret = check_chunks_and_extents_lowmem();
else
ret = check_chunks_and_extents();
/* Also repair device size related problems */
if (opt_check_repair && !ret) {
ret = btrfs_fix_device_and_super_size(gfs_info);
if (ret > 0)
ret = 0;
}
/*
* If we have error unfixed, exit right now, as super num is
* really a minor problem compared to any problems found above.
*/
if (ret)
return ret;
ret = check_and_repair_super_num_devs(gfs_info);
return ret;
}
static struct extent_buffer *btrfs_fsck_clear_root(
struct btrfs_trans_handle *trans,
struct btrfs_key *key)
{
struct btrfs_root_item ri = {};
struct btrfs_path *path;
struct extent_buffer *c;
struct btrfs_disk_key disk_key = {};
int ret;
path = btrfs_alloc_path();
if (!path)
return ERR_PTR(-ENOMEM);
c = btrfs_alloc_free_block(trans, gfs_info->tree_root,
gfs_info->nodesize, key->objectid,
&disk_key, 0, 0, 0);
if (IS_ERR(c)) {
btrfs_free_path(path);
return c;
}
memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_level(c, 0);
btrfs_set_header_bytenr(c, c->start);
btrfs_set_header_generation(c, trans->transid);
btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(c, key->objectid);
write_extent_buffer(c, gfs_info->fs_devices->metadata_uuid,
btrfs_header_fsid(), BTRFS_FSID_SIZE);
write_extent_buffer(c, gfs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(c),
BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(c);
/*
* The root item may not exist, try to insert an empty one so it exists,
* otherwise simply update the existing one with the correct settings.
*/
ret = btrfs_insert_empty_item(trans, gfs_info->tree_root, path, key,
sizeof(ri));
if (ret == -EEXIST) {
read_extent_buffer(path->nodes[0], &ri,
btrfs_item_ptr_offset(path->nodes[0],
path->slots[0]),
sizeof(ri));
} else if (ret) {
btrfs_free_path(path);
free_extent_buffer(c);
return ERR_PTR(ret);
}
btrfs_set_root_bytenr(&ri, c->start);
btrfs_set_root_generation(&ri, trans->transid);
btrfs_set_root_refs(&ri, 1);
btrfs_set_root_used(&ri, c->len);
btrfs_set_root_generation_v2(&ri, trans->transid);
write_extent_buffer(path->nodes[0], &ri,
btrfs_item_ptr_offset(path->nodes[0],
path->slots[0]),
sizeof(ri));
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
return c;
}
static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct extent_buffer *c;
c = btrfs_fsck_clear_root(trans, &root->root_key);
if (IS_ERR(c))
return PTR_ERR(c);
free_extent_buffer(root->node);
root->node = c;
add_root_to_dirty_list(root);
return 0;
}
static int reset_block_groups(void)
{
struct btrfs_block_group *cache;
struct btrfs_path path;
struct extent_buffer *leaf;
struct btrfs_chunk *chunk;
struct btrfs_key key;
int ret;
u64 start;
btrfs_init_path(&path);
key.objectid = 0;
key.type = BTRFS_CHUNK_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, gfs_info->chunk_root, &key, &path, 0, 0);
if (ret < 0) {
btrfs_release_path(&path);
return ret;
}
/*
* We do this in case the block groups were screwed up and had alloc
* bits that aren't actually set on the chunks. This happens with
* restored images every time and could happen in real life I guess.
*/
gfs_info->avail_data_alloc_bits = 0;
gfs_info->avail_metadata_alloc_bits = 0;
gfs_info->avail_system_alloc_bits = 0;
/* First we need to create the in-memory block groups */
while (1) {
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
ret = btrfs_next_leaf(gfs_info->chunk_root, &path);
if (ret < 0) {
btrfs_release_path(&path);
return ret;
}
if (ret) {
ret = 0;
break;
}
}
leaf = path.nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
if (key.type != BTRFS_CHUNK_ITEM_KEY) {
path.slots[0]++;
continue;
}
chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
btrfs_add_block_group(gfs_info, 0,
btrfs_chunk_type(leaf, chunk), key.offset,
btrfs_chunk_length(leaf, chunk));
set_extent_dirty(&gfs_info->free_space_cache, key.offset,
key.offset + btrfs_chunk_length(leaf, chunk));
path.slots[0]++;
}
start = 0;
while (1) {
cache = btrfs_lookup_first_block_group(gfs_info, start);
if (!cache)
break;
cache->cached = 1;
start = cache->start + cache->length;
}
btrfs_release_path(&path);
return 0;
}
static int reset_balance(struct btrfs_trans_handle *trans)
{
struct btrfs_root *root = gfs_info->tree_root;
struct btrfs_path path;
struct extent_buffer *leaf;
struct btrfs_key key;
int del_slot, del_nr = 0;
int ret;
int found = 0;
btrfs_init_path(&path);
key.objectid = BTRFS_BALANCE_OBJECTID;
key.type = BTRFS_BALANCE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
if (ret) {
if (ret > 0)
ret = 0;
if (!ret)
goto reinit_data_reloc;
else
goto out;
}
ret = btrfs_del_item(trans, root, &path);
if (ret)
goto out;
btrfs_release_path(&path);
key.objectid = BTRFS_TREE_RELOC_OBJECTID;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
if (ret < 0)
goto out;
while (1) {
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
if (!found)
break;
if (del_nr) {
ret = btrfs_del_items(trans, root, &path,
del_slot, del_nr);
del_nr = 0;
if (ret)
goto out;
}
key.offset++;
btrfs_release_path(&path);
found = 0;
ret = btrfs_search_slot(trans, root, &key, &path,
-1, 1);
if (ret < 0)
goto out;
continue;
}
found = 1;
leaf = path.nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
break;
if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
path.slots[0]++;
continue;
}
if (!del_nr) {
del_slot = path.slots[0];
del_nr = 1;
} else {
del_nr++;
}
path.slots[0]++;
}
if (del_nr) {
ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
if (ret)
goto out;
}
btrfs_release_path(&path);
reinit_data_reloc:
key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(root)) {
fprintf(stderr, "Error reading data reloc tree\n");
ret = PTR_ERR(root);
goto out;
}
record_root_in_trans(trans, root);
ret = btrfs_fsck_reinit_root(trans, root);
if (ret)
goto out;
ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
btrfs_release_path(&path);
return ret;
}
static int reinit_global_roots(struct btrfs_trans_handle *trans, u64 objectid)
{
struct btrfs_key key = {
.objectid = objectid,
.type = BTRFS_ROOT_ITEM_KEY,
.offset = 0,
};
struct btrfs_path path;
struct btrfs_root *tree_root = gfs_info->tree_root;
struct btrfs_root *root;
int ret;
btrfs_init_path(&path);
while (1) {
ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
if (ret) {
if (ret == 1) {
/* We should at least find the first one. */
if (key.offset == 0)
ret = -ENOENT;
else
ret = 0;
}
break;
}
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.objectid != objectid)
break;
btrfs_release_path(&path);
root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(root)) {
error("Error reading global root [%llu %llu]",
key.objectid, key.offset);
ret = PTR_ERR(root);
break;
}
ret = btrfs_fsck_reinit_root(trans, root);
if (ret)
break;
key.offset++;
}
btrfs_release_path(&path);
return ret;
}
static int reinit_extent_tree(struct btrfs_trans_handle *trans, bool pin)
{
struct btrfs_root *bg_root = btrfs_block_group_root(trans->fs_info);
u64 start = 0;
int ret;
/*
* The only reason we don't do this is because right now we're just
* walking the trees we find and pinning down their bytes, we don't look
* at any of the leaves. In order to do mixed groups we'd have to check
* the leaves of any fs roots and pin down the bytes for any file
* extents we find. Not hard but why do it if we don't have to?
*/
if (btrfs_fs_incompat(gfs_info, MIXED_GROUPS)) {
fprintf(stderr, "We don't support re-initing the extent tree "
"for mixed block groups yet, please notify a btrfs "
"developer you want to do this so they can add this "
"functionality.\n");
return -EINVAL;
}
/*
* first we need to walk all of the trees except the extent tree and pin
* down/exclude the bytes that are in use so we don't overwrite any
* existing metadata.
* If pinned, unpin will be done in the end of transaction.
* If excluded, cleanup will be done in check_chunks_and_extents_lowmem.
*/
again:
if (pin) {
ret = pin_metadata_blocks();
if (ret) {
fprintf(stderr, "error pinning down used bytes\n");
return ret;
}
} else {
ret = exclude_metadata_blocks();
if (ret) {
fprintf(stderr, "error excluding used bytes\n");
printf("try to pin down used bytes\n");
pin = true;
goto again;
}
}
/*
* Need to drop all the block groups since we're going to recreate all
* of them again.
*/
btrfs_free_block_groups(gfs_info);
ret = reset_block_groups();
if (ret) {
fprintf(stderr, "error resetting the block groups\n");
return ret;
}
/* Ok we can allocate now, reinit the extent root */
ret = reinit_global_roots(trans, BTRFS_EXTENT_TREE_OBJECTID);
if (ret) {
fprintf(stderr, "extent root initialization failed\n");
/*
* When the transaction code is updated we should end the
* transaction, but for now progs only knows about commit so
* just return an error.
*/
return ret;
}
/*
* If we are extent tree v2 then we can reint the block group root as
* well.
*/
if (btrfs_fs_compat_ro(gfs_info, BLOCK_GROUP_TREE)) {
ret = btrfs_fsck_reinit_root(trans, gfs_info->block_group_root);
if (ret) {
fprintf(stderr, "block group initialization failed\n");
return ret;
}
}
/*
* Now we have all the in-memory block groups setup so we can make
* allocations properly, and the metadata we care about is safe since we
* pinned all of it above.
*/
while (1) {
struct btrfs_block_group_item bgi;
struct btrfs_block_group *cache;
struct btrfs_key key;
cache = btrfs_lookup_first_block_group(gfs_info, start);
if (!cache)
break;
start = cache->start + cache->length;
btrfs_set_stack_block_group_used(&bgi, cache->used);
btrfs_set_stack_block_group_chunk_objectid(&bgi,
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
key.objectid = cache->start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = cache->length;
ret = btrfs_insert_item(trans, bg_root, &key, &bgi,
sizeof(bgi));
if (ret) {
fprintf(stderr, "Error adding block group\n");
return ret;
}
btrfs_run_delayed_refs(trans, -1);
}
ret = reset_balance(trans);
if (ret)
fprintf(stderr, "error resetting the pending balance\n");
return ret;
}
static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
{
struct btrfs_path path;
struct btrfs_trans_handle *trans;
struct btrfs_key key;
int ret;
printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
bad->key.type, bad->key.offset);
key.objectid = bad->root_id;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(root)) {
fprintf(stderr, "Couldn't find owner root %llu\n",
key.objectid);
return PTR_ERR(root);
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
btrfs_init_path(&path);
ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
if (ret) {
if (ret > 0)
ret = 0;
goto out;
}
ret = btrfs_del_item(trans, root, &path);
out:
btrfs_commit_transaction(trans, root);
btrfs_release_path(&path);
return ret;
}
static int zero_log_tree(struct btrfs_root *root)
{
int ret;
btrfs_set_super_log_root(gfs_info->super_copy, 0);
btrfs_set_super_log_root_level(gfs_info->super_copy, 0);
/* Don't use transaction for overwriting only the super block */
ret = write_all_supers(gfs_info);
return ret;
}
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
static void free_roots_info_cache(void)
{
if (!roots_info_cache)
return;
while (!cache_tree_empty(roots_info_cache)) {
struct cache_extent *entry;
struct root_item_info *rii;
entry = first_cache_extent(roots_info_cache);
if (!entry)
break;
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
remove_cache_extent(roots_info_cache, entry);
rii = container_of(entry, struct root_item_info, cache_extent);
free(rii);
}
free(roots_info_cache);
roots_info_cache = NULL;
}
static int build_roots_info_cache(void)
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
{
struct btrfs_root *extent_root = btrfs_extent_root(gfs_info, 0);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
int ret = 0;
struct btrfs_key key;
struct extent_buffer *leaf;
struct btrfs_path path;
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (!roots_info_cache) {
roots_info_cache = malloc(sizeof(*roots_info_cache));
if (!roots_info_cache)
return -ENOMEM;
cache_tree_init(roots_info_cache);
}
btrfs_init_path(&path);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
key.objectid = 0;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (ret < 0)
goto out;
leaf = path.nodes[0];
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
while (1) {
struct btrfs_key found_key;
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
btrfs-progs: btrfs-progs: Fix read beyond boundary bug in build_roots_info_cache() This bug is exposed by fsck-test with D=asan, hit by test case 020, with the following error report: ================================================================= ==10740==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x621000061580 at pc 0x56051f0db6cd bp 0x7ffe170f3e20 sp 0x7ffe170f3e10 READ of size 1 at 0x621000061580 thread T0 #0 0x56051f0db6cc in btrfs_extent_inline_ref_type /home/adam/btrfs/btrfs-progs/ctree.h:1727 #1 0x56051f13b669 in build_roots_info_cache /home/adam/btrfs/btrfs-progs/cmds-check.c:14306 #2 0x56051f13c86a in repair_root_items /home/adam/btrfs/btrfs-progs/cmds-check.c:14450 #3 0x56051f13ea89 in cmd_check /home/adam/btrfs/btrfs-progs/cmds-check.c:14965 #4 0x56051efe75bb in main /home/adam/btrfs/btrfs-progs/btrfs.c:302 #5 0x7f04ddbb0f49 in __libc_start_main (/usr/lib/libc.so.6+0x20f49) #6 0x56051efe68c9 in _start (/home/adam/btrfs/btrfs-progs/btrfs+0x5b8c9) 0x621000061580 is located 0 bytes to the right of 4224-byte region [0x621000060500,0x621000061580) allocated by thread T0 here: #0 0x7f04ded50ce1 in __interceptor_calloc /build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cc:70 #1 0x56051f04685e in __alloc_extent_buffer /home/adam/btrfs/btrfs-progs/extent_io.c:553 #2 0x56051f047563 in alloc_extent_buffer /home/adam/btrfs/btrfs-progs/extent_io.c:687 #3 0x56051efff1d1 in btrfs_find_create_tree_block /home/adam/btrfs/btrfs-progs/disk-io.c:187 #4 0x56051f000133 in read_tree_block /home/adam/btrfs/btrfs-progs/disk-io.c:327 #5 0x56051efeddb8 in read_node_slot /home/adam/btrfs/btrfs-progs/ctree.c:652 #6 0x56051effb0d9 in btrfs_next_leaf /home/adam/btrfs/btrfs-progs/ctree.c:2853 #7 0x56051f13b343 in build_roots_info_cache /home/adam/btrfs/btrfs-progs/cmds-check.c:14267 #8 0x56051f13c86a in repair_root_items /home/adam/btrfs/btrfs-progs/cmds-check.c:14450 #9 0x56051f13ea89 in cmd_check /home/adam/btrfs/btrfs-progs/cmds-check.c:14965 #10 0x56051efe75bb in main /home/adam/btrfs/btrfs-progs/btrfs.c:302 #11 0x7f04ddbb0f49 in __libc_start_main (/usr/lib/libc.so.6+0x20f49) It's completely possible that one extent/metadata item has no inline reference, while build_roots_info_cache() doesn't have such check. Fix it by checking @iref against item end to avoid such problem. Issue: #92 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2018-02-14 23:15:46 +08:00
unsigned long item_end;
int slot = path.slots[0];
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
int type;
u64 flags;
u64 root_id;
u8 level;
struct cache_extent *entry;
struct root_item_info *rii;
g_task_ctx.item_count++;
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(extent_root, &path);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (ret < 0) {
break;
} else if (ret) {
ret = 0;
break;
}
leaf = path.nodes[0];
slot = path.slots[0];
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
}
btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
found_key.type != BTRFS_METADATA_ITEM_KEY)
goto next;
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(leaf, ei);
item_end = (unsigned long)ei + btrfs_item_size(leaf, slot);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
goto next;
if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
level = found_key.offset;
} else {
struct btrfs_tree_block_info *binfo;
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
binfo = (struct btrfs_tree_block_info *)(ei + 1);
iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
level = btrfs_tree_block_level(leaf, binfo);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
}
btrfs-progs: btrfs-progs: Fix read beyond boundary bug in build_roots_info_cache() This bug is exposed by fsck-test with D=asan, hit by test case 020, with the following error report: ================================================================= ==10740==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x621000061580 at pc 0x56051f0db6cd bp 0x7ffe170f3e20 sp 0x7ffe170f3e10 READ of size 1 at 0x621000061580 thread T0 #0 0x56051f0db6cc in btrfs_extent_inline_ref_type /home/adam/btrfs/btrfs-progs/ctree.h:1727 #1 0x56051f13b669 in build_roots_info_cache /home/adam/btrfs/btrfs-progs/cmds-check.c:14306 #2 0x56051f13c86a in repair_root_items /home/adam/btrfs/btrfs-progs/cmds-check.c:14450 #3 0x56051f13ea89 in cmd_check /home/adam/btrfs/btrfs-progs/cmds-check.c:14965 #4 0x56051efe75bb in main /home/adam/btrfs/btrfs-progs/btrfs.c:302 #5 0x7f04ddbb0f49 in __libc_start_main (/usr/lib/libc.so.6+0x20f49) #6 0x56051efe68c9 in _start (/home/adam/btrfs/btrfs-progs/btrfs+0x5b8c9) 0x621000061580 is located 0 bytes to the right of 4224-byte region [0x621000060500,0x621000061580) allocated by thread T0 here: #0 0x7f04ded50ce1 in __interceptor_calloc /build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cc:70 #1 0x56051f04685e in __alloc_extent_buffer /home/adam/btrfs/btrfs-progs/extent_io.c:553 #2 0x56051f047563 in alloc_extent_buffer /home/adam/btrfs/btrfs-progs/extent_io.c:687 #3 0x56051efff1d1 in btrfs_find_create_tree_block /home/adam/btrfs/btrfs-progs/disk-io.c:187 #4 0x56051f000133 in read_tree_block /home/adam/btrfs/btrfs-progs/disk-io.c:327 #5 0x56051efeddb8 in read_node_slot /home/adam/btrfs/btrfs-progs/ctree.c:652 #6 0x56051effb0d9 in btrfs_next_leaf /home/adam/btrfs/btrfs-progs/ctree.c:2853 #7 0x56051f13b343 in build_roots_info_cache /home/adam/btrfs/btrfs-progs/cmds-check.c:14267 #8 0x56051f13c86a in repair_root_items /home/adam/btrfs/btrfs-progs/cmds-check.c:14450 #9 0x56051f13ea89 in cmd_check /home/adam/btrfs/btrfs-progs/cmds-check.c:14965 #10 0x56051efe75bb in main /home/adam/btrfs/btrfs-progs/btrfs.c:302 #11 0x7f04ddbb0f49 in __libc_start_main (/usr/lib/libc.so.6+0x20f49) It's completely possible that one extent/metadata item has no inline reference, while build_roots_info_cache() doesn't have such check. Fix it by checking @iref against item end to avoid such problem. Issue: #92 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2018-02-14 23:15:46 +08:00
/*
* It's a valid extent/metadata item that has no inline ref,
* but SHARED_BLOCK_REF or other shared references.
* So we need to do extra check to avoid reading beyond leaf
* boundary.
btrfs-progs: btrfs-progs: Fix read beyond boundary bug in build_roots_info_cache() This bug is exposed by fsck-test with D=asan, hit by test case 020, with the following error report: ================================================================= ==10740==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x621000061580 at pc 0x56051f0db6cd bp 0x7ffe170f3e20 sp 0x7ffe170f3e10 READ of size 1 at 0x621000061580 thread T0 #0 0x56051f0db6cc in btrfs_extent_inline_ref_type /home/adam/btrfs/btrfs-progs/ctree.h:1727 #1 0x56051f13b669 in build_roots_info_cache /home/adam/btrfs/btrfs-progs/cmds-check.c:14306 #2 0x56051f13c86a in repair_root_items /home/adam/btrfs/btrfs-progs/cmds-check.c:14450 #3 0x56051f13ea89 in cmd_check /home/adam/btrfs/btrfs-progs/cmds-check.c:14965 #4 0x56051efe75bb in main /home/adam/btrfs/btrfs-progs/btrfs.c:302 #5 0x7f04ddbb0f49 in __libc_start_main (/usr/lib/libc.so.6+0x20f49) #6 0x56051efe68c9 in _start (/home/adam/btrfs/btrfs-progs/btrfs+0x5b8c9) 0x621000061580 is located 0 bytes to the right of 4224-byte region [0x621000060500,0x621000061580) allocated by thread T0 here: #0 0x7f04ded50ce1 in __interceptor_calloc /build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cc:70 #1 0x56051f04685e in __alloc_extent_buffer /home/adam/btrfs/btrfs-progs/extent_io.c:553 #2 0x56051f047563 in alloc_extent_buffer /home/adam/btrfs/btrfs-progs/extent_io.c:687 #3 0x56051efff1d1 in btrfs_find_create_tree_block /home/adam/btrfs/btrfs-progs/disk-io.c:187 #4 0x56051f000133 in read_tree_block /home/adam/btrfs/btrfs-progs/disk-io.c:327 #5 0x56051efeddb8 in read_node_slot /home/adam/btrfs/btrfs-progs/ctree.c:652 #6 0x56051effb0d9 in btrfs_next_leaf /home/adam/btrfs/btrfs-progs/ctree.c:2853 #7 0x56051f13b343 in build_roots_info_cache /home/adam/btrfs/btrfs-progs/cmds-check.c:14267 #8 0x56051f13c86a in repair_root_items /home/adam/btrfs/btrfs-progs/cmds-check.c:14450 #9 0x56051f13ea89 in cmd_check /home/adam/btrfs/btrfs-progs/cmds-check.c:14965 #10 0x56051efe75bb in main /home/adam/btrfs/btrfs-progs/btrfs.c:302 #11 0x7f04ddbb0f49 in __libc_start_main (/usr/lib/libc.so.6+0x20f49) It's completely possible that one extent/metadata item has no inline reference, while build_roots_info_cache() doesn't have such check. Fix it by checking @iref against item end to avoid such problem. Issue: #92 Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2018-02-14 23:15:46 +08:00
*/
if ((unsigned long)iref >= item_end)
goto next;
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
/*
* For a root extent, it must be of the following type and the
* first (and only one) iref in the item.
*/
type = btrfs_extent_inline_ref_type(leaf, iref);
if (type != BTRFS_TREE_BLOCK_REF_KEY)
goto next;
root_id = btrfs_extent_inline_ref_offset(leaf, iref);
entry = lookup_cache_extent(roots_info_cache, root_id, 1);
if (!entry) {
rii = malloc(sizeof(struct root_item_info));
if (!rii) {
ret = -ENOMEM;
goto out;
}
rii->cache_extent.start = root_id;
rii->cache_extent.size = 1;
rii->level = (u8)-1;
entry = &rii->cache_extent;
ret = insert_cache_extent(roots_info_cache, entry);
ASSERT(ret == 0);
} else {
rii = container_of(entry, struct root_item_info,
cache_extent);
}
ASSERT(rii->cache_extent.start == root_id);
ASSERT(rii->cache_extent.size == 1);
if (level > rii->level || rii->level == (u8)-1) {
rii->level = level;
rii->bytenr = found_key.objectid;
rii->gen = btrfs_extent_generation(leaf, ei);
rii->node_count = 1;
} else if (level == rii->level) {
rii->node_count++;
}
next:
path.slots[0]++;
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
}
out:
btrfs_release_path(&path);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
return ret;
}
static int maybe_repair_root_item(struct btrfs_path *path,
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
const struct btrfs_key *root_key,
const int read_only_mode)
{
const u64 root_id = root_key->objectid;
struct cache_extent *entry;
struct root_item_info *rii;
struct btrfs_root_item ri;
unsigned long offset;
entry = lookup_cache_extent(roots_info_cache, root_id, 1);
if (!entry) {
fprintf(stderr,
"Error: could not find extent items for root %llu\n",
root_key->objectid);
return -ENOENT;
}
rii = container_of(entry, struct root_item_info, cache_extent);
ASSERT(rii->cache_extent.start == root_id);
ASSERT(rii->cache_extent.size == 1);
if (rii->node_count != 1) {
fprintf(stderr,
"Error: could not find btree root extent for root %llu\n",
root_id);
return -ENOENT;
}
offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
if (btrfs_root_bytenr(&ri) != rii->bytenr ||
btrfs_root_level(&ri) != rii->level ||
btrfs_root_generation(&ri) != rii->gen) {
/*
* If we're in repair mode but our caller told us to not update
* the root item, i.e. just check if it needs to be updated, don't
* print this message, since the caller will call us again shortly
* for the same root item without read only mode (the caller will
* open a transaction first).
*/
if (!(read_only_mode && opt_check_repair))
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
fprintf(stderr,
"%sroot item for root %llu,"
" current bytenr %llu, current gen %llu, current level %u,"
" new bytenr %llu, new gen %llu, new level %u\n",
(read_only_mode ? "" : "fixing "),
root_id,
btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
btrfs_root_level(&ri),
rii->bytenr, rii->gen, rii->level);
if (btrfs_root_generation(&ri) > rii->gen) {
fprintf(stderr,
"root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
root_id, btrfs_root_generation(&ri), rii->gen);
return -EINVAL;
}
if (!read_only_mode) {
btrfs_set_root_bytenr(&ri, rii->bytenr);
btrfs_set_root_level(&ri, rii->level);
btrfs_set_root_generation(&ri, rii->gen);
write_extent_buffer(path->nodes[0], &ri,
offset, sizeof(ri));
}
return 1;
}
return 0;
}
/*
* A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
* caused read-only snapshots to be corrupted if they were created at a moment
* when the source subvolume/snapshot had orphan items. The issue was that the
* on-disk root items became incorrect, referring to the pre orphan cleanup root
* node instead of the post orphan cleanup root node.
* So this function, and its callees, just detects and fixes those cases. Even
* though the regression was for read-only snapshots, this function applies to
* any snapshot/subvolume root.
* This must be run before any other repair code - not doing it so, makes other
* repair code delete or modify backrefs in the extent tree for example, which
* will result in an inconsistent fs after repairing the root items.
*/
static int repair_root_items(void)
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
{
struct btrfs_path path;
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
struct btrfs_key key;
struct extent_buffer *leaf;
struct btrfs_trans_handle *trans = NULL;
int ret = 0;
int bad_roots = 0;
int need_trans = 0;
if (btrfs_fs_incompat(gfs_info, EXTENT_TREE_V2))
return 0;
btrfs_init_path(&path);
ret = build_roots_info_cache();
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (ret)
goto out;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = 0;
again:
/*
* Avoid opening and committing transactions if a leaf doesn't have
* any root items that need to be fixed, so that we avoid rotating
* backup roots unnecessarily.
*/
if (need_trans) {
trans = btrfs_start_transaction(gfs_info->tree_root, 1);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
}
ret = btrfs_search_slot(trans, gfs_info->tree_root, &key, &path,
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
0, trans ? 1 : 0);
if (ret < 0)
goto out;
leaf = path.nodes[0];
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
while (1) {
struct btrfs_key found_key;
if (path.slots[0] >= btrfs_header_nritems(leaf)) {
int no_more_keys = find_next_key(&path, &key);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
btrfs_release_path(&path);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (trans) {
ret = btrfs_commit_transaction(trans,
gfs_info->tree_root);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
trans = NULL;
if (ret < 0)
goto out;
}
need_trans = 0;
if (no_more_keys)
break;
goto again;
}
btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (found_key.type != BTRFS_ROOT_ITEM_KEY)
goto next;
if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
goto next;
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (ret < 0)
goto out;
if (ret) {
if (!trans && opt_check_repair) {
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
need_trans = 1;
key = found_key;
btrfs_release_path(&path);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
goto again;
}
bad_roots++;
}
next:
path.slots[0]++;
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
}
ret = 0;
out:
free_roots_info_cache();
btrfs_release_path(&path);
if (trans)
btrfs_commit_transaction(trans, gfs_info->tree_root);
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (ret < 0)
return ret;
return bad_roots;
}
static int check_global_roots_uptodate(void)
{
struct btrfs_root *root;
struct rb_node *n;
int found_csum = 0, found_extent = 0, found_fst = 0;
int ret = 0;
for (n = rb_first(&gfs_info->global_roots_tree); n; n = rb_next(n)) {
root = rb_entry(n, struct btrfs_root, rb_node);
if (!extent_buffer_uptodate(root->node)) {
error("chritical: global root [%llu %llu] not uptodate, unable to check the file system",
root->root_key.objectid, root->root_key.offset);
return -EIO;
}
switch(root->root_key.objectid) {
case BTRFS_EXTENT_TREE_OBJECTID:
found_extent++;
break;
case BTRFS_CSUM_TREE_OBJECTID:
found_csum++;
break;
case BTRFS_FREE_SPACE_TREE_OBJECTID:
found_fst++;
break;
default:
break;
}
}
if (found_extent != gfs_info->nr_global_roots) {
error("found %d extent roots, expected %llu", found_extent,
gfs_info->nr_global_roots);
ret = -EIO;
}
if (found_csum != gfs_info->nr_global_roots) {
error("found %d csum roots, expected %llu", found_csum,
gfs_info->nr_global_roots);
ret = -EIO;
}
if (!btrfs_fs_compat_ro(gfs_info, FREE_SPACE_TREE))
return ret;
if (found_fst != gfs_info->nr_global_roots) {
error("found %d free space roots, expected %llu", found_fst,
gfs_info->nr_global_roots);
ret = -EIO;
}
return ret;
}
static const char * const cmd_check_usage[] = {
"btrfs check [options] <device>",
"Check structural integrity of a filesystem (unmounted).",
"Check structural integrity of an unmounted filesystem. Verify internal",
"trees' consistency and item connectivity. In the repair mode try to",
"fix the problems found. ",
"WARNING: the repair mode is considered dangerous and should not be used",
" without prior analysis of problems found on the filesystem."
"",
"Options:",
" starting point selection:",
" -s|--super <superblock> use this superblock copy",
" -b|--backup use the first valid backup root copy",
" -r|--tree-root <bytenr> use the given bytenr for the tree root",
" --chunk-root <bytenr> use the given bytenr for the chunk tree root",
" operation modes:",
" --readonly run in read-only mode (default)",
" --repair try to repair the filesystem",
" --force skip mount checks, repair is not possible",
" --mode <MODE> allows choice of memory/IO trade-offs",
" where MODE is one of:",
" original - read inodes and extents to memory (requires",
" more memory, does less IO)",
" lowmem - try to use less memory but read blocks again",
" when needed (experimental)",
" repair options:",
" --init-csum-tree create a new CRC tree",
" --init-extent-tree create a new extent tree",
" --clear-space-cache v1|v2 clear space cache for v1 or v2",
" --clear-ino-cache clear ino cache leftover items",
" check and reporting options:",
" --check-data-csum verify checksums of data blocks",
" -Q|--qgroup-report print a report on qgroup consistency",
" -E|--subvol-extents <subvolid>",
" print subvolume extents and sharing state",
" -p|--progress indicate progress",
NULL
};
static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
{
struct cache_tree root_cache;
struct btrfs_root *root;
struct open_ctree_flags ocf = { 0 };
u64 bytenr = 0;
btrfs-progs: show extent state for a subvolume The qgroup verification code can trivially be extended to provide extended information on the extents which a subvolume root references. Along with qgroup-verify, I have found this tool to be invaluable when tracking down extent references. The patch adds a switch to the check subcommand '--subvol-extents' which takes as args a single subvolume id. When run with the switch, we'll print out each extent that the subvolume references. The extent printout gives standard extent info you would expect along with information on which other roots reference it. Sample output follows - this is a few lines from a run on a subvolume I've been testing qgroup changes on: Print extent state for subvolume 281 on /dev/vdb2 UUID: 8203ca66-9858-4e3f-b447-5bbaacf79c02 Offset Len Root Refs Roots 12582912 20480 12 257 279 280 281 282 283 284 285 286 287 288 289 12603392 8192 12 257 279 280 281 282 283 284 285 286 287 288 289 12611584 12288 12 257 279 280 281 282 283 284 285 286 287 288 289 <snip a bunch of extents to show some variety> 124583936 16384 4 281 282 283 280 125075456 16384 4 280 281 282 283 126255104 16384 11 257 280 281 282 283 284 285 286 287 288 289 4763508736 4096 3 279 280 281 In case it wasn't clear, this applies on top of my qgroup verify patch: "btrfs-progs: add quota group verify code" A branch with all this can be found on github: https://github.com/markfasheh/btrfs-progs-patches/tree/qgroup-verify Please apply, Signed-off-by: Mark Fasheh <mfasheh@suse.de> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-07-03 04:34:41 +08:00
u64 subvolid = 0;
u64 tree_root_bytenr = 0;
u64 chunk_root_bytenr = 0;
char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
int ret = 0;
int err = 0;
u64 num;
int init_csum_tree = 0;
int readonly = 0;
int clear_space_cache = 0;
int clear_ino_cache = 0;
int qgroup_report = 0;
int qgroups_repaired = 0;
int qgroup_verify_ret;
btrfs-progs: introduce OPEN_CTREE_ALLOW_TRANSID_MISMATCH flag [BUG] There is a report that, btrfstune can even work while the fs has transid mismatch problems. $ btrfstune -f -u /dev/sdb1 Current fsid: b2b5ae8d-4c49-45f0-b42e-46fe7dcfcb07 New fsid: b2b5ae8d-4c49-45f0-b42e-46fe7dcfcb07 Set superblock flag CHANGING_FSID Change fsid in extents parent transid verify failed on 792854528 wanted 20103 found 20091 parent transid verify failed on 792854528 wanted 20103 found 20091 parent transid verify failed on 792854528 wanted 20103 found 20091 Ignoring transid failure parent transid verify failed on 792870912 wanted 20103 found 20091 parent transid verify failed on 792870912 wanted 20103 found 20091 parent transid verify failed on 792870912 wanted 20103 found 20091 Ignoring transid failure parent transid verify failed on 792887296 wanted 20103 found 20091 parent transid verify failed on 792887296 wanted 20103 found 20091 parent transid verify failed on 792887296 wanted 20103 found 20091 Ignoring transid failure ERROR: child eb corrupted: parent bytenr=38010880 item=69 parent level=1 child level=1 ERROR: failed to change UUID of metadata: -5 ERROR: btrfstune failed This leaves a corrupted fs even more corrupted, and due to the extra CHANGING_FSID flag, btrfs check will not even try to run on it: Opening filesystem to check... ERROR: Filesystem UUID change in progress ERROR: cannot open file system [CAUSE] Unlike kernel, btrfs-progs has a less strict check on transid mismatch. In read_tree_block() we will fall back to use the tree block even its transid mismatch if we can't find any better copy. However not all commands in btrfs-progs needs this feature, only btrfs-check (which may fix the problem) and btrfs-restore (it just tries to ignore any problems) really utilize this feature. [FIX] Introduce a new open ctree flag, OPEN_CTREE_ALLOW_TRANSID_MISMATCH, to be explicit about whether we really want to ignore transid error. Currently only btrfs-check and btrfs-restore will utilize this new flag. Also add btrfs-image to allow opening such fs with transid error. Link: https://www.reddit.com/r/btrfs/comments/pivpqk/failure_during_btrfstune_u/ Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2021-09-08 10:05:42 +08:00
unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE |
OPEN_CTREE_ALLOW_TRANSID_MISMATCH;
int force = 0;
while(1) {
int c;
enum { GETOPT_VAL_REPAIR = GETOPT_VAL_FIRST, GETOPT_VAL_INIT_CSUM,
GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
GETOPT_VAL_CLEAR_INO_CACHE, GETOPT_VAL_FORCE };
static const struct option long_options[] = {
{ "super", required_argument, NULL, 's' },
{ "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
{ "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
{ "init-csum-tree", no_argument, NULL,
GETOPT_VAL_INIT_CSUM },
{ "init-extent-tree", no_argument, NULL,
GETOPT_VAL_INIT_EXTENT },
{ "check-data-csum", no_argument, NULL,
GETOPT_VAL_CHECK_CSUM },
{ "backup", no_argument, NULL, 'b' },
{ "subvol-extents", required_argument, NULL, 'E' },
{ "qgroup-report", no_argument, NULL, 'Q' },
{ "tree-root", required_argument, NULL, 'r' },
{ "chunk-root", required_argument, NULL,
GETOPT_VAL_CHUNK_TREE },
{ "progress", no_argument, NULL, 'p' },
{ "mode", required_argument, NULL,
GETOPT_VAL_MODE },
{ "clear-space-cache", required_argument, NULL,
GETOPT_VAL_CLEAR_SPACE_CACHE},
{ "clear-ino-cache", no_argument , NULL,
GETOPT_VAL_CLEAR_INO_CACHE},
{ "force", no_argument, NULL, GETOPT_VAL_FORCE },
{ NULL, 0, NULL, 0}
};
c = getopt_long(argc, argv, "as:br:pE:Q", long_options, NULL);
if (c < 0)
break;
switch(c) {
case 'a': /* ignored */ break;
case 'b':
ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
break;
case 's':
num = arg_strtou64(optarg);
if (num >= BTRFS_SUPER_MIRROR_MAX) {
error(
"super mirror should be less than %d",
BTRFS_SUPER_MIRROR_MAX);
exit(1);
}
bytenr = btrfs_sb_offset(((int)num));
printf("using SB copy %llu, bytenr %llu\n", num,
(unsigned long long)bytenr);
break;
case 'Q':
qgroup_report = 1;
break;
btrfs-progs: show extent state for a subvolume The qgroup verification code can trivially be extended to provide extended information on the extents which a subvolume root references. Along with qgroup-verify, I have found this tool to be invaluable when tracking down extent references. The patch adds a switch to the check subcommand '--subvol-extents' which takes as args a single subvolume id. When run with the switch, we'll print out each extent that the subvolume references. The extent printout gives standard extent info you would expect along with information on which other roots reference it. Sample output follows - this is a few lines from a run on a subvolume I've been testing qgroup changes on: Print extent state for subvolume 281 on /dev/vdb2 UUID: 8203ca66-9858-4e3f-b447-5bbaacf79c02 Offset Len Root Refs Roots 12582912 20480 12 257 279 280 281 282 283 284 285 286 287 288 289 12603392 8192 12 257 279 280 281 282 283 284 285 286 287 288 289 12611584 12288 12 257 279 280 281 282 283 284 285 286 287 288 289 <snip a bunch of extents to show some variety> 124583936 16384 4 281 282 283 280 125075456 16384 4 280 281 282 283 126255104 16384 11 257 280 281 282 283 284 285 286 287 288 289 4763508736 4096 3 279 280 281 In case it wasn't clear, this applies on top of my qgroup verify patch: "btrfs-progs: add quota group verify code" A branch with all this can be found on github: https://github.com/markfasheh/btrfs-progs-patches/tree/qgroup-verify Please apply, Signed-off-by: Mark Fasheh <mfasheh@suse.de> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-07-03 04:34:41 +08:00
case 'E':
subvolid = arg_strtou64(optarg);
break;
case 'r':
tree_root_bytenr = arg_strtou64(optarg);
break;
case GETOPT_VAL_CHUNK_TREE:
chunk_root_bytenr = arg_strtou64(optarg);
break;
case 'p':
g_task_ctx.progress_enabled = true;
break;
case '?':
case 'h':
usage(cmd);
case GETOPT_VAL_REPAIR:
printf("enabling repair mode\n");
opt_check_repair = 1;
ctree_flags |= OPEN_CTREE_WRITES;
break;
case GETOPT_VAL_READONLY:
readonly = 1;
break;
case GETOPT_VAL_INIT_CSUM:
printf("Creating a new CRC tree\n");
init_csum_tree = 1;
opt_check_repair = 1;
ctree_flags |= OPEN_CTREE_WRITES;
break;
case GETOPT_VAL_INIT_EXTENT:
init_extent_tree = 1;
ctree_flags |= (OPEN_CTREE_WRITES |
OPEN_CTREE_NO_BLOCK_GROUPS);
opt_check_repair = 1;
break;
case GETOPT_VAL_CHECK_CSUM:
check_data_csum = 1;
break;
case GETOPT_VAL_MODE:
check_mode = parse_check_mode(optarg);
if (check_mode == CHECK_MODE_UNKNOWN) {
error("unknown mode: %s", optarg);
exit(1);
}
break;
case GETOPT_VAL_CLEAR_SPACE_CACHE:
if (strcmp(optarg, "v1") == 0) {
clear_space_cache = 1;
} else if (strcmp(optarg, "v2") == 0) {
clear_space_cache = 2;
ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
} else {
error(
"invalid argument to --clear-space-cache, must be v1 or v2");
exit(1);
}
ctree_flags |= OPEN_CTREE_WRITES;
break;
case GETOPT_VAL_CLEAR_INO_CACHE:
clear_ino_cache = 1;
ctree_flags |= OPEN_CTREE_WRITES;
break;
case GETOPT_VAL_FORCE:
force = 1;
break;
}
}
if (check_argc_exact(argc - optind, 1))
usage(cmd);
if (g_task_ctx.progress_enabled) {
g_task_ctx.tp = TASK_NOTHING;
g_task_ctx.info = task_init(print_status_check, print_status_return, &g_task_ctx);
}
/* This check is the only reason for --readonly to exist */
if (readonly && opt_check_repair) {
error("repair options are not compatible with --readonly");
exit(1);
}
if (opt_check_repair && !force) {
int delay = 10;
printf("WARNING:\n\n");
printf("\tDo not use --repair unless you are advised to do so by a developer\n");
printf("\tor an experienced user, and then only after having accepted that no\n");
printf("\tfsck can successfully repair all types of filesystem corruption. Eg.\n");
printf("\tsome software or hardware bugs can fatally damage a volume.\n");
printf("\tThe operation will start in %d seconds.\n", delay);
printf("\tUse Ctrl-C to stop it.\n");
while (delay) {
printf("%2d", delay--);
fflush(stdout);
sleep(1);
}
printf("\nStarting repair.\n");
}
/*
* experimental and dangerous
*/
if (opt_check_repair && check_mode == CHECK_MODE_LOWMEM)
warning("low-memory mode repair support is only partial");
printf("Opening filesystem to check...\n");
cache_tree_init(&root_cache);
qgroup_set_item_count_ptr(&g_task_ctx.item_count);
ret = check_mounted(argv[optind]);
if (!force) {
if (ret < 0) {
errno = -ret;
error("could not check mount status: %m");
err |= !!ret;
goto err_out;
} else if (ret) {
error(
"%s is currently mounted, use --force if you really intend to check the filesystem",
argv[optind]);
ret = -EBUSY;
err |= !!ret;
goto err_out;
}
} else {
if (ret < 0) {
warning(
"cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
argv[optind]);
} else if (ret) {
warning(
"filesystem mounted, continuing because of --force");
}
/* A block device is mounted in exclusive mode by kernel */
ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
}
/* only allow partial opening under repair mode */
if (opt_check_repair)
ctree_flags |= OPEN_CTREE_PARTIAL;
ocf.filename = argv[optind];
ocf.sb_bytenr = bytenr;
ocf.root_tree_bytenr = tree_root_bytenr;
ocf.chunk_tree_bytenr = chunk_root_bytenr;
ocf.flags = ctree_flags;
gfs_info = open_ctree_fs_info(&ocf);
if (!gfs_info) {
error("cannot open file system");
ret = -EIO;
err |= !!ret;
goto err_out;
}
root = gfs_info->fs_root;
uuid_unparse(gfs_info->super_copy->fsid, uuidbuf);
printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/*
* Check the bare minimum before starting anything else that could rely
* on it, namely the tree roots, any local consistency checks
*/
if (!extent_buffer_uptodate(gfs_info->tree_root->node) ||
!extent_buffer_uptodate(gfs_info->dev_root->node) ||
!extent_buffer_uptodate(gfs_info->chunk_root->node)) {
error("critical roots corrupted, unable to check the filesystem");
err |= !!ret;
ret = -EIO;
goto close_out;
}
if (clear_space_cache) {
ret = do_clear_free_space_cache(clear_space_cache);
err |= !!ret;
goto close_out;
}
Btrfs-progs: check, ability to detect and fix outdated snapshot root items This change adds code to detect and fix the issue introduced in the kernel release 3.17, where creation of read-only snapshots lead to a corrupted filesystem if they were created at a moment when the source subvolume/snapshot had orphan items. The issue was that the on-disk root items became incorrect, referring to the pre orphan cleanup root node instead of the post orphan cleanup root node. A test filesystem can be generated with the test case recently submitted for xfstests/fstests, which is essencially the following (bash script): workout() { ops=$1 procs=$2 num_snapshots=$3 _scratch_mkfs >> $seqres.full 2>&1 _scratch_mount snapshot_cmd="$BTRFS_UTIL_PROG subvolume snapshot -r $SCRATCH_MNT" snapshot_cmd="$snapshot_cmd $SCRATCH_MNT/snap_\`date +'%H_%M_%S_%N'\`" run_check $FSSTRESS_PROG -p $procs \ -x "$snapshot_cmd" -X $num_snapshots -d $SCRATCH_MNT -n $ops } ops=10000 procs=4 snapshots=500 workout $ops $procs $snapshots Example of btrfsck's (btrfs check) behaviour against such filesystem: $ btrfsck /dev/loop0 root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Found 7 roots with an outdated root item. Please run a filesystem check with the option --repair to fix them. $ echo $? 1 $ btrfsck --repair /dev/loop0 enabling repair mode fixing root item for root 311, current bytenr 44630016, current gen 60, current level 1, new bytenr 44957696, new gen 61, new level 1 fixing root item for root 1480, current bytenr 1003569152, current gen 1271, current level 1, new bytenr 1004175360, new gen 1272, new level 1 fixing root item for root 1509, current bytenr 1037434880, current gen 1300, current level 1, new bytenr 1038467072, new gen 1301, new level 1 fixing root item for root 1562, current bytenr 33636352, current gen 1354, current level 1, new bytenr 34455552, new gen 1355, new level 1 fixing root item for root 3094, current bytenr 1011712000, current gen 2935, current level 1, new bytenr 1008484352, new gen 2936, new level 1 fixing root item for root 3716, current bytenr 80805888, current gen 3578, current level 1, new bytenr 73515008, new gen 3579, new level 1 fixing root item for root 4085, current bytenr 714031104, current gen 3958, current level 1, new bytenr 716816384, new gen 3959, new level 1 Fixed 7 roots. Checking filesystem on /dev/loop0 UUID: 2186e9b9-c977-4a35-9c7b-69c6609d4620 checking extents checking free space cache cache and super generation don't match, space cache will be invalidated checking fs roots checking csums checking root refs found 618537000 bytes used err is 0 total csum bytes: 130824 total tree bytes: 601620480 total fs tree bytes: 580288512 total extent tree bytes: 18464768 btree space waste bytes: 136939144 file data blocks allocated: 34150318080 referenced 27815415808 Btrfs v3.17-rc3-2-gbbe1dd8 $ echo $? 0 Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-10-18 01:20:08 +08:00
if (clear_ino_cache) {
ret = clear_ino_cache_items();
err = ret;
goto close_out;
}
/*
* repair mode will force us to commit transaction which
* will make us fail to load log tree when mounting.
*/
if (opt_check_repair && btrfs_super_log_root(gfs_info->super_copy)) {
ret = ask_user("repair mode will force to clear out log tree, are you sure?");
if (!ret) {
ret = 1;
err |= !!ret;
goto close_out;
}
ret = zero_log_tree(root);
err |= !!ret;
if (ret) {
error("failed to zero log tree: %d", ret);
goto close_out;
}
}
if (qgroup_report) {
printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
uuidbuf);
ret = qgroup_verify_all(gfs_info);
err |= !!ret;
if (ret >= 0)
report_qgroups(1);
goto close_out;
}
btrfs-progs: show extent state for a subvolume The qgroup verification code can trivially be extended to provide extended information on the extents which a subvolume root references. Along with qgroup-verify, I have found this tool to be invaluable when tracking down extent references. The patch adds a switch to the check subcommand '--subvol-extents' which takes as args a single subvolume id. When run with the switch, we'll print out each extent that the subvolume references. The extent printout gives standard extent info you would expect along with information on which other roots reference it. Sample output follows - this is a few lines from a run on a subvolume I've been testing qgroup changes on: Print extent state for subvolume 281 on /dev/vdb2 UUID: 8203ca66-9858-4e3f-b447-5bbaacf79c02 Offset Len Root Refs Roots 12582912 20480 12 257 279 280 281 282 283 284 285 286 287 288 289 12603392 8192 12 257 279 280 281 282 283 284 285 286 287 288 289 12611584 12288 12 257 279 280 281 282 283 284 285 286 287 288 289 <snip a bunch of extents to show some variety> 124583936 16384 4 281 282 283 280 125075456 16384 4 280 281 282 283 126255104 16384 11 257 280 281 282 283 284 285 286 287 288 289 4763508736 4096 3 279 280 281 In case it wasn't clear, this applies on top of my qgroup verify patch: "btrfs-progs: add quota group verify code" A branch with all this can be found on github: https://github.com/markfasheh/btrfs-progs-patches/tree/qgroup-verify Please apply, Signed-off-by: Mark Fasheh <mfasheh@suse.de> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-07-03 04:34:41 +08:00
if (subvolid) {
printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
subvolid, argv[optind], uuidbuf);
ret = print_extent_state(gfs_info, subvolid);
err |= !!ret;
btrfs-progs: show extent state for a subvolume The qgroup verification code can trivially be extended to provide extended information on the extents which a subvolume root references. Along with qgroup-verify, I have found this tool to be invaluable when tracking down extent references. The patch adds a switch to the check subcommand '--subvol-extents' which takes as args a single subvolume id. When run with the switch, we'll print out each extent that the subvolume references. The extent printout gives standard extent info you would expect along with information on which other roots reference it. Sample output follows - this is a few lines from a run on a subvolume I've been testing qgroup changes on: Print extent state for subvolume 281 on /dev/vdb2 UUID: 8203ca66-9858-4e3f-b447-5bbaacf79c02 Offset Len Root Refs Roots 12582912 20480 12 257 279 280 281 282 283 284 285 286 287 288 289 12603392 8192 12 257 279 280 281 282 283 284 285 286 287 288 289 12611584 12288 12 257 279 280 281 282 283 284 285 286 287 288 289 <snip a bunch of extents to show some variety> 124583936 16384 4 281 282 283 280 125075456 16384 4 280 281 282 283 126255104 16384 11 257 280 281 282 283 284 285 286 287 288 289 4763508736 4096 3 279 280 281 In case it wasn't clear, this applies on top of my qgroup verify patch: "btrfs-progs: add quota group verify code" A branch with all this can be found on github: https://github.com/markfasheh/btrfs-progs-patches/tree/qgroup-verify Please apply, Signed-off-by: Mark Fasheh <mfasheh@suse.de> Signed-off-by: David Sterba <dsterba@suse.cz>
2014-07-03 04:34:41 +08:00
goto close_out;
}
if (init_extent_tree || init_csum_tree) {
struct btrfs_trans_handle *trans;
trans = btrfs_start_transaction(gfs_info->tree_root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error_msg(ERROR_MSG_START_TRANS, "%m");
err |= !!ret;
goto close_out;
}
trans->reinit_extent_tree = true;
if (init_extent_tree) {
printf("Creating a new extent tree\n");
ret = reinit_extent_tree(trans,
check_mode == CHECK_MODE_ORIGINAL);
err |= !!ret;
if (ret)
goto close_out;
}
if (init_csum_tree) {
printf("Reinitialize checksum tree\n");
ret = reinit_global_roots(trans,
BTRFS_CSUM_TREE_OBJECTID);
if (ret) {
error("checksum tree initialization failed: %d",
ret);
ret = -EIO;
err |= !!ret;
goto close_out;
}
ret = fill_csum_tree(trans, init_extent_tree);
err |= !!ret;
if (ret) {
error("checksum tree refilling failed: %d", ret);
return -EIO;
}
}
/*
* Ok now we commit and run the normal fsck, which will add
* extent entries for all of the items it finds.
*/
ret = btrfs_commit_transaction(trans, gfs_info->tree_root);
err |= !!ret;
if (ret)
goto close_out;
}
ret = check_global_roots_uptodate();
if (ret) {
err |= !!ret;
goto close_out;
}
if (!init_extent_tree) {
if (!g_task_ctx.progress_enabled) {
fprintf(stderr, "[1/7] checking root items\n");
} else {
g_task_ctx.tp = TASK_ROOT_ITEMS;
task_start(g_task_ctx.info, &g_task_ctx.start_time,
&g_task_ctx.item_count);
}
ret = repair_root_items();
task_stop(g_task_ctx.info);
if (ret < 0) {
err = !!ret;
errno = -ret;
error("failed to repair root items: %m");
/*
* For repair, if we can't repair root items, it's
* fatal. But for non-repair, it's pretty rare to hit
* such v3.17 era bug, we want to continue check.
*/
if (opt_check_repair)
goto close_out;
err |= 1;
} else {
if (opt_check_repair) {
fprintf(stderr, "Fixed %d roots.\n", ret);
ret = 0;
} else if (ret > 0) {
fprintf(stderr,
"Found %d roots with an outdated root item.\n",
ret);
fprintf(stderr,
"Please run a filesystem check with the option --repair to fix them.\n");
ret = 1;
err |= ret;
}
}
} else {
fprintf(stderr, "[1/7] checking root items... skipped\n");
}
if (!g_task_ctx.progress_enabled) {
fprintf(stderr, "[2/7] checking extents\n");
} else {
g_task_ctx.tp = TASK_EXTENTS;
task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
}
ret = do_check_chunks_and_extents();
task_stop(g_task_ctx.info);
err |= !!ret;
if (ret)
error(
"errors found in extent allocation tree or chunk allocation");
/* Only re-check super size after we checked and repaired the fs */
err |= !is_super_size_valid();
is_free_space_tree = btrfs_fs_compat_ro(gfs_info, FREE_SPACE_TREE);
if (!g_task_ctx.progress_enabled) {
if (is_free_space_tree)
fprintf(stderr, "[3/7] checking free space tree\n");
else
fprintf(stderr, "[3/7] checking free space cache\n");
} else {
g_task_ctx.tp = TASK_FREE_SPACE;
task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
}
ret = validate_free_space_cache(root);
task_stop(g_task_ctx.info);
err |= !!ret;
/*
* We used to have to have these hole extents in between our real
* extents so if we don't have this flag set we need to make sure there
* are no gaps in the file extents for inodes, otherwise we can just
* ignore it when this happens.
*/
no_holes = btrfs_fs_incompat(gfs_info, NO_HOLES);
if (!g_task_ctx.progress_enabled) {
fprintf(stderr, "[4/7] checking fs roots\n");
} else {
g_task_ctx.tp = TASK_FS_ROOTS;
task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
}
ret = do_check_fs_roots(&root_cache);
task_stop(g_task_ctx.info);
err |= !!ret;
if (ret) {
error("errors found in fs roots");
goto out;
}
if (!g_task_ctx.progress_enabled) {
if (check_data_csum)
fprintf(stderr, "[5/7] checking csums against data\n");
else
fprintf(stderr,
"[5/7] checking only csums items (without verifying data)\n");
} else {
g_task_ctx.tp = TASK_CSUMS;
task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
}
ret = check_csums();
task_stop(g_task_ctx.info);
/*
* Data csum error is not fatal, and it may indicate more serious
* corruption, continue checking.
*/
if (ret)
error("errors found in csum tree");
err |= !!ret;
/* For low memory mode, check_fs_roots_v2 handles root refs */
if (check_mode != CHECK_MODE_LOWMEM) {
if (!g_task_ctx.progress_enabled) {
fprintf(stderr, "[6/7] checking root refs\n");
} else {
g_task_ctx.tp = TASK_ROOT_REFS;
task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
}
ret = check_root_refs(root, &root_cache);
task_stop(g_task_ctx.info);
err |= !!ret;
if (ret) {
error("errors found in root refs");
goto out;
}
} else {
fprintf(stderr,
"[6/7] checking root refs done with fs roots in lowmem mode, skipping\n");
}
while (opt_check_repair && !list_empty(&gfs_info->recow_ebs)) {
struct extent_buffer *eb;
eb = list_first_entry(&gfs_info->recow_ebs,
struct extent_buffer, recow);
list_del_init(&eb->recow);
ret = recow_extent_buffer(root, eb);
free_extent_buffer(eb);
err |= !!ret;
if (ret) {
error("fails to fix transid errors");
break;
}
}
while (!list_empty(&delete_items)) {
struct bad_item *bad;
bad = list_first_entry(&delete_items, struct bad_item, list);
list_del_init(&bad->list);
if (opt_check_repair) {
ret = delete_bad_item(root, bad);
err |= !!ret;
}
free(bad);
}
if (gfs_info->quota_enabled) {
if (!g_task_ctx.progress_enabled) {
fprintf(stderr, "[7/7] checking quota groups\n");
} else {
g_task_ctx.tp = TASK_QGROUPS;
task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
}
qgroup_verify_ret = qgroup_verify_all(gfs_info);
task_stop(g_task_ctx.info);
if (qgroup_verify_ret < 0) {
error("failed to check quota groups");
err |= !!qgroup_verify_ret;
goto out;
}
report_qgroups(0);
ret = repair_qgroups(gfs_info, &qgroups_repaired, false);
if (ret) {
error("failed to repair quota groups");
goto out;
}
if (qgroup_verify_ret && (!qgroups_repaired || ret))
err |= !!qgroup_verify_ret;
ret = 0;
} else {
fprintf(stderr,
"[7/7] checking quota groups skipped (not enabled on this FS)\n");
}
if (!list_empty(&gfs_info->recow_ebs)) {
error("transid errors in file system");
ret = 1;
err |= !!ret;
}
out:
printf("found %llu bytes used, ",
(unsigned long long)bytes_used);
if (err)
printf("error(s) found\n");
else
printf("no error found\n");
2007-06-12 20:08:04 +08:00
printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
printf("total tree bytes: %llu\n",
(unsigned long long)total_btree_bytes);
printf("total fs tree bytes: %llu\n",
(unsigned long long)total_fs_tree_bytes);
printf("total extent tree bytes: %llu\n",
(unsigned long long)total_extent_tree_bytes);
2007-06-12 20:08:04 +08:00
printf("btree space waste bytes: %llu\n",
(unsigned long long)btree_space_waste);
printf("file data blocks allocated: %llu\n referenced %llu\n",
(unsigned long long)data_bytes_allocated,
(unsigned long long)data_bytes_referenced);
free_qgroup_counts();
free_root_recs_tree(&root_cache);
close_out:
close_ctree(root);
err_out:
if (g_task_ctx.progress_enabled)
task_deinit(g_task_ctx.info);
return err;
}
DEFINE_SIMPLE_COMMAND(check, "check");