mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-17 01:34:00 +08:00
Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason: "This has our merge window series of cleanups and fixes. These target a wide range of issues, but do include some important fixes for qgroups, O_DIRECT, and fsync handling. Jeff Mahoney moved around a few definitions to make them easier for userland to consume. Also whiteout support is included now that issues with overlayfs have been cleared up. I have one more fix pending for page faults during btrfs_copy_from_user, but I wanted to get this bulk out the door first" * 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits) btrfs: fix memory leak during RAID 5/6 device replacement Btrfs: add semaphore to synchronize direct IO writes with fsync Btrfs: fix race between block group relocation and nocow writes Btrfs: fix race between fsync and direct IO writes for prealloc extents Btrfs: fix number of transaction units for renames with whiteout Btrfs: pin logs earlier when doing a rename exchange operation Btrfs: unpin logs if rename exchange operation fails Btrfs: fix inode leak on failure to setup whiteout inode in rename btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT Btrfs: pin log earlier when renaming Btrfs: unpin log if rename operation fails Btrfs: don't do unnecessary delalloc flushes when relocating Btrfs: don't wait for unrelated IO to finish before relocation Btrfs: fix empty symlink after creating symlink and fsync parent dir Btrfs: fix for incorrect directory entries after fsync log replay btrfs: build fixup for qgroup_account_snapshot btrfs: qgroup: Fix qgroup accounting when creating snapshot Btrfs: fix fspath error deallocation btrfs: make find_workspace warn if there are no workspaces btrfs: make find_workspace always succeed ...
This commit is contained in:
commit
07be1337b9
@ -1991,7 +1991,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
|
||||
|
||||
ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
|
||||
if (!ifp) {
|
||||
kfree(fspath);
|
||||
vfree(fspath);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
|
@ -196,6 +196,16 @@ struct btrfs_inode {
|
||||
struct list_head delayed_iput;
|
||||
long delayed_iput_count;
|
||||
|
||||
/*
|
||||
* To avoid races between lockless (i_mutex not held) direct IO writes
|
||||
* and concurrent fsync requests. Direct IO writes must acquire read
|
||||
* access on this semaphore for creating an extent map and its
|
||||
* corresponding ordered extent. The fast fsync path must acquire write
|
||||
* access on this semaphore before it collects ordered extents and
|
||||
* extent maps.
|
||||
*/
|
||||
struct rw_semaphore dio_sem;
|
||||
|
||||
struct inode vfs_inode;
|
||||
};
|
||||
|
||||
|
@ -743,8 +743,11 @@ out:
|
||||
static struct {
|
||||
struct list_head idle_ws;
|
||||
spinlock_t ws_lock;
|
||||
int num_ws;
|
||||
atomic_t alloc_ws;
|
||||
/* Number of free workspaces */
|
||||
int free_ws;
|
||||
/* Total number of allocated workspaces */
|
||||
atomic_t total_ws;
|
||||
/* Waiters for a free workspace */
|
||||
wait_queue_head_t ws_wait;
|
||||
} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
|
||||
|
||||
@ -758,16 +761,34 @@ void __init btrfs_init_compress(void)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
|
||||
struct list_head *workspace;
|
||||
|
||||
INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
|
||||
spin_lock_init(&btrfs_comp_ws[i].ws_lock);
|
||||
atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
|
||||
atomic_set(&btrfs_comp_ws[i].total_ws, 0);
|
||||
init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
|
||||
|
||||
/*
|
||||
* Preallocate one workspace for each compression type so
|
||||
* we can guarantee forward progress in the worst case
|
||||
*/
|
||||
workspace = btrfs_compress_op[i]->alloc_workspace();
|
||||
if (IS_ERR(workspace)) {
|
||||
printk(KERN_WARNING
|
||||
"BTRFS: cannot preallocate compression workspace, will try later");
|
||||
} else {
|
||||
atomic_set(&btrfs_comp_ws[i].total_ws, 1);
|
||||
btrfs_comp_ws[i].free_ws = 1;
|
||||
list_add(workspace, &btrfs_comp_ws[i].idle_ws);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* this finds an available workspace or allocates a new one
|
||||
* ERR_PTR is returned if things go bad.
|
||||
* This finds an available workspace or allocates a new one.
|
||||
* If it's not possible to allocate a new one, waits until there's one.
|
||||
* Preallocation guarantees forward progress, so we do not return
|
||||
* errors.
|
||||
*/
|
||||
static struct list_head *find_workspace(int type)
|
||||
{
|
||||
@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type)
|
||||
|
||||
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
|
||||
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
|
||||
atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws;
|
||||
atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws;
|
||||
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
|
||||
int *num_ws = &btrfs_comp_ws[idx].num_ws;
|
||||
int *free_ws = &btrfs_comp_ws[idx].free_ws;
|
||||
again:
|
||||
spin_lock(ws_lock);
|
||||
if (!list_empty(idle_ws)) {
|
||||
workspace = idle_ws->next;
|
||||
list_del(workspace);
|
||||
(*num_ws)--;
|
||||
(*free_ws)--;
|
||||
spin_unlock(ws_lock);
|
||||
return workspace;
|
||||
|
||||
}
|
||||
if (atomic_read(alloc_ws) > cpus) {
|
||||
if (atomic_read(total_ws) > cpus) {
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
spin_unlock(ws_lock);
|
||||
prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
|
||||
if (atomic_read(alloc_ws) > cpus && !*num_ws)
|
||||
if (atomic_read(total_ws) > cpus && !*free_ws)
|
||||
schedule();
|
||||
finish_wait(ws_wait, &wait);
|
||||
goto again;
|
||||
}
|
||||
atomic_inc(alloc_ws);
|
||||
atomic_inc(total_ws);
|
||||
spin_unlock(ws_lock);
|
||||
|
||||
workspace = btrfs_compress_op[idx]->alloc_workspace();
|
||||
if (IS_ERR(workspace)) {
|
||||
atomic_dec(alloc_ws);
|
||||
atomic_dec(total_ws);
|
||||
wake_up(ws_wait);
|
||||
|
||||
/*
|
||||
* Do not return the error but go back to waiting. There's a
|
||||
* workspace preallocated for each type and the compression
|
||||
* time is bounded so we get to a workspace eventually. This
|
||||
* makes our caller's life easier.
|
||||
*
|
||||
* To prevent silent and low-probability deadlocks (when the
|
||||
* initial preallocation fails), check if there are any
|
||||
* workspaces at all.
|
||||
*/
|
||||
if (atomic_read(total_ws) == 0) {
|
||||
static DEFINE_RATELIMIT_STATE(_rs,
|
||||
/* once per minute */ 60 * HZ,
|
||||
/* no burst */ 1);
|
||||
|
||||
if (__ratelimit(&_rs)) {
|
||||
printk(KERN_WARNING
|
||||
"no compression workspaces, low memory, retrying");
|
||||
}
|
||||
}
|
||||
goto again;
|
||||
}
|
||||
return workspace;
|
||||
}
|
||||
@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace)
|
||||
int idx = type - 1;
|
||||
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
|
||||
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
|
||||
atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws;
|
||||
atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws;
|
||||
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
|
||||
int *num_ws = &btrfs_comp_ws[idx].num_ws;
|
||||
int *free_ws = &btrfs_comp_ws[idx].free_ws;
|
||||
|
||||
spin_lock(ws_lock);
|
||||
if (*num_ws < num_online_cpus()) {
|
||||
if (*free_ws < num_online_cpus()) {
|
||||
list_add(workspace, idle_ws);
|
||||
(*num_ws)++;
|
||||
(*free_ws)++;
|
||||
spin_unlock(ws_lock);
|
||||
goto wake;
|
||||
}
|
||||
spin_unlock(ws_lock);
|
||||
|
||||
btrfs_compress_op[idx]->free_workspace(workspace);
|
||||
atomic_dec(alloc_ws);
|
||||
atomic_dec(total_ws);
|
||||
wake:
|
||||
/*
|
||||
* Make sure counter is updated before we wake up waiters.
|
||||
@ -857,7 +900,7 @@ static void free_workspaces(void)
|
||||
workspace = btrfs_comp_ws[i].idle_ws.next;
|
||||
list_del(workspace);
|
||||
btrfs_compress_op[i]->free_workspace(workspace);
|
||||
atomic_dec(&btrfs_comp_ws[i].alloc_ws);
|
||||
atomic_dec(&btrfs_comp_ws[i].total_ws);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
|
||||
int ret;
|
||||
|
||||
workspace = find_workspace(type);
|
||||
if (IS_ERR(workspace))
|
||||
return PTR_ERR(workspace);
|
||||
|
||||
ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
|
||||
start, len, pages,
|
||||
@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
|
||||
int ret;
|
||||
|
||||
workspace = find_workspace(type);
|
||||
if (IS_ERR(workspace))
|
||||
return PTR_ERR(workspace);
|
||||
|
||||
ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
|
||||
disk_start,
|
||||
@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
|
||||
int ret;
|
||||
|
||||
workspace = find_workspace(type);
|
||||
if (IS_ERR(workspace))
|
||||
return PTR_ERR(workspace);
|
||||
|
||||
ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
|
||||
dest_page, start_byte,
|
||||
|
@ -1011,7 +1011,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
if (refs == 0) {
|
||||
ret = -EROFS;
|
||||
btrfs_std_error(root->fs_info, ret, NULL);
|
||||
btrfs_handle_fs_error(root->fs_info, ret, NULL);
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
@ -1928,7 +1928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||
child = read_node_slot(root, mid, 0);
|
||||
if (!child) {
|
||||
ret = -EROFS;
|
||||
btrfs_std_error(root->fs_info, ret, NULL);
|
||||
btrfs_handle_fs_error(root->fs_info, ret, NULL);
|
||||
goto enospc;
|
||||
}
|
||||
|
||||
@ -2031,7 +2031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
if (!left) {
|
||||
ret = -EROFS;
|
||||
btrfs_std_error(root->fs_info, ret, NULL);
|
||||
btrfs_handle_fs_error(root->fs_info, ret, NULL);
|
||||
goto enospc;
|
||||
}
|
||||
wret = balance_node_right(trans, root, mid, left);
|
||||
|
1123
fs/btrfs/ctree.h
1123
fs/btrfs/ctree.h
File diff suppressed because it is too large
Load Diff
@ -134,7 +134,7 @@ again:
|
||||
/* cached in the btrfs inode and can be accessed */
|
||||
atomic_add(2, &node->refs);
|
||||
|
||||
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret) {
|
||||
kmem_cache_free(delayed_node_cache, node);
|
||||
return ERR_PTR(ret);
|
||||
|
@ -44,9 +44,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
|
||||
struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *srcdev,
|
||||
struct btrfs_device *tgtdev);
|
||||
static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid,
|
||||
char *srcdev_name,
|
||||
struct btrfs_device **device);
|
||||
static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
|
||||
static int btrfs_dev_replace_kthread(void *data);
|
||||
static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info);
|
||||
@ -305,8 +302,8 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
|
||||
dev_replace->cursor_left_last_write_of_item;
|
||||
}
|
||||
|
||||
int btrfs_dev_replace_start(struct btrfs_root *root,
|
||||
struct btrfs_ioctl_dev_replace_args *args)
|
||||
int btrfs_dev_replace_start(struct btrfs_root *root, char *tgtdev_name,
|
||||
u64 srcdevid, char *srcdev_name, int read_src)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
@ -315,29 +312,16 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
|
||||
struct btrfs_device *tgt_device = NULL;
|
||||
struct btrfs_device *src_device = NULL;
|
||||
|
||||
switch (args->start.cont_reading_from_srcdev_mode) {
|
||||
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
|
||||
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
|
||||
args->start.tgtdev_name[0] == '\0')
|
||||
return -EINVAL;
|
||||
|
||||
/* the disk copy procedure reuses the scrub code */
|
||||
mutex_lock(&fs_info->volume_mutex);
|
||||
ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid,
|
||||
args->start.srcdev_name,
|
||||
&src_device);
|
||||
ret = btrfs_find_device_by_devspec(root, srcdevid,
|
||||
srcdev_name, &src_device);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->volume_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name,
|
||||
ret = btrfs_init_dev_replace_tgtdev(root, tgtdev_name,
|
||||
src_device, &tgt_device);
|
||||
mutex_unlock(&fs_info->volume_mutex);
|
||||
if (ret)
|
||||
@ -364,18 +348,17 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
|
||||
break;
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
|
||||
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
|
||||
ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
dev_replace->cont_reading_from_srcdev_mode =
|
||||
args->start.cont_reading_from_srcdev_mode;
|
||||
dev_replace->cont_reading_from_srcdev_mode = read_src;
|
||||
WARN_ON(!src_device);
|
||||
dev_replace->srcdev = src_device;
|
||||
WARN_ON(!tgt_device);
|
||||
dev_replace->tgtdev = tgt_device;
|
||||
|
||||
btrfs_info_in_rcu(root->fs_info,
|
||||
btrfs_info_in_rcu(fs_info,
|
||||
"dev_replace from %s (devid %llu) to %s started",
|
||||
src_device->missing ? "<missing disk>" :
|
||||
rcu_str_deref(src_device->name),
|
||||
@ -396,14 +379,13 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
|
||||
dev_replace->item_needs_writeback = 1;
|
||||
atomic64_set(&dev_replace->num_write_errors, 0);
|
||||
atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
|
||||
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
|
||||
btrfs_dev_replace_unlock(dev_replace, 1);
|
||||
|
||||
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
|
||||
if (ret)
|
||||
btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
|
||||
btrfs_err(fs_info, "kobj add dev failed %d\n", ret);
|
||||
|
||||
btrfs_wait_ordered_roots(root->fs_info, -1);
|
||||
btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
|
||||
|
||||
/* force writing the updated state information to disk */
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
@ -421,11 +403,9 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
|
||||
btrfs_device_get_total_bytes(src_device),
|
||||
&dev_replace->scrub_progress, 0, 1);
|
||||
|
||||
ret = btrfs_dev_replace_finishing(root->fs_info, ret);
|
||||
/* don't warn if EINPROGRESS, someone else might be running scrub */
|
||||
ret = btrfs_dev_replace_finishing(fs_info, ret);
|
||||
if (ret == -EINPROGRESS) {
|
||||
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS;
|
||||
ret = 0;
|
||||
ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS;
|
||||
} else {
|
||||
WARN_ON(ret);
|
||||
}
|
||||
@ -440,6 +420,35 @@ leave:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_dev_replace_by_ioctl(struct btrfs_root *root,
|
||||
struct btrfs_ioctl_dev_replace_args *args)
|
||||
{
|
||||
int ret;
|
||||
|
||||
switch (args->start.cont_reading_from_srcdev_mode) {
|
||||
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
|
||||
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
|
||||
args->start.tgtdev_name[0] == '\0')
|
||||
return -EINVAL;
|
||||
|
||||
ret = btrfs_dev_replace_start(root, args->start.tgtdev_name,
|
||||
args->start.srcdevid,
|
||||
args->start.srcdev_name,
|
||||
args->start.cont_reading_from_srcdev_mode);
|
||||
args->result = ret;
|
||||
/* don't warn if EINPROGRESS, someone else might be running scrub */
|
||||
if (ret == BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS)
|
||||
ret = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* blocked until all in-flight bios are finished.
|
||||
*/
|
||||
@ -495,7 +504,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
|
||||
return ret;
|
||||
}
|
||||
btrfs_wait_ordered_roots(root->fs_info, -1);
|
||||
btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
|
||||
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
if (IS_ERR(trans)) {
|
||||
@ -560,10 +569,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
ASSERT(list_empty(&src_device->resized_list));
|
||||
tgt_device->commit_total_bytes = src_device->commit_total_bytes;
|
||||
tgt_device->commit_bytes_used = src_device->bytes_used;
|
||||
if (fs_info->sb->s_bdev == src_device->bdev)
|
||||
fs_info->sb->s_bdev = tgt_device->bdev;
|
||||
if (fs_info->fs_devices->latest_bdev == src_device->bdev)
|
||||
fs_info->fs_devices->latest_bdev = tgt_device->bdev;
|
||||
|
||||
btrfs_assign_next_active_device(fs_info, src_device, tgt_device);
|
||||
|
||||
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
|
||||
fs_info->fs_devices->rw_devices++;
|
||||
|
||||
@ -626,25 +634,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
|
||||
write_unlock(&em_tree->lock);
|
||||
}
|
||||
|
||||
static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid,
|
||||
char *srcdev_name,
|
||||
struct btrfs_device **device)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (srcdevid) {
|
||||
ret = 0;
|
||||
*device = btrfs_find_device(root->fs_info, srcdevid, NULL,
|
||||
NULL);
|
||||
if (!*device)
|
||||
ret = -ENOENT;
|
||||
} else {
|
||||
ret = btrfs_find_device_missing_or_by_path(root, srcdev_name,
|
||||
device);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_ioctl_dev_replace_args *args)
|
||||
{
|
||||
|
@ -25,8 +25,10 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info);
|
||||
void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_dev_replace_start(struct btrfs_root *root,
|
||||
int btrfs_dev_replace_by_ioctl(struct btrfs_root *root,
|
||||
struct btrfs_ioctl_dev_replace_args *args);
|
||||
int btrfs_dev_replace_start(struct btrfs_root *root, char *tgtdev_name,
|
||||
u64 srcdevid, char *srcdev_name, int read_src);
|
||||
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_ioctl_dev_replace_args *args);
|
||||
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
|
||||
|
@ -1640,7 +1640,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -2417,7 +2417,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
|
||||
/* returns with log_tree_root freed on success */
|
||||
ret = btrfs_recover_log_trees(log_tree_root);
|
||||
if (ret) {
|
||||
btrfs_std_error(tree_root->fs_info, ret,
|
||||
btrfs_handle_fs_error(tree_root->fs_info, ret,
|
||||
"Failed to recover log tree");
|
||||
free_extent_buffer(log_tree_root->node);
|
||||
kfree(log_tree_root);
|
||||
@ -2517,6 +2517,7 @@ int open_ctree(struct super_block *sb,
|
||||
int num_backups_tried = 0;
|
||||
int backup_index = 0;
|
||||
int max_active;
|
||||
bool cleaner_mutex_locked = false;
|
||||
|
||||
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
|
||||
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
|
||||
@ -2713,7 +2714,7 @@ int open_ctree(struct super_block *sb,
|
||||
* Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
|
||||
*/
|
||||
if (btrfs_check_super_csum(bh->b_data)) {
|
||||
printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
|
||||
btrfs_err(fs_info, "superblock checksum mismatch");
|
||||
err = -EINVAL;
|
||||
brelse(bh);
|
||||
goto fail_alloc;
|
||||
@ -2733,7 +2734,7 @@ int open_ctree(struct super_block *sb,
|
||||
|
||||
ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "BTRFS: superblock contains fatal errors\n");
|
||||
btrfs_err(fs_info, "superblock contains fatal errors");
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
}
|
||||
@ -2768,9 +2769,9 @@ int open_ctree(struct super_block *sb,
|
||||
features = btrfs_super_incompat_flags(disk_super) &
|
||||
~BTRFS_FEATURE_INCOMPAT_SUPP;
|
||||
if (features) {
|
||||
printk(KERN_ERR "BTRFS: couldn't mount because of "
|
||||
"unsupported optional features (%Lx).\n",
|
||||
features);
|
||||
btrfs_err(fs_info,
|
||||
"cannot mount because of unsupported optional features (%llx)",
|
||||
features);
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
}
|
||||
@ -2781,7 +2782,7 @@ int open_ctree(struct super_block *sb,
|
||||
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
|
||||
|
||||
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
|
||||
printk(KERN_INFO "BTRFS: has skinny extents\n");
|
||||
btrfs_info(fs_info, "has skinny extents");
|
||||
|
||||
/*
|
||||
* flag our filesystem as having big metadata blocks if
|
||||
@ -2789,7 +2790,8 @@ int open_ctree(struct super_block *sb,
|
||||
*/
|
||||
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
|
||||
if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
|
||||
printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
|
||||
btrfs_info(fs_info,
|
||||
"flagging fs with big metadata feature");
|
||||
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
|
||||
}
|
||||
|
||||
@ -2805,9 +2807,9 @@ int open_ctree(struct super_block *sb,
|
||||
*/
|
||||
if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
|
||||
(sectorsize != nodesize)) {
|
||||
printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes "
|
||||
"are not allowed for mixed block groups on %s\n",
|
||||
sb->s_id);
|
||||
btrfs_err(fs_info,
|
||||
"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
|
||||
nodesize, sectorsize);
|
||||
goto fail_alloc;
|
||||
}
|
||||
|
||||
@ -2820,8 +2822,8 @@ int open_ctree(struct super_block *sb,
|
||||
features = btrfs_super_compat_ro_flags(disk_super) &
|
||||
~BTRFS_FEATURE_COMPAT_RO_SUPP;
|
||||
if (!(sb->s_flags & MS_RDONLY) && features) {
|
||||
printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
|
||||
"unsupported option features (%Lx).\n",
|
||||
btrfs_err(fs_info,
|
||||
"cannot mount read-write because of unsupported optional features (%llx)",
|
||||
features);
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
@ -2850,8 +2852,7 @@ int open_ctree(struct super_block *sb,
|
||||
ret = btrfs_read_sys_array(tree_root);
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "BTRFS: failed to read the system "
|
||||
"array on %s\n", sb->s_id);
|
||||
btrfs_err(fs_info, "failed to read the system array: %d", ret);
|
||||
goto fail_sb_buffer;
|
||||
}
|
||||
|
||||
@ -2865,8 +2866,7 @@ int open_ctree(struct super_block *sb,
|
||||
generation);
|
||||
if (IS_ERR(chunk_root->node) ||
|
||||
!extent_buffer_uptodate(chunk_root->node)) {
|
||||
printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
|
||||
sb->s_id);
|
||||
btrfs_err(fs_info, "failed to read chunk root");
|
||||
if (!IS_ERR(chunk_root->node))
|
||||
free_extent_buffer(chunk_root->node);
|
||||
chunk_root->node = NULL;
|
||||
@ -2880,8 +2880,7 @@ int open_ctree(struct super_block *sb,
|
||||
|
||||
ret = btrfs_read_chunk_tree(chunk_root);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n",
|
||||
sb->s_id);
|
||||
btrfs_err(fs_info, "failed to read chunk tree: %d", ret);
|
||||
goto fail_tree_roots;
|
||||
}
|
||||
|
||||
@ -2892,8 +2891,7 @@ int open_ctree(struct super_block *sb,
|
||||
btrfs_close_extra_devices(fs_devices, 0);
|
||||
|
||||
if (!fs_devices->latest_bdev) {
|
||||
printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
|
||||
sb->s_id);
|
||||
btrfs_err(fs_info, "failed to read devices");
|
||||
goto fail_tree_roots;
|
||||
}
|
||||
|
||||
@ -2905,8 +2903,7 @@ retry_root_backup:
|
||||
generation);
|
||||
if (IS_ERR(tree_root->node) ||
|
||||
!extent_buffer_uptodate(tree_root->node)) {
|
||||
printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
|
||||
sb->s_id);
|
||||
btrfs_warn(fs_info, "failed to read tree root");
|
||||
if (!IS_ERR(tree_root->node))
|
||||
free_extent_buffer(tree_root->node);
|
||||
tree_root->node = NULL;
|
||||
@ -2938,20 +2935,19 @@ retry_root_backup:
|
||||
|
||||
ret = btrfs_recover_balance(fs_info);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "BTRFS: failed to recover balance\n");
|
||||
btrfs_err(fs_info, "failed to recover balance: %d", ret);
|
||||
goto fail_block_groups;
|
||||
}
|
||||
|
||||
ret = btrfs_init_dev_stats(fs_info);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n",
|
||||
ret);
|
||||
btrfs_err(fs_info, "failed to init dev_stats: %d", ret);
|
||||
goto fail_block_groups;
|
||||
}
|
||||
|
||||
ret = btrfs_init_dev_replace(fs_info);
|
||||
if (ret) {
|
||||
pr_err("BTRFS: failed to init dev_replace: %d\n", ret);
|
||||
btrfs_err(fs_info, "failed to init dev_replace: %d", ret);
|
||||
goto fail_block_groups;
|
||||
}
|
||||
|
||||
@ -2959,31 +2955,33 @@ retry_root_backup:
|
||||
|
||||
ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
|
||||
if (ret) {
|
||||
pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret);
|
||||
btrfs_err(fs_info, "failed to init sysfs fsid interface: %d",
|
||||
ret);
|
||||
goto fail_block_groups;
|
||||
}
|
||||
|
||||
ret = btrfs_sysfs_add_device(fs_devices);
|
||||
if (ret) {
|
||||
pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret);
|
||||
btrfs_err(fs_info, "failed to init sysfs device interface: %d",
|
||||
ret);
|
||||
goto fail_fsdev_sysfs;
|
||||
}
|
||||
|
||||
ret = btrfs_sysfs_add_mounted(fs_info);
|
||||
if (ret) {
|
||||
pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
|
||||
btrfs_err(fs_info, "failed to init sysfs interface: %d", ret);
|
||||
goto fail_fsdev_sysfs;
|
||||
}
|
||||
|
||||
ret = btrfs_init_space_info(fs_info);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret);
|
||||
btrfs_err(fs_info, "failed to initialize space info: %d", ret);
|
||||
goto fail_sysfs;
|
||||
}
|
||||
|
||||
ret = btrfs_read_block_groups(fs_info->extent_root);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
|
||||
btrfs_err(fs_info, "failed to read block groups: %d", ret);
|
||||
goto fail_sysfs;
|
||||
}
|
||||
fs_info->num_tolerated_disk_barrier_failures =
|
||||
@ -2991,12 +2989,20 @@ retry_root_backup:
|
||||
if (fs_info->fs_devices->missing_devices >
|
||||
fs_info->num_tolerated_disk_barrier_failures &&
|
||||
!(sb->s_flags & MS_RDONLY)) {
|
||||
pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n",
|
||||
btrfs_warn(fs_info,
|
||||
"missing devices (%llu) exceeds the limit (%d), writeable mount is not allowed",
|
||||
fs_info->fs_devices->missing_devices,
|
||||
fs_info->num_tolerated_disk_barrier_failures);
|
||||
goto fail_sysfs;
|
||||
}
|
||||
|
||||
/*
|
||||
* Hold the cleaner_mutex here so that we don't block
|
||||
* for a long time on btrfs_recover_relocation. cleaner_kthread
|
||||
* will wait for us to finish mounting the filesystem.
|
||||
*/
|
||||
mutex_lock(&fs_info->cleaner_mutex);
|
||||
cleaner_mutex_locked = true;
|
||||
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
|
||||
"btrfs-cleaner");
|
||||
if (IS_ERR(fs_info->cleaner_kthread))
|
||||
@ -3011,8 +3017,7 @@ retry_root_backup:
|
||||
if (!btrfs_test_opt(tree_root, SSD) &&
|
||||
!btrfs_test_opt(tree_root, NOSSD) &&
|
||||
!fs_info->fs_devices->rotating) {
|
||||
printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD "
|
||||
"mode\n");
|
||||
btrfs_info(fs_info, "detected SSD devices, enabling SSD mode");
|
||||
btrfs_set_opt(fs_info->mount_opt, SSD);
|
||||
}
|
||||
|
||||
@ -3030,8 +3035,9 @@ retry_root_backup:
|
||||
1 : 0,
|
||||
fs_info->check_integrity_print_mask);
|
||||
if (ret)
|
||||
printk(KERN_WARNING "BTRFS: failed to initialize"
|
||||
" integrity check module %s\n", sb->s_id);
|
||||
btrfs_warn(fs_info,
|
||||
"failed to initialize integrity check module: %d",
|
||||
ret);
|
||||
}
|
||||
#endif
|
||||
ret = btrfs_read_qgroup_config(fs_info);
|
||||
@ -3056,17 +3062,17 @@ retry_root_backup:
|
||||
ret = btrfs_cleanup_fs_roots(fs_info);
|
||||
if (ret)
|
||||
goto fail_qgroup;
|
||||
|
||||
mutex_lock(&fs_info->cleaner_mutex);
|
||||
/* We locked cleaner_mutex before creating cleaner_kthread. */
|
||||
ret = btrfs_recover_relocation(tree_root);
|
||||
mutex_unlock(&fs_info->cleaner_mutex);
|
||||
if (ret < 0) {
|
||||
printk(KERN_WARNING
|
||||
"BTRFS: failed to recover relocation\n");
|
||||
btrfs_warn(fs_info, "failed to recover relocation: %d",
|
||||
ret);
|
||||
err = -EINVAL;
|
||||
goto fail_qgroup;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&fs_info->cleaner_mutex);
|
||||
cleaner_mutex_locked = false;
|
||||
|
||||
location.objectid = BTRFS_FS_TREE_OBJECTID;
|
||||
location.type = BTRFS_ROOT_ITEM_KEY;
|
||||
@ -3083,11 +3089,11 @@ retry_root_backup:
|
||||
|
||||
if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
|
||||
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
|
||||
pr_info("BTRFS: creating free space tree\n");
|
||||
btrfs_info(fs_info, "creating free space tree");
|
||||
ret = btrfs_create_free_space_tree(fs_info);
|
||||
if (ret) {
|
||||
pr_warn("BTRFS: failed to create free space tree %d\n",
|
||||
ret);
|
||||
btrfs_warn(fs_info,
|
||||
"failed to create free space tree: %d", ret);
|
||||
close_ctree(tree_root);
|
||||
return ret;
|
||||
}
|
||||
@ -3104,14 +3110,14 @@ retry_root_backup:
|
||||
|
||||
ret = btrfs_resume_balance_async(fs_info);
|
||||
if (ret) {
|
||||
printk(KERN_WARNING "BTRFS: failed to resume balance\n");
|
||||
btrfs_warn(fs_info, "failed to resume balance: %d", ret);
|
||||
close_ctree(tree_root);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_resume_dev_replace_async(fs_info);
|
||||
if (ret) {
|
||||
pr_warn("BTRFS: failed to resume dev_replace\n");
|
||||
btrfs_warn(fs_info, "failed to resume device replace: %d", ret);
|
||||
close_ctree(tree_root);
|
||||
return ret;
|
||||
}
|
||||
@ -3120,33 +3126,33 @@ retry_root_backup:
|
||||
|
||||
if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
|
||||
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
|
||||
pr_info("BTRFS: clearing free space tree\n");
|
||||
btrfs_info(fs_info, "clearing free space tree");
|
||||
ret = btrfs_clear_free_space_tree(fs_info);
|
||||
if (ret) {
|
||||
pr_warn("BTRFS: failed to clear free space tree %d\n",
|
||||
ret);
|
||||
btrfs_warn(fs_info,
|
||||
"failed to clear free space tree: %d", ret);
|
||||
close_ctree(tree_root);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (!fs_info->uuid_root) {
|
||||
pr_info("BTRFS: creating UUID tree\n");
|
||||
btrfs_info(fs_info, "creating UUID tree");
|
||||
ret = btrfs_create_uuid_tree(fs_info);
|
||||
if (ret) {
|
||||
pr_warn("BTRFS: failed to create the UUID tree %d\n",
|
||||
ret);
|
||||
btrfs_warn(fs_info,
|
||||
"failed to create the UUID tree: %d", ret);
|
||||
close_ctree(tree_root);
|
||||
return ret;
|
||||
}
|
||||
} else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
|
||||
fs_info->generation !=
|
||||
btrfs_super_uuid_tree_generation(disk_super)) {
|
||||
pr_info("BTRFS: checking UUID tree\n");
|
||||
btrfs_info(fs_info, "checking UUID tree");
|
||||
ret = btrfs_check_uuid_tree(fs_info);
|
||||
if (ret) {
|
||||
pr_warn("BTRFS: failed to check the UUID tree %d\n",
|
||||
ret);
|
||||
btrfs_warn(fs_info,
|
||||
"failed to check the UUID tree: %d", ret);
|
||||
close_ctree(tree_root);
|
||||
return ret;
|
||||
}
|
||||
@ -3180,6 +3186,10 @@ fail_cleaner:
|
||||
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
|
||||
|
||||
fail_sysfs:
|
||||
if (cleaner_mutex_locked) {
|
||||
mutex_unlock(&fs_info->cleaner_mutex);
|
||||
cleaner_mutex_locked = false;
|
||||
}
|
||||
btrfs_sysfs_remove_mounted(fs_info);
|
||||
|
||||
fail_fsdev_sysfs:
|
||||
@ -3646,7 +3656,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
|
||||
if (ret) {
|
||||
mutex_unlock(
|
||||
&root->fs_info->fs_devices->device_list_mutex);
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
btrfs_handle_fs_error(root->fs_info, ret,
|
||||
"errors while submitting device barriers.");
|
||||
return ret;
|
||||
}
|
||||
@ -3686,7 +3696,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
|
||||
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
|
||||
|
||||
/* FUA is masked off if unsupported and can't be the reason */
|
||||
btrfs_std_error(root->fs_info, -EIO,
|
||||
btrfs_handle_fs_error(root->fs_info, -EIO,
|
||||
"%d errors while writing supers", total_errors);
|
||||
return -EIO;
|
||||
}
|
||||
@ -3704,7 +3714,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
|
||||
}
|
||||
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
|
||||
if (total_errors > max_errors) {
|
||||
btrfs_std_error(root->fs_info, -EIO,
|
||||
btrfs_handle_fs_error(root->fs_info, -EIO,
|
||||
"%d errors while writing supers", total_errors);
|
||||
return -EIO;
|
||||
}
|
||||
|
@ -3824,6 +3824,59 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
|
||||
return readonly;
|
||||
}
|
||||
|
||||
/*
 * Register a nocow writer against the block group that contains @bytenr.
 *
 * Returns true when the writer was accounted for.  In that case the block
 * group reference taken by the lookup below is intentionally kept; it is
 * released later by btrfs_dec_nocow_writers().
 *
 * Returns false when no block group contains @bytenr or when the block
 * group is read-only.  No reference is held and the counter is untouched
 * in that case.
 */
bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct btrfs_block_group_cache *bg;
	bool can_nocow;

	bg = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg)
		return false;

	/* The ro check and the increment happen under the same lock. */
	spin_lock(&bg->lock);
	can_nocow = !bg->ro;
	if (can_nocow)
		atomic_inc(&bg->nocow_writers);
	spin_unlock(&bg->lock);

	/* Success: keep the lookup reference for the matching dec call. */
	if (can_nocow)
		return true;

	/* Read-only block group: nothing was registered, drop our reference. */
	btrfs_put_block_group(bg);
	return false;
}
|
||||
|
||||
/*
 * Drop one nocow writer from the block group that contains @bytenr and wake
 * up anyone waiting on the counter once it reaches zero.
 *
 * Must be paired with a previous successful btrfs_inc_nocow_writers() call
 * for the same @bytenr.
 */
void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct btrfs_block_group_cache *bg =
		btrfs_lookup_block_group(fs_info, bytenr);

	ASSERT(bg);
	if (atomic_dec_and_test(&bg->nocow_writers))
		wake_up_atomic_t(&bg->nocow_writers);

	/* Reference taken by the lookup just above. */
	btrfs_put_block_group(bg);
	/* Reference left behind by the earlier btrfs_inc_nocow_writers(). */
	btrfs_put_block_group(bg);
}
|
||||
|
||||
static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a)
|
||||
{
|
||||
schedule();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
|
||||
{
|
||||
wait_on_atomic_t(&bg->nocow_writers,
|
||||
btrfs_wait_nocow_writers_atomic_t,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
static const char *alloc_name(u64 flags)
|
||||
{
|
||||
switch (flags) {
|
||||
@ -4141,7 +4194,7 @@ commit_trans:
|
||||
|
||||
if (need_commit > 0) {
|
||||
btrfs_start_delalloc_roots(fs_info, 0, -1);
|
||||
btrfs_wait_ordered_roots(fs_info, -1);
|
||||
btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
|
||||
}
|
||||
|
||||
trans = btrfs_join_transaction(root);
|
||||
@ -4583,7 +4636,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
|
||||
*/
|
||||
btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
|
||||
if (!current->journal_info)
|
||||
btrfs_wait_ordered_roots(root->fs_info, nr_items);
|
||||
btrfs_wait_ordered_roots(root->fs_info, nr_items,
|
||||
0, (u64)-1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4620,7 +4674,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
|
||||
|
||||
/* Calc the number of the pages we need flush for space reservation */
|
||||
items = calc_reclaim_items_nr(root, to_reclaim);
|
||||
to_reclaim = items * EXTENT_SIZE_PER_ITEM;
|
||||
to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
|
||||
|
||||
trans = (struct btrfs_trans_handle *)current->journal_info;
|
||||
block_rsv = &root->fs_info->delalloc_block_rsv;
|
||||
@ -4632,7 +4686,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
|
||||
if (trans)
|
||||
return;
|
||||
if (wait_ordered)
|
||||
btrfs_wait_ordered_roots(root->fs_info, items);
|
||||
btrfs_wait_ordered_roots(root->fs_info, items,
|
||||
0, (u64)-1);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -4671,7 +4726,8 @@ skip_async:
|
||||
|
||||
loops++;
|
||||
if (wait_ordered && !trans) {
|
||||
btrfs_wait_ordered_roots(root->fs_info, items);
|
||||
btrfs_wait_ordered_roots(root->fs_info, items,
|
||||
0, (u64)-1);
|
||||
} else {
|
||||
time_left = schedule_timeout_killable(1);
|
||||
if (time_left)
|
||||
@ -6172,6 +6228,57 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
|
||||
{
|
||||
atomic_inc(&bg->reservations);
|
||||
}
|
||||
|
||||
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
|
||||
const u64 start)
|
||||
{
|
||||
struct btrfs_block_group_cache *bg;
|
||||
|
||||
bg = btrfs_lookup_block_group(fs_info, start);
|
||||
ASSERT(bg);
|
||||
if (atomic_dec_and_test(&bg->reservations))
|
||||
wake_up_atomic_t(&bg->reservations);
|
||||
btrfs_put_block_group(bg);
|
||||
}
|
||||
|
||||
static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a)
|
||||
{
|
||||
schedule();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
|
||||
{
|
||||
struct btrfs_space_info *space_info = bg->space_info;
|
||||
|
||||
ASSERT(bg->ro);
|
||||
|
||||
if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Our block group is read only but before we set it to read only,
|
||||
* some task might have had allocated an extent from it already, but it
|
||||
* has not yet created a respective ordered extent (and added it to a
|
||||
* root's list of ordered extents).
|
||||
* Therefore wait for any task currently allocating extents, since the
|
||||
* block group's reservations counter is incremented while a read lock
|
||||
* on the groups' semaphore is held and decremented after releasing
|
||||
* the read access on that semaphore and creating the ordered extent.
|
||||
*/
|
||||
down_write(&space_info->groups_sem);
|
||||
up_write(&space_info->groups_sem);
|
||||
|
||||
wait_on_atomic_t(&bg->reservations,
|
||||
btrfs_wait_bg_reservations_atomic_t,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_update_reserved_bytes - update the block_group and space info counters
|
||||
* @cache: The cache we are manipulating
|
||||
@ -7025,36 +7132,35 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
|
||||
int delalloc)
|
||||
{
|
||||
struct btrfs_block_group_cache *used_bg = NULL;
|
||||
bool locked = false;
|
||||
again:
|
||||
|
||||
spin_lock(&cluster->refill_lock);
|
||||
if (locked) {
|
||||
while (1) {
|
||||
used_bg = cluster->block_group;
|
||||
if (!used_bg)
|
||||
return NULL;
|
||||
|
||||
if (used_bg == block_group)
|
||||
return used_bg;
|
||||
|
||||
btrfs_get_block_group(used_bg);
|
||||
|
||||
if (!delalloc)
|
||||
return used_bg;
|
||||
|
||||
if (down_read_trylock(&used_bg->data_rwsem))
|
||||
return used_bg;
|
||||
|
||||
spin_unlock(&cluster->refill_lock);
|
||||
|
||||
down_read(&used_bg->data_rwsem);
|
||||
|
||||
spin_lock(&cluster->refill_lock);
|
||||
if (used_bg == cluster->block_group)
|
||||
return used_bg;
|
||||
|
||||
up_read(&used_bg->data_rwsem);
|
||||
btrfs_put_block_group(used_bg);
|
||||
}
|
||||
|
||||
used_bg = cluster->block_group;
|
||||
if (!used_bg)
|
||||
return NULL;
|
||||
|
||||
if (used_bg == block_group)
|
||||
return used_bg;
|
||||
|
||||
btrfs_get_block_group(used_bg);
|
||||
|
||||
if (!delalloc)
|
||||
return used_bg;
|
||||
|
||||
if (down_read_trylock(&used_bg->data_rwsem))
|
||||
return used_bg;
|
||||
|
||||
spin_unlock(&cluster->refill_lock);
|
||||
down_read(&used_bg->data_rwsem);
|
||||
locked = true;
|
||||
goto again;
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -7431,6 +7537,7 @@ checks:
|
||||
btrfs_add_free_space(block_group, offset, num_bytes);
|
||||
goto loop;
|
||||
}
|
||||
btrfs_inc_block_group_reservations(block_group);
|
||||
|
||||
/* we are all good, lets return */
|
||||
ins->objectid = search_start;
|
||||
@ -7612,8 +7719,10 @@ again:
|
||||
WARN_ON(num_bytes < root->sectorsize);
|
||||
ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
|
||||
flags, delalloc);
|
||||
|
||||
if (ret == -ENOSPC) {
|
||||
if (!ret && !is_data) {
|
||||
btrfs_dec_block_group_reservations(root->fs_info,
|
||||
ins->objectid);
|
||||
} else if (ret == -ENOSPC) {
|
||||
if (!final_tried && ins->offset) {
|
||||
num_bytes = min(num_bytes >> 1, ins->offset);
|
||||
num_bytes = round_down(num_bytes, root->sectorsize);
|
||||
@ -9058,7 +9167,7 @@ out:
|
||||
if (!for_reloc && root_dropped == false)
|
||||
btrfs_add_dead_root(root);
|
||||
if (err && err != -EAGAIN)
|
||||
btrfs_std_error(root->fs_info, err, NULL);
|
||||
btrfs_handle_fs_error(root->fs_info, err, NULL);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -3200,14 +3200,10 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline void update_nr_written(struct page *page,
|
||||
struct writeback_control *wbc,
|
||||
unsigned long nr_written)
|
||||
static void update_nr_written(struct page *page, struct writeback_control *wbc,
|
||||
unsigned long nr_written)
|
||||
{
|
||||
wbc->nr_to_write -= nr_written;
|
||||
if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
|
||||
wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
|
||||
page->mapping->writeback_index = page->index + nr_written;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3368,6 +3364,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
|
||||
while (cur <= end) {
|
||||
u64 em_end;
|
||||
unsigned long max_nr;
|
||||
|
||||
if (cur >= i_size) {
|
||||
if (tree->ops && tree->ops->writepage_end_io_hook)
|
||||
tree->ops->writepage_end_io_hook(page, cur,
|
||||
@ -3423,32 +3421,23 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tree->ops && tree->ops->writepage_io_hook) {
|
||||
ret = tree->ops->writepage_io_hook(page, cur,
|
||||
cur + iosize - 1);
|
||||
} else {
|
||||
ret = 0;
|
||||
max_nr = (i_size >> PAGE_SHIFT) + 1;
|
||||
|
||||
set_range_writeback(tree, cur, cur + iosize - 1);
|
||||
if (!PageWriteback(page)) {
|
||||
btrfs_err(BTRFS_I(inode)->root->fs_info,
|
||||
"page %lu not writeback, cur %llu end %llu",
|
||||
page->index, cur, end);
|
||||
}
|
||||
if (ret) {
|
||||
|
||||
ret = submit_extent_page(write_flags, tree, wbc, page,
|
||||
sector, iosize, pg_offset,
|
||||
bdev, &epd->bio, max_nr,
|
||||
end_bio_extent_writepage,
|
||||
0, 0, 0, false);
|
||||
if (ret)
|
||||
SetPageError(page);
|
||||
} else {
|
||||
unsigned long max_nr = (i_size >> PAGE_SHIFT) + 1;
|
||||
|
||||
set_range_writeback(tree, cur, cur + iosize - 1);
|
||||
if (!PageWriteback(page)) {
|
||||
btrfs_err(BTRFS_I(inode)->root->fs_info,
|
||||
"page %lu not writeback, cur %llu end %llu",
|
||||
page->index, cur, end);
|
||||
}
|
||||
|
||||
ret = submit_extent_page(write_flags, tree, wbc, page,
|
||||
sector, iosize, pg_offset,
|
||||
bdev, &epd->bio, max_nr,
|
||||
end_bio_extent_writepage,
|
||||
0, 0, 0, false);
|
||||
if (ret)
|
||||
SetPageError(page);
|
||||
}
|
||||
cur = cur + iosize;
|
||||
pg_offset += iosize;
|
||||
nr++;
|
||||
@ -3920,12 +3909,13 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
|
||||
struct inode *inode = mapping->host;
|
||||
int ret = 0;
|
||||
int done = 0;
|
||||
int err = 0;
|
||||
int nr_to_write_done = 0;
|
||||
struct pagevec pvec;
|
||||
int nr_pages;
|
||||
pgoff_t index;
|
||||
pgoff_t end; /* Inclusive */
|
||||
pgoff_t done_index;
|
||||
int range_whole = 0;
|
||||
int scanned = 0;
|
||||
int tag;
|
||||
|
||||
@ -3948,6 +3938,8 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
|
||||
} else {
|
||||
index = wbc->range_start >> PAGE_SHIFT;
|
||||
end = wbc->range_end >> PAGE_SHIFT;
|
||||
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
|
||||
range_whole = 1;
|
||||
scanned = 1;
|
||||
}
|
||||
if (wbc->sync_mode == WB_SYNC_ALL)
|
||||
@ -3957,6 +3949,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
|
||||
retry:
|
||||
if (wbc->sync_mode == WB_SYNC_ALL)
|
||||
tag_pages_for_writeback(mapping, index, end);
|
||||
done_index = index;
|
||||
while (!done && !nr_to_write_done && (index <= end) &&
|
||||
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
|
||||
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
|
||||
@ -3966,6 +3959,7 @@ retry:
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
struct page *page = pvec.pages[i];
|
||||
|
||||
done_index = page->index;
|
||||
/*
|
||||
* At this point we hold neither mapping->tree_lock nor
|
||||
* lock on the page itself: the page may be truncated or
|
||||
@ -4007,8 +4001,20 @@ retry:
|
||||
unlock_page(page);
|
||||
ret = 0;
|
||||
}
|
||||
if (!err && ret < 0)
|
||||
err = ret;
|
||||
if (ret < 0) {
|
||||
/*
|
||||
* done_index is set past this page,
|
||||
* so media errors will not choke
|
||||
* background writeout for the entire
|
||||
* file. This has consequences for
|
||||
* range_cyclic semantics (ie. it may
|
||||
* not be suitable for data integrity
|
||||
* writeout).
|
||||
*/
|
||||
done_index = page->index + 1;
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* the filesystem may choose to bump up nr_to_write.
|
||||
@ -4020,7 +4026,7 @@ retry:
|
||||
pagevec_release(&pvec);
|
||||
cond_resched();
|
||||
}
|
||||
if (!scanned && !done && !err) {
|
||||
if (!scanned && !done) {
|
||||
/*
|
||||
* We hit the last page and there is more work to be done: wrap
|
||||
* back to the start of the file
|
||||
@ -4029,8 +4035,12 @@ retry:
|
||||
index = 0;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
|
||||
mapping->writeback_index = done_index;
|
||||
|
||||
btrfs_add_delayed_iput(inode);
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void flush_epd_write_bio(struct extent_page_data *epd)
|
||||
@ -4822,7 +4832,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
return NULL;
|
||||
eb->fs_info = fs_info;
|
||||
again:
|
||||
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret)
|
||||
goto free_eb;
|
||||
spin_lock(&fs_info->buffer_lock);
|
||||
@ -4923,7 +4933,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
if (uptodate)
|
||||
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
|
||||
again:
|
||||
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret)
|
||||
goto free_eb;
|
||||
|
||||
|
@ -71,7 +71,6 @@ struct extent_io_ops {
|
||||
u64 start, u64 end, int *page_started,
|
||||
unsigned long *nr_written);
|
||||
int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
|
||||
int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
|
||||
extent_submit_bio_hook_t *submit_bio_hook;
|
||||
int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset,
|
||||
size_t size, struct bio *bio,
|
||||
|
@ -1696,7 +1696,9 @@ again:
|
||||
btrfs_end_write_no_snapshoting(root);
|
||||
btrfs_delalloc_release_metadata(inode, release_bytes);
|
||||
} else {
|
||||
btrfs_delalloc_release_space(inode, pos, release_bytes);
|
||||
btrfs_delalloc_release_space(inode,
|
||||
round_down(pos, root->sectorsize),
|
||||
release_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2952,7 +2954,7 @@ const struct file_operations btrfs_file_operations = {
|
||||
.fallocate = btrfs_fallocate,
|
||||
.unlocked_ioctl = btrfs_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = btrfs_ioctl,
|
||||
.compat_ioctl = btrfs_compat_ioctl,
|
||||
#endif
|
||||
.copy_file_range = btrfs_copy_file_range,
|
||||
.clone_file_range = btrfs_clone_file_range,
|
||||
|
@ -157,7 +157,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
if (!btrfs_find_name_in_ext_backref(path, ref_objectid,
|
||||
name, name_len, &extref)) {
|
||||
btrfs_std_error(root->fs_info, -ENOENT, NULL);
|
||||
btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
|
||||
ret = -EROFS;
|
||||
goto out;
|
||||
}
|
||||
|
468
fs/btrfs/inode.c
468
fs/btrfs/inode.c
@ -824,6 +824,7 @@ retry:
|
||||
async_extent->ram_size - 1, 0);
|
||||
goto out_free_reserve;
|
||||
}
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
|
||||
/*
|
||||
* clear dirty, set writeback and unlock the pages.
|
||||
@ -861,6 +862,7 @@ retry:
|
||||
}
|
||||
return;
|
||||
out_free_reserve:
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
|
||||
out_free:
|
||||
extent_clear_unlock_delalloc(inode, async_extent->start,
|
||||
@ -1038,6 +1040,8 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
goto out_drop_extent_cache;
|
||||
}
|
||||
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
|
||||
if (disk_num_bytes < cur_alloc_size)
|
||||
break;
|
||||
|
||||
@ -1066,6 +1070,7 @@ out:
|
||||
out_drop_extent_cache:
|
||||
btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
|
||||
out_reserve:
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
|
||||
out_unlock:
|
||||
extent_clear_unlock_delalloc(inode, start, end, locked_page,
|
||||
@ -1377,6 +1382,9 @@ next_slot:
|
||||
*/
|
||||
if (csum_exist_in_range(root, disk_bytenr, num_bytes))
|
||||
goto out_check;
|
||||
if (!btrfs_inc_nocow_writers(root->fs_info,
|
||||
disk_bytenr))
|
||||
goto out_check;
|
||||
nocow = 1;
|
||||
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
extent_end = found_key.offset +
|
||||
@ -1391,6 +1399,9 @@ out_check:
|
||||
path->slots[0]++;
|
||||
if (!nolock && nocow)
|
||||
btrfs_end_write_no_snapshoting(root);
|
||||
if (nocow)
|
||||
btrfs_dec_nocow_writers(root->fs_info,
|
||||
disk_bytenr);
|
||||
goto next_slot;
|
||||
}
|
||||
if (!nocow) {
|
||||
@ -1411,6 +1422,9 @@ out_check:
|
||||
if (ret) {
|
||||
if (!nolock && nocow)
|
||||
btrfs_end_write_no_snapshoting(root);
|
||||
if (nocow)
|
||||
btrfs_dec_nocow_writers(root->fs_info,
|
||||
disk_bytenr);
|
||||
goto error;
|
||||
}
|
||||
cow_start = (u64)-1;
|
||||
@ -1453,6 +1467,8 @@ out_check:
|
||||
|
||||
ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
|
||||
num_bytes, num_bytes, type);
|
||||
if (nocow)
|
||||
btrfs_dec_nocow_writers(root->fs_info, disk_bytenr);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
|
||||
if (root->root_key.objectid ==
|
||||
@ -7129,6 +7145,43 @@ out:
|
||||
return em;
|
||||
}
|
||||
|
||||
static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
|
||||
const u64 start,
|
||||
const u64 len,
|
||||
const u64 orig_start,
|
||||
const u64 block_start,
|
||||
const u64 block_len,
|
||||
const u64 orig_block_len,
|
||||
const u64 ram_bytes,
|
||||
const int type)
|
||||
{
|
||||
struct extent_map *em = NULL;
|
||||
int ret;
|
||||
|
||||
down_read(&BTRFS_I(inode)->dio_sem);
|
||||
if (type != BTRFS_ORDERED_NOCOW) {
|
||||
em = create_pinned_em(inode, start, len, orig_start,
|
||||
block_start, block_len, orig_block_len,
|
||||
ram_bytes, type);
|
||||
if (IS_ERR(em))
|
||||
goto out;
|
||||
}
|
||||
ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
|
||||
len, block_len, type);
|
||||
if (ret) {
|
||||
if (em) {
|
||||
free_extent_map(em);
|
||||
btrfs_drop_extent_cache(inode, start,
|
||||
start + len - 1, 0);
|
||||
}
|
||||
em = ERR_PTR(ret);
|
||||
}
|
||||
out:
|
||||
up_read(&BTRFS_I(inode)->dio_sem);
|
||||
|
||||
return em;
|
||||
}
|
||||
|
||||
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
@ -7144,41 +7197,13 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
/*
|
||||
* Create the ordered extent before the extent map. This is to avoid
|
||||
* races with the fast fsync path that would lead to it logging file
|
||||
* extent items that point to disk extents that were not yet written to.
|
||||
* The fast fsync path collects ordered extents into a local list and
|
||||
* then collects all the new extent maps, so we must create the ordered
|
||||
* extent first and make sure the fast fsync path collects any new
|
||||
* ordered extents after collecting new extent maps as well.
|
||||
* The fsync path simply can not rely on inode_dio_wait() because it
|
||||
* causes deadlock with AIO.
|
||||
*/
|
||||
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
|
||||
ins.offset, ins.offset, 0);
|
||||
if (ret) {
|
||||
em = btrfs_create_dio_extent(inode, start, ins.offset, start,
|
||||
ins.objectid, ins.offset, ins.offset,
|
||||
ins.offset, 0);
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
if (IS_ERR(em))
|
||||
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
|
||||
ins.offset, ins.offset, ins.offset, 0);
|
||||
if (IS_ERR(em)) {
|
||||
struct btrfs_ordered_extent *oe;
|
||||
|
||||
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
|
||||
oe = btrfs_lookup_ordered_extent(inode, start);
|
||||
ASSERT(oe);
|
||||
if (WARN_ON(!oe))
|
||||
return em;
|
||||
set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
|
||||
set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
|
||||
btrfs_remove_ordered_extent(inode, oe);
|
||||
/* Once for our lookup and once for the ordered extents tree. */
|
||||
btrfs_put_ordered_extent(oe);
|
||||
btrfs_put_ordered_extent(oe);
|
||||
}
|
||||
return em;
|
||||
}
|
||||
|
||||
@ -7650,24 +7675,21 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
||||
block_start = em->block_start + (start - em->start);
|
||||
|
||||
if (can_nocow_extent(inode, start, &len, &orig_start,
|
||||
&orig_block_len, &ram_bytes) == 1) {
|
||||
&orig_block_len, &ram_bytes) == 1 &&
|
||||
btrfs_inc_nocow_writers(root->fs_info, block_start)) {
|
||||
struct extent_map *em2;
|
||||
|
||||
em2 = btrfs_create_dio_extent(inode, start, len,
|
||||
orig_start, block_start,
|
||||
len, orig_block_len,
|
||||
ram_bytes, type);
|
||||
btrfs_dec_nocow_writers(root->fs_info, block_start);
|
||||
if (type == BTRFS_ORDERED_PREALLOC) {
|
||||
free_extent_map(em);
|
||||
em = create_pinned_em(inode, start, len,
|
||||
orig_start,
|
||||
block_start, len,
|
||||
orig_block_len,
|
||||
ram_bytes, type);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
goto unlock_err;
|
||||
}
|
||||
em = em2;
|
||||
}
|
||||
|
||||
ret = btrfs_add_ordered_extent_dio(inode, start,
|
||||
block_start, len, len, type);
|
||||
if (ret) {
|
||||
free_extent_map(em);
|
||||
if (em2 && IS_ERR(em2)) {
|
||||
ret = PTR_ERR(em2);
|
||||
goto unlock_err;
|
||||
}
|
||||
goto unlock;
|
||||
@ -9230,6 +9252,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
|
||||
INIT_LIST_HEAD(&ei->delalloc_inodes);
|
||||
INIT_LIST_HEAD(&ei->delayed_iput);
|
||||
RB_CLEAR_NODE(&ei->rb_node);
|
||||
init_rwsem(&ei->dio_sem);
|
||||
|
||||
return inode;
|
||||
}
|
||||
@ -9387,18 +9410,290 @@ static int btrfs_getattr(struct vfsmount *mnt,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
struct inode *new_dir, struct dentry *new_dentry)
|
||||
static int btrfs_rename_exchange(struct inode *old_dir,
|
||||
struct dentry *old_dentry,
|
||||
struct inode *new_dir,
|
||||
struct dentry *new_dentry)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_root *root = BTRFS_I(old_dir)->root;
|
||||
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
|
||||
struct inode *new_inode = new_dentry->d_inode;
|
||||
struct inode *old_inode = old_dentry->d_inode;
|
||||
struct timespec ctime = CURRENT_TIME;
|
||||
struct dentry *parent;
|
||||
u64 old_ino = btrfs_ino(old_inode);
|
||||
u64 new_ino = btrfs_ino(new_inode);
|
||||
u64 old_idx = 0;
|
||||
u64 new_idx = 0;
|
||||
u64 root_objectid;
|
||||
int ret;
|
||||
bool root_log_pinned = false;
|
||||
bool dest_log_pinned = false;
|
||||
|
||||
/* we only allow rename subvolume link between subvolumes */
|
||||
if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
|
||||
return -EXDEV;
|
||||
|
||||
/* close the race window with snapshot create/destroy ioctl */
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
|
||||
down_read(&root->fs_info->subvol_sem);
|
||||
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
|
||||
down_read(&dest->fs_info->subvol_sem);
|
||||
|
||||
/*
|
||||
* We want to reserve the absolute worst case amount of items. So if
|
||||
* both inodes are subvols and we need to unlink them then that would
|
||||
* require 4 item modifications, but if they are both normal inodes it
|
||||
* would require 5 item modifications, so we'll assume their normal
|
||||
* inodes. So 5 * 2 is 10, plus 2 for the new links, so 12 total items
|
||||
* should cover the worst case number of items we'll modify.
|
||||
*/
|
||||
trans = btrfs_start_transaction(root, 12);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_notrans;
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to find a free sequence number both in the source and
|
||||
* in the destination directory for the exchange.
|
||||
*/
|
||||
ret = btrfs_set_inode_index(new_dir, &old_idx);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
ret = btrfs_set_inode_index(old_dir, &new_idx);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
|
||||
BTRFS_I(old_inode)->dir_index = 0ULL;
|
||||
BTRFS_I(new_inode)->dir_index = 0ULL;
|
||||
|
||||
/* Reference for the source. */
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
/* force full log commit if subvolume involved. */
|
||||
btrfs_set_log_full_commit(root->fs_info, trans);
|
||||
} else {
|
||||
btrfs_pin_log_trans(root);
|
||||
root_log_pinned = true;
|
||||
ret = btrfs_insert_inode_ref(trans, dest,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len,
|
||||
old_ino,
|
||||
btrfs_ino(new_dir), old_idx);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
/* And now for the dest. */
|
||||
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
/* force full log commit if subvolume involved. */
|
||||
btrfs_set_log_full_commit(dest->fs_info, trans);
|
||||
} else {
|
||||
btrfs_pin_log_trans(dest);
|
||||
dest_log_pinned = true;
|
||||
ret = btrfs_insert_inode_ref(trans, root,
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len,
|
||||
new_ino,
|
||||
btrfs_ino(old_dir), new_idx);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
/* Update inode version and ctime/mtime. */
|
||||
inode_inc_iversion(old_dir);
|
||||
inode_inc_iversion(new_dir);
|
||||
inode_inc_iversion(old_inode);
|
||||
inode_inc_iversion(new_inode);
|
||||
old_dir->i_ctime = old_dir->i_mtime = ctime;
|
||||
new_dir->i_ctime = new_dir->i_mtime = ctime;
|
||||
old_inode->i_ctime = ctime;
|
||||
new_inode->i_ctime = ctime;
|
||||
|
||||
if (old_dentry->d_parent != new_dentry->d_parent) {
|
||||
btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
|
||||
btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
|
||||
}
|
||||
|
||||
/* src is a subvolume */
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
|
||||
ret = btrfs_unlink_subvol(trans, root, old_dir,
|
||||
root_objectid,
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len);
|
||||
} else { /* src is an inode */
|
||||
ret = __btrfs_unlink_inode(trans, root, old_dir,
|
||||
old_dentry->d_inode,
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len);
|
||||
if (!ret)
|
||||
ret = btrfs_update_inode(trans, root, old_inode);
|
||||
}
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
/* dest is a subvolume */
|
||||
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
|
||||
ret = btrfs_unlink_subvol(trans, dest, new_dir,
|
||||
root_objectid,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len);
|
||||
} else { /* dest is an inode */
|
||||
ret = __btrfs_unlink_inode(trans, dest, new_dir,
|
||||
new_dentry->d_inode,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len);
|
||||
if (!ret)
|
||||
ret = btrfs_update_inode(trans, dest, new_inode);
|
||||
}
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
ret = btrfs_add_link(trans, new_dir, old_inode,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len, 0, old_idx);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
ret = btrfs_add_link(trans, old_dir, new_inode,
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len, 0, new_idx);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
if (old_inode->i_nlink == 1)
|
||||
BTRFS_I(old_inode)->dir_index = old_idx;
|
||||
if (new_inode->i_nlink == 1)
|
||||
BTRFS_I(new_inode)->dir_index = new_idx;
|
||||
|
||||
if (root_log_pinned) {
|
||||
parent = new_dentry->d_parent;
|
||||
btrfs_log_new_name(trans, old_inode, old_dir, parent);
|
||||
btrfs_end_log_trans(root);
|
||||
root_log_pinned = false;
|
||||
}
|
||||
if (dest_log_pinned) {
|
||||
parent = old_dentry->d_parent;
|
||||
btrfs_log_new_name(trans, new_inode, new_dir, parent);
|
||||
btrfs_end_log_trans(dest);
|
||||
dest_log_pinned = false;
|
||||
}
|
||||
out_fail:
|
||||
/*
|
||||
* If we have pinned a log and an error happened, we unpin tasks
|
||||
* trying to sync the log and force them to fallback to a transaction
|
||||
* commit if the log currently contains any of the inodes involved in
|
||||
* this rename operation (to ensure we do not persist a log with an
|
||||
* inconsistent state for any of these inodes or leading to any
|
||||
* inconsistencies when replayed). If the transaction was aborted, the
|
||||
* abortion reason is propagated to userspace when attempting to commit
|
||||
* the transaction. If the log does not contain any of these inodes, we
|
||||
* allow the tasks to sync it.
|
||||
*/
|
||||
if (ret && (root_log_pinned || dest_log_pinned)) {
|
||||
if (btrfs_inode_in_log(old_dir, root->fs_info->generation) ||
|
||||
btrfs_inode_in_log(new_dir, root->fs_info->generation) ||
|
||||
btrfs_inode_in_log(old_inode, root->fs_info->generation) ||
|
||||
(new_inode &&
|
||||
btrfs_inode_in_log(new_inode, root->fs_info->generation)))
|
||||
btrfs_set_log_full_commit(root->fs_info, trans);
|
||||
|
||||
if (root_log_pinned) {
|
||||
btrfs_end_log_trans(root);
|
||||
root_log_pinned = false;
|
||||
}
|
||||
if (dest_log_pinned) {
|
||||
btrfs_end_log_trans(dest);
|
||||
dest_log_pinned = false;
|
||||
}
|
||||
}
|
||||
ret = btrfs_end_transaction(trans, root);
|
||||
out_notrans:
|
||||
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
|
||||
up_read(&dest->fs_info->subvol_sem);
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
|
||||
up_read(&root->fs_info->subvol_sem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct inode *dir,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
int ret;
|
||||
struct inode *inode;
|
||||
u64 objectid;
|
||||
u64 index;
|
||||
|
||||
ret = btrfs_find_free_ino(root, &objectid);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
inode = btrfs_new_inode(trans, root, dir,
|
||||
dentry->d_name.name,
|
||||
dentry->d_name.len,
|
||||
btrfs_ino(dir),
|
||||
objectid,
|
||||
S_IFCHR | WHITEOUT_MODE,
|
||||
&index);
|
||||
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
inode->i_op = &btrfs_special_inode_operations;
|
||||
init_special_inode(inode, inode->i_mode,
|
||||
WHITEOUT_DEV);
|
||||
|
||||
ret = btrfs_init_inode_security(trans, inode, dir,
|
||||
&dentry->d_name);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = btrfs_add_nondir(trans, dir, dentry,
|
||||
inode, 0, index);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
out:
|
||||
unlock_new_inode(inode);
|
||||
if (ret)
|
||||
inode_dec_link_count(inode);
|
||||
iput(inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
struct inode *new_dir, struct dentry *new_dentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
unsigned int trans_num_items;
|
||||
struct btrfs_root *root = BTRFS_I(old_dir)->root;
|
||||
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
|
||||
struct inode *new_inode = d_inode(new_dentry);
|
||||
struct inode *old_inode = d_inode(old_dentry);
|
||||
u64 index = 0;
|
||||
u64 root_objectid;
|
||||
int ret;
|
||||
u64 old_ino = btrfs_ino(old_inode);
|
||||
bool log_pinned = false;
|
||||
|
||||
if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
|
||||
return -EPERM;
|
||||
@ -9449,15 +9744,21 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
* We want to reserve the absolute worst case amount of items. So if
|
||||
* both inodes are subvols and we need to unlink them then that would
|
||||
* require 4 item modifications, but if they are both normal inodes it
|
||||
* would require 5 item modifications, so we'll assume their normal
|
||||
* would require 5 item modifications, so we'll assume they are normal
|
||||
* inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
|
||||
* should cover the worst case number of items we'll modify.
|
||||
* If our rename has the whiteout flag, we need 5 more units for the
|
||||
* new inode (1 inode item, 1 inode ref, 2 dir items and 1 xattr item
|
||||
* when selinux is enabled).
|
||||
*/
|
||||
trans = btrfs_start_transaction(root, 11);
|
||||
trans_num_items = 11;
|
||||
if (flags & RENAME_WHITEOUT)
|
||||
trans_num_items += 5;
|
||||
trans = btrfs_start_transaction(root, trans_num_items);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_notrans;
|
||||
}
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_notrans;
|
||||
}
|
||||
|
||||
if (dest != root)
|
||||
btrfs_record_root_in_trans(trans, dest);
|
||||
@ -9471,6 +9772,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
/* force full log commit if subvolume involved. */
|
||||
btrfs_set_log_full_commit(root->fs_info, trans);
|
||||
} else {
|
||||
btrfs_pin_log_trans(root);
|
||||
log_pinned = true;
|
||||
ret = btrfs_insert_inode_ref(trans, dest,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len,
|
||||
@ -9478,14 +9781,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
btrfs_ino(new_dir), index);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
/*
|
||||
* this is an ugly little race, but the rename is required
|
||||
* to make sure that if we crash, the inode is either at the
|
||||
* old name or the new one. pinning the log transaction lets
|
||||
* us make sure we don't allow a log commit to come in after
|
||||
* we unlink the name but before we add the new name back in.
|
||||
*/
|
||||
btrfs_pin_log_trans(root);
|
||||
}
|
||||
|
||||
inode_inc_iversion(old_dir);
|
||||
@ -9552,12 +9847,46 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
if (old_inode->i_nlink == 1)
|
||||
BTRFS_I(old_inode)->dir_index = index;
|
||||
|
||||
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
|
||||
if (log_pinned) {
|
||||
struct dentry *parent = new_dentry->d_parent;
|
||||
|
||||
btrfs_log_new_name(trans, old_inode, old_dir, parent);
|
||||
btrfs_end_log_trans(root);
|
||||
log_pinned = false;
|
||||
}
|
||||
|
||||
if (flags & RENAME_WHITEOUT) {
|
||||
ret = btrfs_whiteout_for_rename(trans, root, old_dir,
|
||||
old_dentry);
|
||||
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out_fail;
|
||||
}
|
||||
}
|
||||
out_fail:
|
||||
/*
|
||||
* If we have pinned the log and an error happened, we unpin tasks
|
||||
* trying to sync the log and force them to fallback to a transaction
|
||||
* commit if the log currently contains any of the inodes involved in
|
||||
* this rename operation (to ensure we do not persist a log with an
|
||||
* inconsistent state for any of these inodes or leading to any
|
||||
* inconsistencies when replayed). If the transaction was aborted, the
|
||||
* abortion reason is propagated to userspace when attempting to commit
|
||||
* the transaction. If the log does not contain any of these inodes, we
|
||||
* allow the tasks to sync it.
|
||||
*/
|
||||
if (ret && log_pinned) {
|
||||
if (btrfs_inode_in_log(old_dir, root->fs_info->generation) ||
|
||||
btrfs_inode_in_log(new_dir, root->fs_info->generation) ||
|
||||
btrfs_inode_in_log(old_inode, root->fs_info->generation) ||
|
||||
(new_inode &&
|
||||
btrfs_inode_in_log(new_inode, root->fs_info->generation)))
|
||||
btrfs_set_log_full_commit(root->fs_info, trans);
|
||||
|
||||
btrfs_end_log_trans(root);
|
||||
log_pinned = false;
|
||||
}
|
||||
btrfs_end_transaction(trans, root);
|
||||
out_notrans:
|
||||
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
|
||||
@ -9570,10 +9899,14 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
|
||||
struct inode *new_dir, struct dentry *new_dentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
if (flags & ~RENAME_NOREPLACE)
|
||||
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
|
||||
return -EINVAL;
|
||||
|
||||
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
|
||||
if (flags & RENAME_EXCHANGE)
|
||||
return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
|
||||
new_dentry);
|
||||
|
||||
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
|
||||
}
|
||||
|
||||
static void btrfs_run_delalloc_work(struct btrfs_work *work)
|
||||
@ -9942,6 +10275,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
btrfs_end_transaction(trans, root);
|
||||
break;
|
||||
}
|
||||
btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
|
||||
|
||||
last_alloc = ins.offset;
|
||||
ret = insert_reserved_file_extent(trans, inode,
|
||||
@ -10184,7 +10518,7 @@ static const struct file_operations btrfs_dir_file_operations = {
|
||||
.iterate = btrfs_real_readdir,
|
||||
.unlocked_ioctl = btrfs_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = btrfs_ioctl,
|
||||
.compat_ioctl = btrfs_compat_ioctl,
|
||||
#endif
|
||||
.release = btrfs_release_file,
|
||||
.fsync = btrfs_sync_file,
|
||||
|
198
fs/btrfs/ioctl.c
198
fs/btrfs/ioctl.c
@ -125,10 +125,10 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
|
||||
if (flags & BTRFS_INODE_NODATACOW)
|
||||
iflags |= FS_NOCOW_FL;
|
||||
|
||||
if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
|
||||
iflags |= FS_COMPR_FL;
|
||||
else if (flags & BTRFS_INODE_NOCOMPRESS)
|
||||
if (flags & BTRFS_INODE_NOCOMPRESS)
|
||||
iflags |= FS_NOCOMP_FL;
|
||||
else if (flags & BTRFS_INODE_COMPRESS)
|
||||
iflags |= FS_COMPR_FL;
|
||||
|
||||
return iflags;
|
||||
}
|
||||
@ -439,7 +439,7 @@ static noinline int create_subvol(struct inode *dir,
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_root_item root_item;
|
||||
struct btrfs_root_item *root_item;
|
||||
struct btrfs_inode_item *inode_item;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_root *root = BTRFS_I(dir)->root;
|
||||
@ -455,16 +455,22 @@ static noinline int create_subvol(struct inode *dir,
|
||||
u64 qgroup_reserved;
|
||||
uuid_le new_uuid;
|
||||
|
||||
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
|
||||
if (!root_item)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto fail_free;
|
||||
|
||||
/*
|
||||
* Don't create subvolume whose level is not zero. Or qgroup will be
|
||||
* screwed up since it assumes a subvolume qgroup's level to be 0.
|
||||
*/
|
||||
if (btrfs_qgroup_level(objectid))
|
||||
return -ENOSPC;
|
||||
if (btrfs_qgroup_level(objectid)) {
|
||||
ret = -ENOSPC;
|
||||
goto fail_free;
|
||||
}
|
||||
|
||||
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
|
||||
/*
|
||||
@ -474,14 +480,14 @@ static noinline int create_subvol(struct inode *dir,
|
||||
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
|
||||
8, &qgroup_reserved, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto fail_free;
|
||||
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
btrfs_subvolume_release_metadata(root, &block_rsv,
|
||||
qgroup_reserved);
|
||||
return ret;
|
||||
goto fail_free;
|
||||
}
|
||||
trans->block_rsv = &block_rsv;
|
||||
trans->bytes_reserved = block_rsv.size;
|
||||
@ -509,47 +515,45 @@ static noinline int create_subvol(struct inode *dir,
|
||||
BTRFS_UUID_SIZE);
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
|
||||
memset(&root_item, 0, sizeof(root_item));
|
||||
|
||||
inode_item = &root_item.inode;
|
||||
inode_item = &root_item->inode;
|
||||
btrfs_set_stack_inode_generation(inode_item, 1);
|
||||
btrfs_set_stack_inode_size(inode_item, 3);
|
||||
btrfs_set_stack_inode_nlink(inode_item, 1);
|
||||
btrfs_set_stack_inode_nbytes(inode_item, root->nodesize);
|
||||
btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
|
||||
|
||||
btrfs_set_root_flags(&root_item, 0);
|
||||
btrfs_set_root_limit(&root_item, 0);
|
||||
btrfs_set_root_flags(root_item, 0);
|
||||
btrfs_set_root_limit(root_item, 0);
|
||||
btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
|
||||
|
||||
btrfs_set_root_bytenr(&root_item, leaf->start);
|
||||
btrfs_set_root_generation(&root_item, trans->transid);
|
||||
btrfs_set_root_level(&root_item, 0);
|
||||
btrfs_set_root_refs(&root_item, 1);
|
||||
btrfs_set_root_used(&root_item, leaf->len);
|
||||
btrfs_set_root_last_snapshot(&root_item, 0);
|
||||
btrfs_set_root_bytenr(root_item, leaf->start);
|
||||
btrfs_set_root_generation(root_item, trans->transid);
|
||||
btrfs_set_root_level(root_item, 0);
|
||||
btrfs_set_root_refs(root_item, 1);
|
||||
btrfs_set_root_used(root_item, leaf->len);
|
||||
btrfs_set_root_last_snapshot(root_item, 0);
|
||||
|
||||
btrfs_set_root_generation_v2(&root_item,
|
||||
btrfs_root_generation(&root_item));
|
||||
btrfs_set_root_generation_v2(root_item,
|
||||
btrfs_root_generation(root_item));
|
||||
uuid_le_gen(&new_uuid);
|
||||
memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
|
||||
btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec);
|
||||
btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec);
|
||||
root_item.ctime = root_item.otime;
|
||||
btrfs_set_root_ctransid(&root_item, trans->transid);
|
||||
btrfs_set_root_otransid(&root_item, trans->transid);
|
||||
memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
|
||||
btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
|
||||
btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
|
||||
root_item->ctime = root_item->otime;
|
||||
btrfs_set_root_ctransid(root_item, trans->transid);
|
||||
btrfs_set_root_otransid(root_item, trans->transid);
|
||||
|
||||
btrfs_tree_unlock(leaf);
|
||||
free_extent_buffer(leaf);
|
||||
leaf = NULL;
|
||||
|
||||
btrfs_set_root_dirid(&root_item, new_dirid);
|
||||
btrfs_set_root_dirid(root_item, new_dirid);
|
||||
|
||||
key.objectid = objectid;
|
||||
key.offset = 0;
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
|
||||
&root_item);
|
||||
root_item);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
@ -601,12 +605,13 @@ static noinline int create_subvol(struct inode *dir,
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
|
||||
root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
|
||||
root_item->uuid, BTRFS_UUID_KEY_SUBVOL,
|
||||
objectid);
|
||||
if (ret)
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
|
||||
fail:
|
||||
kfree(root_item);
|
||||
trans->block_rsv = NULL;
|
||||
trans->bytes_reserved = 0;
|
||||
btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
|
||||
@ -629,6 +634,10 @@ fail:
|
||||
d_instantiate(dentry, inode);
|
||||
}
|
||||
return ret;
|
||||
|
||||
fail_free:
|
||||
kfree(root_item);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root)
|
||||
@ -681,7 +690,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
|
||||
if (ret)
|
||||
goto dec_and_free;
|
||||
|
||||
btrfs_wait_ordered_extents(root, -1);
|
||||
btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
|
||||
|
||||
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
|
||||
BTRFS_BLOCK_RSV_TEMP);
|
||||
@ -2671,10 +2680,10 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
|
||||
static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
|
||||
struct btrfs_ioctl_vol_args *vol_args;
|
||||
struct btrfs_ioctl_vol_args_v2 *vol_args;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
@ -2690,7 +2699,9 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
|
||||
goto err_drop;
|
||||
}
|
||||
|
||||
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
|
||||
/* Check for compatibility: reject unknown flags */
|
||||
if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
|
||||
1)) {
|
||||
@ -2699,13 +2710,23 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
|
||||
}
|
||||
|
||||
mutex_lock(&root->fs_info->volume_mutex);
|
||||
ret = btrfs_rm_device(root, vol_args->name);
|
||||
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
|
||||
ret = btrfs_rm_device(root, NULL, vol_args->devid);
|
||||
} else {
|
||||
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
|
||||
ret = btrfs_rm_device(root, vol_args->name, 0);
|
||||
}
|
||||
mutex_unlock(&root->fs_info->volume_mutex);
|
||||
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
|
||||
|
||||
if (!ret)
|
||||
btrfs_info(root->fs_info, "disk deleted %s",vol_args->name);
|
||||
|
||||
if (!ret) {
|
||||
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
|
||||
btrfs_info(root->fs_info, "device deleted: id %llu",
|
||||
vol_args->devid);
|
||||
else
|
||||
btrfs_info(root->fs_info, "device deleted: %s",
|
||||
vol_args->name);
|
||||
}
|
||||
out:
|
||||
kfree(vol_args);
|
||||
err_drop:
|
||||
@ -2713,6 +2734,47 @@ err_drop:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Handler for the original (v1) "remove device" ioctl, which identifies the
 * device by path name.
 *
 * @file: open file the ioctl was issued on; its inode selects the btrfs root
 * @arg:  user pointer to a struct btrfs_ioctl_vol_args
 *
 * The caller must have CAP_SYS_ADMIN.  Write access to the mount is taken
 * for the duration of the call and the filesystem-wide
 * mutually_exclusive_operation_running flag is claimed; if it was already
 * set, BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS is returned.  The device is
 * removed by name (devid argument 0) while holding volume_mutex.
 *
 * Returns 0 on success, -EPERM without the capability, or the error from
 * the failing step.
 */
static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_ioctl_vol_args *args;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = mnt_want_write_file(file);
	if (err)
		return err;

	if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
			1)) {
		err = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		goto drop_write;
	}

	args = memdup_user(arg, sizeof(*args));
	if (IS_ERR(args)) {
		err = PTR_ERR(args);
	} else {
		/* Force termination of the user supplied path. */
		args->name[BTRFS_PATH_NAME_MAX] = '\0';

		mutex_lock(&root->fs_info->volume_mutex);
		err = btrfs_rm_device(root, args->name, 0);
		mutex_unlock(&root->fs_info->volume_mutex);

		if (!err)
			btrfs_info(root->fs_info, "disk deleted %s",args->name);
		kfree(args);
	}

	/* Release the exclusive-operation flag claimed above. */
	atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
drop_write:
	mnt_drop_write_file(file);

	return err;
}
|
||||
|
||||
static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
|
||||
{
|
||||
struct btrfs_ioctl_fs_info_args *fi_args;
|
||||
@ -3472,13 +3534,16 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
|
||||
u64 last_dest_end = destoff;
|
||||
|
||||
ret = -ENOMEM;
|
||||
buf = vmalloc(root->nodesize);
|
||||
if (!buf)
|
||||
return ret;
|
||||
buf = kmalloc(root->nodesize, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!buf) {
|
||||
buf = vmalloc(root->nodesize);
|
||||
if (!buf)
|
||||
return ret;
|
||||
}
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path) {
|
||||
vfree(buf);
|
||||
kvfree(buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3779,7 +3844,7 @@ process_slot:
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
vfree(buf);
|
||||
kvfree(buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -4380,7 +4445,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
|
||||
1)) {
|
||||
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
|
||||
} else {
|
||||
ret = btrfs_dev_replace_start(root, p);
|
||||
ret = btrfs_dev_replace_by_ioctl(root, p);
|
||||
atomic_set(
|
||||
&root->fs_info->mutually_exclusive_operation_running,
|
||||
0);
|
||||
@ -4851,8 +4916,8 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
|
||||
/* update qgroup status and info */
|
||||
err = btrfs_run_qgroups(trans, root->fs_info);
|
||||
if (err < 0)
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
"failed to update qgroup status and info\n");
|
||||
btrfs_handle_fs_error(root->fs_info, err,
|
||||
"failed to update qgroup status and info");
|
||||
err = btrfs_end_transaction(trans, root);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
@ -5398,9 +5463,15 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
||||
spin_lock(&root->fs_info->super_lock);
|
||||
newflags = btrfs_super_compat_flags(super_block);
|
||||
@ -5419,7 +5490,11 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
|
||||
btrfs_set_super_incompat_flags(super_block, newflags);
|
||||
spin_unlock(&root->fs_info->super_lock);
|
||||
|
||||
return btrfs_commit_transaction(trans, root);
|
||||
ret = btrfs_commit_transaction(trans, root);
|
||||
out_drop_write:
|
||||
mnt_drop_write_file(file);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
long btrfs_ioctl(struct file *file, unsigned int
|
||||
@ -5463,6 +5538,8 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
return btrfs_ioctl_add_dev(root, argp);
|
||||
case BTRFS_IOC_RM_DEV:
|
||||
return btrfs_ioctl_rm_dev(file, argp);
|
||||
case BTRFS_IOC_RM_DEV_V2:
|
||||
return btrfs_ioctl_rm_dev_v2(file, argp);
|
||||
case BTRFS_IOC_FS_INFO:
|
||||
return btrfs_ioctl_fs_info(root, argp);
|
||||
case BTRFS_IOC_DEV_INFO:
|
||||
@ -5556,3 +5633,24 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
switch (cmd) {
|
||||
case FS_IOC32_GETFLAGS:
|
||||
cmd = FS_IOC_GETFLAGS;
|
||||
break;
|
||||
case FS_IOC32_SETFLAGS:
|
||||
cmd = FS_IOC_SETFLAGS;
|
||||
break;
|
||||
case FS_IOC32_GETVERSION:
|
||||
cmd = FS_IOC_GETVERSION;
|
||||
break;
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
|
||||
}
|
||||
#endif
|
||||
|
@ -661,14 +661,15 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
|
||||
* wait for all the ordered extents in a root. This is done when balancing
|
||||
* space between drives.
|
||||
*/
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
|
||||
const u64 range_start, const u64 range_len)
|
||||
{
|
||||
struct list_head splice, works;
|
||||
LIST_HEAD(splice);
|
||||
LIST_HEAD(skipped);
|
||||
LIST_HEAD(works);
|
||||
struct btrfs_ordered_extent *ordered, *next;
|
||||
int count = 0;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
INIT_LIST_HEAD(&works);
|
||||
const u64 range_end = range_start + range_len;
|
||||
|
||||
mutex_lock(&root->ordered_extent_mutex);
|
||||
spin_lock(&root->ordered_extent_lock);
|
||||
@ -676,6 +677,14 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
|
||||
while (!list_empty(&splice) && nr) {
|
||||
ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
|
||||
root_extent_list);
|
||||
|
||||
if (range_end <= ordered->start ||
|
||||
ordered->start + ordered->disk_len <= range_start) {
|
||||
list_move_tail(&ordered->root_extent_list, &skipped);
|
||||
cond_resched_lock(&root->ordered_extent_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
list_move_tail(&ordered->root_extent_list,
|
||||
&root->ordered_extents);
|
||||
atomic_inc(&ordered->refs);
|
||||
@ -694,6 +703,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
|
||||
nr--;
|
||||
count++;
|
||||
}
|
||||
list_splice_tail(&skipped, &root->ordered_extents);
|
||||
list_splice_tail(&splice, &root->ordered_extents);
|
||||
spin_unlock(&root->ordered_extent_lock);
|
||||
|
||||
@ -708,7 +718,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
|
||||
return count;
|
||||
}
|
||||
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||
const u64 range_start, const u64 range_len)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct list_head splice;
|
||||
@ -728,7 +739,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
|
||||
&fs_info->ordered_roots);
|
||||
spin_unlock(&fs_info->ordered_root_lock);
|
||||
|
||||
done = btrfs_wait_ordered_extents(root, nr);
|
||||
done = btrfs_wait_ordered_extents(root, nr,
|
||||
range_start, range_len);
|
||||
btrfs_put_fs_root(root);
|
||||
|
||||
spin_lock(&fs_info->ordered_root_lock);
|
||||
|
@ -197,8 +197,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
|
||||
struct btrfs_ordered_extent *ordered);
|
||||
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
|
||||
u32 *sum, int len);
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
void btrfs_get_logged_extents(struct inode *inode,
|
||||
struct list_head *logged_list,
|
||||
const loff_t start,
|
||||
|
@ -2418,7 +2418,7 @@ again:
|
||||
}
|
||||
out:
|
||||
if (ret) {
|
||||
btrfs_std_error(root->fs_info, ret, NULL);
|
||||
btrfs_handle_fs_error(root->fs_info, ret, NULL);
|
||||
if (!list_empty(&reloc_roots))
|
||||
free_reloc_roots(&reloc_roots);
|
||||
|
||||
@ -4254,12 +4254,11 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
|
||||
btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu",
|
||||
rc->block_group->key.objectid, rc->block_group->flags);
|
||||
|
||||
ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out;
|
||||
}
|
||||
btrfs_wait_ordered_roots(fs_info, -1);
|
||||
btrfs_wait_block_group_reservations(rc->block_group);
|
||||
btrfs_wait_nocow_writers(rc->block_group);
|
||||
btrfs_wait_ordered_roots(fs_info, -1,
|
||||
rc->block_group->key.objectid,
|
||||
rc->block_group->key.offset);
|
||||
|
||||
while (1) {
|
||||
mutex_lock(&fs_info->cleaner_mutex);
|
||||
|
@ -284,7 +284,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
|
||||
trans = btrfs_join_transaction(tree_root);
|
||||
if (IS_ERR(trans)) {
|
||||
err = PTR_ERR(trans);
|
||||
btrfs_std_error(tree_root->fs_info, err,
|
||||
btrfs_handle_fs_error(tree_root->fs_info, err,
|
||||
"Failed to start trans to delete "
|
||||
"orphan item");
|
||||
break;
|
||||
@ -293,7 +293,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
|
||||
root_key.objectid);
|
||||
btrfs_end_transaction(trans, tree_root);
|
||||
if (err) {
|
||||
btrfs_std_error(tree_root->fs_info, err,
|
||||
btrfs_handle_fs_error(tree_root->fs_info, err,
|
||||
"Failed to delete root orphan "
|
||||
"item");
|
||||
break;
|
||||
|
@ -1350,7 +1350,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
|
||||
recover->bbio = bbio;
|
||||
recover->map_length = mapped_length;
|
||||
|
||||
BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
|
||||
BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
|
||||
|
||||
nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
|
||||
|
||||
@ -2127,6 +2127,8 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
|
||||
if (bio->bi_error)
|
||||
sblock->no_io_error_seen = 0;
|
||||
|
||||
bio_put(bio);
|
||||
|
||||
btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
|
||||
}
|
||||
|
||||
@ -2860,7 +2862,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
|
||||
int extent_mirror_num;
|
||||
int stop_loop = 0;
|
||||
|
||||
nsectors = map->stripe_len / root->sectorsize;
|
||||
nsectors = div_u64(map->stripe_len, root->sectorsize);
|
||||
bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
|
||||
sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
|
||||
GFP_NOFS);
|
||||
@ -3070,7 +3072,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
|
||||
int slot;
|
||||
u64 nstripes;
|
||||
struct extent_buffer *l;
|
||||
struct btrfs_key key;
|
||||
u64 physical;
|
||||
u64 logical;
|
||||
u64 logic_end;
|
||||
@ -3079,7 +3080,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
|
||||
int mirror_num;
|
||||
struct reada_control *reada1;
|
||||
struct reada_control *reada2;
|
||||
struct btrfs_key key_start;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key key_end;
|
||||
u64 increment = map->stripe_len;
|
||||
u64 offset;
|
||||
@ -3158,21 +3159,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
|
||||
scrub_blocked_if_needed(fs_info);
|
||||
|
||||
/* FIXME it might be better to start readahead at commit root */
|
||||
key_start.objectid = logical;
|
||||
key_start.type = BTRFS_EXTENT_ITEM_KEY;
|
||||
key_start.offset = (u64)0;
|
||||
key.objectid = logical;
|
||||
key.type = BTRFS_EXTENT_ITEM_KEY;
|
||||
key.offset = (u64)0;
|
||||
key_end.objectid = logic_end;
|
||||
key_end.type = BTRFS_METADATA_ITEM_KEY;
|
||||
key_end.offset = (u64)-1;
|
||||
reada1 = btrfs_reada_add(root, &key_start, &key_end);
|
||||
reada1 = btrfs_reada_add(root, &key, &key_end);
|
||||
|
||||
key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
||||
key_start.type = BTRFS_EXTENT_CSUM_KEY;
|
||||
key_start.offset = logical;
|
||||
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
||||
key.type = BTRFS_EXTENT_CSUM_KEY;
|
||||
key.offset = logical;
|
||||
key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
|
||||
key_end.type = BTRFS_EXTENT_CSUM_KEY;
|
||||
key_end.offset = logic_end;
|
||||
reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
|
||||
reada2 = btrfs_reada_add(csum_root, &key, &key_end);
|
||||
|
||||
if (!IS_ERR(reada1))
|
||||
btrfs_reada_wait(reada1);
|
||||
|
@ -5939,6 +5939,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
|
||||
u32 i;
|
||||
u64 *clone_sources_tmp = NULL;
|
||||
int clone_sources_to_rollback = 0;
|
||||
unsigned alloc_size;
|
||||
int sort_clone_roots = 0;
|
||||
int index;
|
||||
|
||||
@ -5978,6 +5979,12 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (arg->clone_sources_count >
|
||||
ULLONG_MAX / sizeof(*arg->clone_sources)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!access_ok(VERIFY_READ, arg->clone_sources,
|
||||
sizeof(*arg->clone_sources) *
|
||||
arg->clone_sources_count)) {
|
||||
@ -6022,40 +6029,53 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
|
||||
sctx->clone_roots_cnt = arg->clone_sources_count;
|
||||
|
||||
sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
|
||||
sctx->send_buf = vmalloc(sctx->send_max_size);
|
||||
sctx->send_buf = kmalloc(sctx->send_max_size, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!sctx->send_buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
sctx->send_buf = vmalloc(sctx->send_max_size);
|
||||
if (!sctx->send_buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
|
||||
sctx->read_buf = kmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!sctx->read_buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
|
||||
if (!sctx->read_buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
sctx->pending_dir_moves = RB_ROOT;
|
||||
sctx->waiting_dir_moves = RB_ROOT;
|
||||
sctx->orphan_dirs = RB_ROOT;
|
||||
|
||||
sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
|
||||
(arg->clone_sources_count + 1));
|
||||
if (!sctx->clone_roots) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
|
||||
|
||||
if (arg->clone_sources_count) {
|
||||
clone_sources_tmp = vmalloc(arg->clone_sources_count *
|
||||
sizeof(*arg->clone_sources));
|
||||
if (!clone_sources_tmp) {
|
||||
sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!sctx->clone_roots) {
|
||||
sctx->clone_roots = vzalloc(alloc_size);
|
||||
if (!sctx->clone_roots) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);
|
||||
|
||||
if (arg->clone_sources_count) {
|
||||
clone_sources_tmp = kmalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!clone_sources_tmp) {
|
||||
clone_sources_tmp = vmalloc(alloc_size);
|
||||
if (!clone_sources_tmp) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
|
||||
arg->clone_sources_count *
|
||||
sizeof(*arg->clone_sources));
|
||||
alloc_size);
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
@ -6089,7 +6109,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
|
||||
sctx->clone_roots[i].root = clone_root;
|
||||
clone_sources_to_rollback = i + 1;
|
||||
}
|
||||
vfree(clone_sources_tmp);
|
||||
kvfree(clone_sources_tmp);
|
||||
clone_sources_tmp = NULL;
|
||||
}
|
||||
|
||||
@ -6207,15 +6227,15 @@ out:
|
||||
btrfs_root_dec_send_in_progress(sctx->parent_root);
|
||||
|
||||
kfree(arg);
|
||||
vfree(clone_sources_tmp);
|
||||
kvfree(clone_sources_tmp);
|
||||
|
||||
if (sctx) {
|
||||
if (sctx->send_filp)
|
||||
fput(sctx->send_filp);
|
||||
|
||||
vfree(sctx->clone_roots);
|
||||
vfree(sctx->send_buf);
|
||||
vfree(sctx->read_buf);
|
||||
kvfree(sctx->clone_roots);
|
||||
kvfree(sctx->send_buf);
|
||||
kvfree(sctx->read_buf);
|
||||
|
||||
name_cache_free(sctx);
|
||||
|
||||
|
@ -97,15 +97,6 @@ const char *btrfs_decode_error(int errno)
|
||||
return errstr;
|
||||
}
|
||||
|
||||
static void save_error_info(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
/*
|
||||
* today we only save the error info into ram. Long term we'll
|
||||
* also send it down to the disk
|
||||
*/
|
||||
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
|
||||
}
|
||||
|
||||
/* btrfs handle error by forcing the filesystem readonly */
|
||||
static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
@ -131,11 +122,11 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
|
||||
/*
|
||||
* __btrfs_std_error decodes expected errors from the caller and
|
||||
* __btrfs_handle_fs_error decodes expected errors from the caller and
|
||||
* invokes the appropriate error response.
|
||||
*/
|
||||
__cold
|
||||
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno, const char *fmt, ...)
|
||||
{
|
||||
struct super_block *sb = fs_info->sb;
|
||||
@ -170,8 +161,13 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Today we only save the error info to memory. Long term we'll
|
||||
* also send it down to the disk
|
||||
*/
|
||||
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
|
||||
|
||||
/* Don't go through full error handling during mount */
|
||||
save_error_info(fs_info);
|
||||
if (sb->s_flags & MS_BORN)
|
||||
btrfs_handle_error(fs_info);
|
||||
}
|
||||
@ -252,7 +248,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
|
||||
/* Wake up anybody who may be waiting on this transaction */
|
||||
wake_up(&root->fs_info->transaction_wait);
|
||||
wake_up(&root->fs_info->transaction_blocked_wait);
|
||||
__btrfs_std_error(root->fs_info, function, line, errno, NULL);
|
||||
__btrfs_handle_fs_error(root->fs_info, function, line, errno, NULL);
|
||||
}
|
||||
/*
|
||||
* __btrfs_panic decodes unexpected, fatal errors from the caller,
|
||||
@ -1160,7 +1156,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
|
||||
return 0;
|
||||
}
|
||||
|
||||
btrfs_wait_ordered_roots(fs_info, -1);
|
||||
btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
|
||||
|
||||
trans = btrfs_attach_transaction_barrier(root);
|
||||
if (IS_ERR(trans)) {
|
||||
@ -1488,10 +1484,10 @@ static int setup_security_options(struct btrfs_fs_info *fs_info,
|
||||
memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
|
||||
} else {
|
||||
/*
|
||||
* Since SELinux(the only one supports security_mnt_opts) does
|
||||
* NOT support changing context during remount/mount same sb,
|
||||
* This must be the same or part of the same security options,
|
||||
* just free it.
|
||||
* Since SELinux (the only one supporting security_mnt_opts)
|
||||
* does NOT support changing context during remount/mount of
|
||||
* the same sb, this must be the same or part of the same
|
||||
* security options, just free it.
|
||||
*/
|
||||
security_free_mnt_opts(sec_opts);
|
||||
}
|
||||
@ -1669,8 +1665,8 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
|
||||
unsigned long old_opts)
|
||||
{
|
||||
/*
|
||||
* We need cleanup all defragable inodes if the autodefragment is
|
||||
* close or the fs is R/O.
|
||||
* We need to cleanup all defragable inodes if the autodefragment is
|
||||
* close or the filesystem is read only.
|
||||
*/
|
||||
if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
|
||||
(!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
|
||||
@ -2051,9 +2047,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
|
||||
int ret;
|
||||
u64 thresh = 0;
|
||||
int mixed = 0;
|
||||
|
||||
/*
|
||||
* holding chunk_muext to avoid allocating new chunks, holding
|
||||
* holding chunk_mutex to avoid allocating new chunks, holding
|
||||
* device_list_mutex to avoid the device being removed
|
||||
*/
|
||||
rcu_read_lock();
|
||||
@ -2076,8 +2073,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
|
||||
total_free_meta += found->disk_total - found->disk_used;
|
||||
|
||||
/*
|
||||
* Metadata in mixed block group profiles are accounted in data
|
||||
*/
|
||||
if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
|
||||
if (found->flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
mixed = 1;
|
||||
else
|
||||
total_free_meta += found->disk_total -
|
||||
found->disk_used;
|
||||
}
|
||||
|
||||
total_used += found->disk_used;
|
||||
}
|
||||
@ -2090,7 +2096,11 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
|
||||
/* Account global block reserve as used, it's in logical size already */
|
||||
spin_lock(&block_rsv->lock);
|
||||
buf->f_bfree -= block_rsv->size >> bits;
|
||||
/* Mixed block groups accounting is not byte-accurate, avoid overflow */
|
||||
if (buf->f_bfree >= block_rsv->size >> bits)
|
||||
buf->f_bfree -= block_rsv->size >> bits;
|
||||
else
|
||||
buf->f_bfree = 0;
|
||||
spin_unlock(&block_rsv->lock);
|
||||
|
||||
buf->f_bavail = div_u64(total_free_data, factor);
|
||||
@ -2115,7 +2125,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
*/
|
||||
thresh = 4 * 1024 * 1024;
|
||||
|
||||
if (total_free_meta - thresh < block_rsv->size)
|
||||
if (!mixed && total_free_meta - thresh < block_rsv->size)
|
||||
buf->f_bavail = 0;
|
||||
|
||||
buf->f_type = BTRFS_SUPER_MAGIC;
|
||||
|
@ -120,6 +120,9 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
|
||||
if (!fs_info)
|
||||
return -EPERM;
|
||||
|
||||
if (fs_info->sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
ret = kstrtoul(skip_spaces(buf), 0, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -364,7 +367,13 @@ static ssize_t btrfs_label_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
char *label = fs_info->super_copy->label;
|
||||
return snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
|
||||
ssize_t ret;
|
||||
|
||||
spin_lock(&fs_info->super_lock);
|
||||
ret = snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t btrfs_label_store(struct kobject *kobj,
|
||||
@ -374,6 +383,9 @@ static ssize_t btrfs_label_store(struct kobject *kobj,
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
size_t p_len;
|
||||
|
||||
if (!fs_info)
|
||||
return -EPERM;
|
||||
|
||||
if (fs_info->sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
|
@ -311,10 +311,11 @@ loop:
|
||||
* when the transaction commits
|
||||
*/
|
||||
static int record_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root)
|
||||
struct btrfs_root *root,
|
||||
int force)
|
||||
{
|
||||
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
|
||||
root->last_trans < trans->transid) {
|
||||
if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
|
||||
root->last_trans < trans->transid) || force) {
|
||||
WARN_ON(root == root->fs_info->extent_root);
|
||||
WARN_ON(root->commit_root != root->node);
|
||||
|
||||
@ -331,7 +332,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
smp_wmb();
|
||||
|
||||
spin_lock(&root->fs_info->fs_roots_radix_lock);
|
||||
if (root->last_trans == trans->transid) {
|
||||
if (root->last_trans == trans->transid && !force) {
|
||||
spin_unlock(&root->fs_info->fs_roots_radix_lock);
|
||||
return 0;
|
||||
}
|
||||
@ -402,7 +403,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
return 0;
|
||||
|
||||
mutex_lock(&root->fs_info->reloc_mutex);
|
||||
record_root_in_trans(trans, root);
|
||||
record_root_in_trans(trans, root, 0);
|
||||
mutex_unlock(&root->fs_info->reloc_mutex);
|
||||
|
||||
return 0;
|
||||
@ -1310,6 +1311,97 @@ int btrfs_defrag_root(struct btrfs_root *root)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Bisectability fixup, remove in 4.8 */
|
||||
#ifndef btrfs_std_error
|
||||
#define btrfs_std_error btrfs_handle_fs_error
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Do all special snapshot related qgroup dirty hack.
|
||||
*
|
||||
* Will do all needed qgroup inherit and dirty hack like switch commit
|
||||
* roots inside one transaction and write all btree into disk, to make
|
||||
* qgroups work.
|
||||
*/
|
||||
/*
 * @trans:        transaction handle handed to every commit/accounting step
 * @src:          root whose fs_info is used and whose root_key.objectid is
 *                passed to btrfs_qgroup_inherit() together with @dst_objectid
 * @parent:       root that is recorded in @trans again (force == 1) when
 *                everything succeeded
 * @inherit:      passed through unchanged to btrfs_qgroup_inherit()
 * @dst_objectid: passed through unchanged to btrfs_qgroup_inherit()
 *
 * Returns 0 when quotas are disabled or all steps succeeded; otherwise the
 * value returned by the step that failed.
 */
static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *src,
|
||||
struct btrfs_root *parent,
|
||||
struct btrfs_qgroup_inherit *inherit,
|
||||
u64 dst_objectid)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = src->fs_info;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Save some performance in the case that qgroups are not
|
||||
* enabled. If this check races with the ioctl, rescan will
|
||||
* kick in anyway.
|
||||
*/
|
||||
mutex_lock(&fs_info->qgroup_ioctl_lock);
|
||||
if (!fs_info->quota_enabled) {
|
||||
mutex_unlock(&fs_info->qgroup_ioctl_lock);
|
||||
return 0;
|
||||
}
|
||||
mutex_unlock(&fs_info->qgroup_ioctl_lock);
|
||||
|
||||
/*
|
||||
* We are going to commit transaction, see btrfs_commit_transaction()
|
||||
* comment for reason locking tree_log_mutex
|
||||
*/
|
||||
mutex_lock(&fs_info->tree_log_mutex);
|
||||
|
||||
/* every failure from here on must leave through "out" to drop the mutex */
ret = commit_fs_roots(trans, src);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = btrfs_qgroup_account_extents(trans, fs_info);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* Now qgroup are all updated, we can inherit it to new qgroups */
|
||||
ret = btrfs_qgroup_inherit(trans, fs_info,
|
||||
src->root_key.objectid, dst_objectid,
|
||||
inherit);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Now we do a simplified commit transaction, which will:
|
||||
* 1) commit all subvolume and extent tree
|
||||
* To ensure all subvolume and extent tree have a valid
|
||||
* commit_root to accounting later insert_dir_item()
|
||||
* 2) write all btree blocks onto disk
|
||||
* This is to make sure later btree modification will be cowed
|
||||
* Or commit_root can be populated and cause wrong qgroup numbers
|
||||
* In this simplified commit, we don't really care about other trees
|
||||
* like chunk and root tree, as they won't affect qgroup.
|
||||
* And we don't write super to avoid half committed status.
|
||||
*/
|
||||
ret = commit_cowonly_roots(trans, src);
|
||||
if (ret)
|
||||
goto out;
|
||||
switch_commit_roots(trans->transaction, fs_info);
|
||||
ret = btrfs_write_and_wait_transaction(trans, src);
|
||||
/* a write failure is reported here and still returned to the caller */
if (ret)
|
||||
btrfs_std_error(fs_info, ret,
|
||||
"Error while writing out transaction for qgroup");
|
||||
|
||||
out:
|
||||
mutex_unlock(&fs_info->tree_log_mutex);
|
||||
|
||||
/*
|
||||
* Force parent root to be updated, as we recorded it before so its
|
||||
* last_trans == cur_transid.
|
||||
* Or it won't be committed again onto disk after later
|
||||
* insert_dir_item()
|
||||
*/
|
||||
if (!ret)
|
||||
record_root_in_trans(trans, parent, 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* new snapshots need to be created at a very specific time in the
|
||||
* transaction commit. This does the actual creation.
|
||||
@ -1383,7 +1475,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||
dentry = pending->dentry;
|
||||
parent_inode = pending->dir;
|
||||
parent_root = BTRFS_I(parent_inode)->root;
|
||||
record_root_in_trans(trans, parent_root);
|
||||
record_root_in_trans(trans, parent_root, 0);
|
||||
|
||||
cur_time = current_fs_time(parent_inode->i_sb);
|
||||
|
||||
@ -1420,7 +1512,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
record_root_in_trans(trans, root);
|
||||
record_root_in_trans(trans, root, 0);
|
||||
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
|
||||
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
|
||||
btrfs_check_and_init_root_item(new_root_item);
|
||||
@ -1516,6 +1608,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do special qgroup accounting for snapshot, as we do some qgroup
|
||||
* snapshot hack to do fast snapshot.
|
||||
* To co-operate with that hack, we do hack again.
|
||||
* Or snapshot will be greatly slowed down by a subtree qgroup rescan
|
||||
*/
|
||||
ret = qgroup_account_snapshot(trans, root, parent_root,
|
||||
pending->inherit, objectid);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
|
||||
ret = btrfs_insert_dir_item(trans, parent_root,
|
||||
dentry->d_name.name, dentry->d_name.len,
|
||||
parent_inode, &key,
|
||||
@ -1559,23 +1662,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* account qgroup counters before qgroup_inherit()
|
||||
*/
|
||||
ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
|
||||
if (ret)
|
||||
goto fail;
|
||||
ret = btrfs_qgroup_account_extents(trans, fs_info);
|
||||
if (ret)
|
||||
goto fail;
|
||||
ret = btrfs_qgroup_inherit(trans, fs_info,
|
||||
root->root_key.objectid,
|
||||
objectid, pending->inherit);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
fail:
|
||||
pending->error = ret;
|
||||
dir_item_existed:
|
||||
@ -1821,7 +1907,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
|
||||
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
|
||||
btrfs_wait_ordered_roots(fs_info, -1);
|
||||
btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -2145,7 +2231,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
|
||||
|
||||
ret = btrfs_write_and_wait_transaction(trans, root);
|
||||
if (ret) {
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
btrfs_handle_fs_error(root->fs_info, ret,
|
||||
"Error while writing out transaction");
|
||||
mutex_unlock(&root->fs_info->tree_log_mutex);
|
||||
goto scrub_continue;
|
||||
|
@ -4141,6 +4141,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
|
||||
|
||||
INIT_LIST_HEAD(&extents);
|
||||
|
||||
down_write(&BTRFS_I(inode)->dio_sem);
|
||||
write_lock(&tree->lock);
|
||||
test_gen = root->fs_info->last_trans_committed;
|
||||
|
||||
@ -4169,13 +4170,20 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
list_sort(NULL, &extents, extent_cmp);
|
||||
/*
|
||||
* Collect any new ordered extents within the range. This is to
|
||||
* prevent logging file extent items without waiting for the disk
|
||||
* location they point to being written. We do this only to deal
|
||||
* with races against concurrent lockless direct IO writes.
|
||||
*/
|
||||
btrfs_get_logged_extents(inode, logged_list, start, end);
|
||||
/*
|
||||
* Some ordered extents started by fsync might have completed
|
||||
* before we could collect them into the list logged_list, which
|
||||
* means they're gone, not in our logged_list nor in the inode's
|
||||
* ordered tree. We want the application/user space to know an
|
||||
* error happened while attempting to persist file data so that
|
||||
* it can take proper action. If such error happened, we leave
|
||||
* without writing to the log tree and the fsync must report the
|
||||
* file data write error and not commit the current transaction.
|
||||
*/
|
||||
ret = btrfs_inode_check_errors(inode);
|
||||
if (ret)
|
||||
ctx->io_err = ret;
|
||||
process:
|
||||
while (!list_empty(&extents)) {
|
||||
em = list_entry(extents.next, struct extent_map, list);
|
||||
@ -4202,6 +4210,7 @@ process:
|
||||
}
|
||||
WARN_ON(!list_empty(&extents));
|
||||
write_unlock(&tree->lock);
|
||||
up_write(&BTRFS_I(inode)->dio_sem);
|
||||
|
||||
btrfs_release_path(path);
|
||||
return ret;
|
||||
@ -4622,23 +4631,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
|
||||
mutex_lock(&BTRFS_I(inode)->log_mutex);
|
||||
|
||||
/*
|
||||
* Collect ordered extents only if we are logging data. This is to
|
||||
* ensure a subsequent request to log this inode in LOG_INODE_ALL mode
|
||||
* will process the ordered extents if they still exists at the time,
|
||||
* because when we collect them we test and set for the flag
|
||||
* BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the
|
||||
* same ordered extents. The consequence for the LOG_INODE_ALL log mode
|
||||
* not processing the ordered extents is that we end up logging the
|
||||
* corresponding file extent items, based on the extent maps in the
|
||||
* inode's extent_map_tree's modified_list, without logging the
|
||||
* respective checksums (since the may still be only attached to the
|
||||
* ordered extents and have not been inserted in the csum tree by
|
||||
* btrfs_finish_ordered_io() yet).
|
||||
*/
|
||||
if (inode_only == LOG_INODE_ALL)
|
||||
btrfs_get_logged_extents(inode, &logged_list, start, end);
|
||||
|
||||
/*
|
||||
* a brute force approach to making sure we get the most uptodate
|
||||
* copies of everything.
|
||||
@ -4846,21 +4838,6 @@ log_extents:
|
||||
goto out_unlock;
|
||||
}
|
||||
if (fast_search) {
|
||||
/*
|
||||
* Some ordered extents started by fsync might have completed
|
||||
* before we collected the ordered extents in logged_list, which
|
||||
* means they're gone, not in our logged_list nor in the inode's
|
||||
* ordered tree. We want the application/user space to know an
|
||||
* error happened while attempting to persist file data so that
|
||||
* it can take proper action. If such error happened, we leave
|
||||
* without writing to the log tree and the fsync must report the
|
||||
* file data write error and not commit the current transaction.
|
||||
*/
|
||||
err = btrfs_inode_check_errors(inode);
|
||||
if (err) {
|
||||
ctx->io_err = err;
|
||||
goto out_unlock;
|
||||
}
|
||||
ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
|
||||
&logged_list, ctx, start, end);
|
||||
if (ret) {
|
||||
@ -5158,7 +5135,7 @@ process_leaf:
|
||||
}
|
||||
|
||||
ctx->log_new_dentries = false;
|
||||
if (type == BTRFS_FT_DIR)
|
||||
if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
|
||||
log_mode = LOG_INODE_ALL;
|
||||
btrfs_release_path(path);
|
||||
ret = btrfs_log_inode(trans, root, di_inode,
|
||||
@ -5278,11 +5255,16 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
|
||||
if (IS_ERR(dir_inode))
|
||||
continue;
|
||||
|
||||
if (ctx)
|
||||
ctx->log_new_dentries = false;
|
||||
ret = btrfs_log_inode(trans, root, dir_inode,
|
||||
LOG_INODE_ALL, 0, LLONG_MAX, ctx);
|
||||
if (!ret &&
|
||||
btrfs_must_commit_transaction(trans, dir_inode))
|
||||
ret = 1;
|
||||
if (!ret && ctx && ctx->log_new_dentries)
|
||||
ret = log_new_dir_dentries(trans, root,
|
||||
dir_inode, ctx);
|
||||
iput(dir_inode);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -5519,7 +5501,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
|
||||
|
||||
ret = walk_log_tree(trans, log_root_tree, &wc);
|
||||
if (ret) {
|
||||
btrfs_std_error(fs_info, ret, "Failed to pin buffers while "
|
||||
btrfs_handle_fs_error(fs_info, ret, "Failed to pin buffers while "
|
||||
"recovering log root tree.");
|
||||
goto error;
|
||||
}
|
||||
@ -5533,7 +5515,7 @@ again:
|
||||
ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
|
||||
|
||||
if (ret < 0) {
|
||||
btrfs_std_error(fs_info, ret,
|
||||
btrfs_handle_fs_error(fs_info, ret,
|
||||
"Couldn't find tree log root.");
|
||||
goto error;
|
||||
}
|
||||
@ -5551,7 +5533,7 @@ again:
|
||||
log = btrfs_read_fs_root(log_root_tree, &found_key);
|
||||
if (IS_ERR(log)) {
|
||||
ret = PTR_ERR(log);
|
||||
btrfs_std_error(fs_info, ret,
|
||||
btrfs_handle_fs_error(fs_info, ret,
|
||||
"Couldn't read tree log root.");
|
||||
goto error;
|
||||
}
|
||||
@ -5566,7 +5548,7 @@ again:
|
||||
free_extent_buffer(log->node);
|
||||
free_extent_buffer(log->commit_root);
|
||||
kfree(log);
|
||||
btrfs_std_error(fs_info, ret, "Couldn't read target root "
|
||||
btrfs_handle_fs_error(fs_info, ret, "Couldn't read target root "
|
||||
"for tree log recovery.");
|
||||
goto error;
|
||||
}
|
||||
@ -5652,11 +5634,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
|
||||
* into the file. When the file is logged we check it and
|
||||
* don't log the parents if the file is fully on disk.
|
||||
*/
|
||||
if (S_ISREG(inode->i_mode)) {
|
||||
mutex_lock(&BTRFS_I(inode)->log_mutex);
|
||||
BTRFS_I(inode)->last_unlink_trans = trans->transid;
|
||||
mutex_unlock(&BTRFS_I(inode)->log_mutex);
|
||||
}
|
||||
mutex_lock(&BTRFS_I(inode)->log_mutex);
|
||||
BTRFS_I(inode)->last_unlink_trans = trans->transid;
|
||||
mutex_unlock(&BTRFS_I(inode)->log_mutex);
|
||||
|
||||
/*
|
||||
* if this directory was already logged any new
|
||||
|
@ -118,6 +118,21 @@ const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
|
||||
[BTRFS_RAID_RAID6] = BTRFS_BLOCK_GROUP_RAID6,
|
||||
};
|
||||
|
||||
/*
|
||||
* Table to convert BTRFS_RAID_* to the error code if minimum number of devices
|
||||
* condition is not met. Zero means there's no corresponding
|
||||
* BTRFS_ERROR_DEV_*_NOT_MET value.
|
||||
*/
|
||||
/* Indexed by BTRFS_RAID_*; one entry per RAID profile type. */
const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
|
||||
[BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
|
||||
[BTRFS_RAID_RAID1] = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
|
||||
/* DUP, RAID0 and SINGLE have no dedicated "minimum not met" code */
[BTRFS_RAID_DUP] = 0,
|
||||
[BTRFS_RAID_RAID0] = 0,
|
||||
[BTRFS_RAID_SINGLE] = 0,
|
||||
[BTRFS_RAID_RAID5] = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
|
||||
[BTRFS_RAID_RAID6] = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
|
||||
};
|
||||
|
||||
static int init_first_rw_device(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_device *device);
|
||||
@ -699,7 +714,8 @@ static noinline int device_list_add(const char *path,
|
||||
* if there is new btrfs on an already registered device,
|
||||
* then remove the stale device entry.
|
||||
*/
|
||||
btrfs_free_stale_device(device);
|
||||
if (ret > 0)
|
||||
btrfs_free_stale_device(device);
|
||||
|
||||
*fs_devices_ret = fs_devices;
|
||||
|
||||
@ -988,6 +1004,56 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Release a super block page that was mapped by btrfs_read_disk_super():
 * undo the kmap() and drop the page reference.
 */
void btrfs_release_disk_super(struct page *page)
|
||||
{
|
||||
kunmap(page);
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
/*
 * Read the btrfs super block stored at byte offset @bytenr of @bdev via
 * read_cache_page_gfp() on the block device inode's mapping.
 *
 * @bdev:       block device to read from
 * @bytenr:     byte offset of the super block on the device
 * @page:       set to the page holding the super block; on success the page
 *              is kmap()ed and must be released with
 *              btrfs_release_disk_super()
 * @disk_super: on success, set to point at the super block inside @page
 *
 * Returns 0 on success and 1 on any failure: the super block does not fit
 * in the device or in one page, it would straddle two pages, the page could
 * not be read, or the bytenr/magic stored in the super block do not match.
 * After a failure *page must not be used; it may be untouched, hold an
 * error pointer/NULL, or refer to a page that was already released.
 */
int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
|
||||
struct page **page, struct btrfs_super_block **disk_super)
|
||||
{
|
||||
void *p;
|
||||
pgoff_t index;
|
||||
|
||||
/* make sure our super fits in the device */
|
||||
if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
|
||||
return 1;
|
||||
|
||||
/* make sure our super fits in the page */
|
||||
if (sizeof(**disk_super) > PAGE_SIZE)
|
||||
return 1;
|
||||
|
||||
/* make sure our super doesn't straddle pages on disk */
|
||||
index = bytenr >> PAGE_SHIFT;
|
||||
if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index)
|
||||
return 1;
|
||||
|
||||
/* pull in the page with our super */
|
||||
*page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
|
||||
index, GFP_KERNEL);
|
||||
|
||||
if (IS_ERR_OR_NULL(*page))
|
||||
return 1;
|
||||
|
||||
p = kmap(*page);
|
||||
|
||||
/* align our pointer to the offset of the super block */
|
||||
*disk_super = p + (bytenr & ~PAGE_MASK);
|
||||
|
||||
/* not a btrfs super block for this offset: unmap and drop the page */
if (btrfs_super_bytenr(*disk_super) != bytenr ||
|
||||
btrfs_super_magic(*disk_super) != BTRFS_MAGIC) {
|
||||
btrfs_release_disk_super(*page);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* force NUL termination of a non-empty label that fills the whole field */
if ((*disk_super)->label[0] &&
|
||||
(*disk_super)->label[BTRFS_LABEL_SIZE - 1])
|
||||
(*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0';
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Look for a btrfs signature on a device. This may be called out of the mount path
|
||||
* and we are not allowed to call set_blocksize during the scan. The superblock
|
||||
@ -999,13 +1065,11 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
|
||||
struct btrfs_super_block *disk_super;
|
||||
struct block_device *bdev;
|
||||
struct page *page;
|
||||
void *p;
|
||||
int ret = -EINVAL;
|
||||
u64 devid;
|
||||
u64 transid;
|
||||
u64 total_devices;
|
||||
u64 bytenr;
|
||||
pgoff_t index;
|
||||
|
||||
/*
|
||||
* we would like to check all the supers, but that would make
|
||||
@ -1018,41 +1082,14 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
|
||||
mutex_lock(&uuid_mutex);
|
||||
|
||||
bdev = blkdev_get_by_path(path, flags, holder);
|
||||
|
||||
if (IS_ERR(bdev)) {
|
||||
ret = PTR_ERR(bdev);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* make sure our super fits in the device */
|
||||
if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
|
||||
if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super))
|
||||
goto error_bdev_put;
|
||||
|
||||
/* make sure our super fits in the page */
|
||||
if (sizeof(*disk_super) > PAGE_SIZE)
|
||||
goto error_bdev_put;
|
||||
|
||||
/* make sure our super doesn't straddle pages on disk */
|
||||
index = bytenr >> PAGE_SHIFT;
|
||||
if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
|
||||
goto error_bdev_put;
|
||||
|
||||
/* pull in the page with our super */
|
||||
page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
|
||||
index, GFP_NOFS);
|
||||
|
||||
if (IS_ERR_OR_NULL(page))
|
||||
goto error_bdev_put;
|
||||
|
||||
p = kmap(page);
|
||||
|
||||
/* align our pointer to the offset of the super block */
|
||||
disk_super = p + (bytenr & ~PAGE_MASK);
|
||||
|
||||
if (btrfs_super_bytenr(disk_super) != bytenr ||
|
||||
btrfs_super_magic(disk_super) != BTRFS_MAGIC)
|
||||
goto error_unmap;
|
||||
|
||||
devid = btrfs_stack_device_id(&disk_super->dev_item);
|
||||
transid = btrfs_super_generation(disk_super);
|
||||
total_devices = btrfs_super_num_devices(disk_super);
|
||||
@ -1060,8 +1097,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
|
||||
ret = device_list_add(path, disk_super, devid, fs_devices_ret);
|
||||
if (ret > 0) {
|
||||
if (disk_super->label[0]) {
|
||||
if (disk_super->label[BTRFS_LABEL_SIZE - 1])
|
||||
disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
|
||||
printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
|
||||
} else {
|
||||
printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
|
||||
@ -1073,9 +1108,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
|
||||
if (!ret && fs_devices_ret)
|
||||
(*fs_devices_ret)->total_devices = total_devices;
|
||||
|
||||
error_unmap:
|
||||
kunmap(page);
|
||||
put_page(page);
|
||||
btrfs_release_disk_super(page);
|
||||
|
||||
error_bdev_put:
|
||||
blkdev_put(bdev, flags);
|
||||
@ -1454,7 +1487,7 @@ again:
|
||||
extent = btrfs_item_ptr(leaf, path->slots[0],
|
||||
struct btrfs_dev_extent);
|
||||
} else {
|
||||
btrfs_std_error(root->fs_info, ret, "Slot search failed");
|
||||
btrfs_handle_fs_error(root->fs_info, ret, "Slot search failed");
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1462,7 +1495,7 @@ again:
|
||||
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
if (ret) {
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
btrfs_handle_fs_error(root->fs_info, ret,
|
||||
"Failed to remove dev extent item");
|
||||
} else {
|
||||
set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
|
||||
@ -1688,31 +1721,91 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_rm_device(struct btrfs_root *root, char *device_path)
|
||||
/*
|
||||
* Verify that @num_devices satisfies the RAID profile constraints in the whole
|
||||
* filesystem. It's up to the caller to adjust that number regarding eg. device
|
||||
* replace.
|
||||
*/
|
||||
static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
|
||||
u64 num_devices)
|
||||
{
|
||||
struct btrfs_device *device;
|
||||
struct btrfs_device *next_device;
|
||||
struct block_device *bdev;
|
||||
struct buffer_head *bh = NULL;
|
||||
struct btrfs_super_block *disk_super;
|
||||
struct btrfs_fs_devices *cur_devices;
|
||||
u64 all_avail;
|
||||
u64 devid;
|
||||
u64 num_devices;
|
||||
u8 *dev_uuid;
|
||||
unsigned seq;
|
||||
int ret = 0;
|
||||
bool clear_super = false;
|
||||
|
||||
mutex_lock(&uuid_mutex);
|
||||
int i;
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&root->fs_info->profiles_lock);
|
||||
seq = read_seqbegin(&fs_info->profiles_lock);
|
||||
|
||||
all_avail = root->fs_info->avail_data_alloc_bits |
|
||||
root->fs_info->avail_system_alloc_bits |
|
||||
root->fs_info->avail_metadata_alloc_bits;
|
||||
} while (read_seqretry(&root->fs_info->profiles_lock, seq));
|
||||
all_avail = fs_info->avail_data_alloc_bits |
|
||||
fs_info->avail_system_alloc_bits |
|
||||
fs_info->avail_metadata_alloc_bits;
|
||||
} while (read_seqretry(&fs_info->profiles_lock, seq));
|
||||
|
||||
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
|
||||
if (!(all_avail & btrfs_raid_group[i]))
|
||||
continue;
|
||||
|
||||
if (num_devices < btrfs_raid_array[i].devs_min) {
|
||||
int ret = btrfs_raid_mindev_error[i];
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Return the first entry on @fs_devs->devices that is not @device, is not
 * marked missing and has a block device attached, or NULL when the list
 * holds no such entry.
 */
struct btrfs_device *btrfs_find_next_active_device(struct btrfs_fs_devices *fs_devs,
|
||||
struct btrfs_device *device)
|
||||
{
|
||||
struct btrfs_device *next_device;
|
||||
|
||||
list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
|
||||
if (next_device != device &&
|
||||
!next_device->missing && next_device->bdev)
|
||||
return next_device;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function to check if the given device is part of s_bdev / latest_bdev
|
||||
* and replace it with the provided or the next active device, in the context
|
||||
* where this function is called, there should always be another device (or
|
||||
* this_dev) which is active.
|
||||
*/
|
||||
void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *device, struct btrfs_device *this_dev)
|
||||
{
|
||||
struct btrfs_device *next_device;
|
||||
|
||||
/* prefer the caller-supplied replacement, otherwise search for one */
if (this_dev)
|
||||
next_device = this_dev;
|
||||
else
|
||||
next_device = btrfs_find_next_active_device(fs_info->fs_devices,
|
||||
device);
|
||||
/*
 * NOTE(review): next_device is dereferenced below; if ASSERT() can be
 * compiled out, a NULL result from the search would not be caught here
 * -- confirm.
 */
ASSERT(next_device);
|
||||
|
||||
/* repoint the super block's bdev if it still refers to @device */
if (fs_info->sb->s_bdev &&
|
||||
(fs_info->sb->s_bdev == device->bdev))
|
||||
fs_info->sb->s_bdev = next_device->bdev;
|
||||
|
||||
/* same for the latest_bdev pointer kept in fs_devices */
if (fs_info->fs_devices->latest_bdev == device->bdev)
|
||||
fs_info->fs_devices->latest_bdev = next_device->bdev;
|
||||
}
|
||||
|
||||
int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid)
|
||||
{
|
||||
struct btrfs_device *device;
|
||||
struct btrfs_fs_devices *cur_devices;
|
||||
u64 num_devices;
|
||||
int ret = 0;
|
||||
bool clear_super = false;
|
||||
char *dev_name = NULL;
|
||||
|
||||
mutex_lock(&uuid_mutex);
|
||||
|
||||
num_devices = root->fs_info->fs_devices->num_devices;
|
||||
btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
|
||||
@ -1722,78 +1815,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
|
||||
}
|
||||
btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
|
||||
|
||||
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
|
||||
ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
|
||||
ret = btrfs_check_raid_min_devices(root->fs_info, num_devices - 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
|
||||
ret = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET;
|
||||
ret = btrfs_find_device_by_devspec(root, devid, device_path,
|
||||
&device);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) &&
|
||||
root->fs_info->fs_devices->rw_devices <= 2) {
|
||||
ret = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET;
|
||||
goto out;
|
||||
}
|
||||
if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) &&
|
||||
root->fs_info->fs_devices->rw_devices <= 3) {
|
||||
ret = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (strcmp(device_path, "missing") == 0) {
|
||||
struct list_head *devices;
|
||||
struct btrfs_device *tmp;
|
||||
|
||||
device = NULL;
|
||||
devices = &root->fs_info->fs_devices->devices;
|
||||
/*
|
||||
* It is safe to read the devices since the volume_mutex
|
||||
* is held.
|
||||
*/
|
||||
list_for_each_entry(tmp, devices, dev_list) {
|
||||
if (tmp->in_fs_metadata &&
|
||||
!tmp->is_tgtdev_for_dev_replace &&
|
||||
!tmp->bdev) {
|
||||
device = tmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
bdev = NULL;
|
||||
bh = NULL;
|
||||
disk_super = NULL;
|
||||
if (!device) {
|
||||
ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
ret = btrfs_get_bdev_and_sb(device_path,
|
||||
FMODE_WRITE | FMODE_EXCL,
|
||||
root->fs_info->bdev_holder, 0,
|
||||
&bdev, &bh);
|
||||
if (ret)
|
||||
goto out;
|
||||
disk_super = (struct btrfs_super_block *)bh->b_data;
|
||||
devid = btrfs_stack_device_id(&disk_super->dev_item);
|
||||
dev_uuid = disk_super->dev_item.uuid;
|
||||
device = btrfs_find_device(root->fs_info, devid, dev_uuid,
|
||||
disk_super->fsid);
|
||||
if (!device) {
|
||||
ret = -ENOENT;
|
||||
goto error_brelse;
|
||||
}
|
||||
}
|
||||
|
||||
if (device->is_tgtdev_for_dev_replace) {
|
||||
ret = BTRFS_ERROR_DEV_TGT_REPLACE;
|
||||
goto error_brelse;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
|
||||
ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
|
||||
goto error_brelse;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (device->writeable) {
|
||||
@ -1801,6 +1839,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
|
||||
list_del_init(&device->dev_alloc_list);
|
||||
device->fs_devices->rw_devices--;
|
||||
unlock_chunks(root);
|
||||
dev_name = kstrdup(device->name->str, GFP_KERNEL);
|
||||
if (!dev_name) {
|
||||
ret = -ENOMEM;
|
||||
goto error_undo;
|
||||
}
|
||||
clear_super = true;
|
||||
}
|
||||
|
||||
@ -1842,12 +1885,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
|
||||
if (device->missing)
|
||||
device->fs_devices->missing_devices--;
|
||||
|
||||
next_device = list_entry(root->fs_info->fs_devices->devices.next,
|
||||
struct btrfs_device, dev_list);
|
||||
if (device->bdev == root->fs_info->sb->s_bdev)
|
||||
root->fs_info->sb->s_bdev = next_device->bdev;
|
||||
if (device->bdev == root->fs_info->fs_devices->latest_bdev)
|
||||
root->fs_info->fs_devices->latest_bdev = next_device->bdev;
|
||||
btrfs_assign_next_active_device(root->fs_info, device, NULL);
|
||||
|
||||
if (device->bdev) {
|
||||
device->fs_devices->open_devices--;
|
||||
@ -1883,63 +1921,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
|
||||
* at this point, the device is zero sized. We want to
|
||||
* remove it from the devices list and zero out the old super
|
||||
*/
|
||||
if (clear_super && disk_super) {
|
||||
u64 bytenr;
|
||||
int i;
|
||||
if (clear_super) {
|
||||
struct block_device *bdev;
|
||||
|
||||
/* make sure this device isn't detected as part of
|
||||
* the FS anymore
|
||||
*/
|
||||
memset(&disk_super->magic, 0, sizeof(disk_super->magic));
|
||||
set_buffer_dirty(bh);
|
||||
sync_dirty_buffer(bh);
|
||||
|
||||
/* clear the mirror copies of super block on the disk
|
||||
* being removed, 0th copy is been taken care above and
|
||||
* the below would take of the rest
|
||||
*/
|
||||
for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) {
|
||||
bytenr = btrfs_sb_offset(i);
|
||||
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
|
||||
i_size_read(bdev->bd_inode))
|
||||
break;
|
||||
|
||||
brelse(bh);
|
||||
bh = __bread(bdev, bytenr / 4096,
|
||||
BTRFS_SUPER_INFO_SIZE);
|
||||
if (!bh)
|
||||
continue;
|
||||
|
||||
disk_super = (struct btrfs_super_block *)bh->b_data;
|
||||
|
||||
if (btrfs_super_bytenr(disk_super) != bytenr ||
|
||||
btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
|
||||
continue;
|
||||
}
|
||||
memset(&disk_super->magic, 0,
|
||||
sizeof(disk_super->magic));
|
||||
set_buffer_dirty(bh);
|
||||
sync_dirty_buffer(bh);
|
||||
bdev = blkdev_get_by_path(dev_name, FMODE_READ | FMODE_EXCL,
|
||||
root->fs_info->bdev_holder);
|
||||
if (!IS_ERR(bdev)) {
|
||||
btrfs_scratch_superblocks(bdev, dev_name);
|
||||
blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
if (bdev) {
|
||||
/* Notify udev that device has changed */
|
||||
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
|
||||
|
||||
/* Update ctime/mtime for device path for libblkid */
|
||||
update_dev_time(device_path);
|
||||
}
|
||||
|
||||
error_brelse:
|
||||
brelse(bh);
|
||||
if (bdev)
|
||||
blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
|
||||
out:
|
||||
kfree(dev_name);
|
||||
|
||||
mutex_unlock(&uuid_mutex);
|
||||
return ret;
|
||||
|
||||
error_undo:
|
||||
if (device->writeable) {
|
||||
lock_chunks(root);
|
||||
@ -1948,7 +1946,7 @@ error_undo:
|
||||
device->fs_devices->rw_devices++;
|
||||
unlock_chunks(root);
|
||||
}
|
||||
goto error_brelse;
|
||||
goto out;
|
||||
}
|
||||
|
||||
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
|
||||
@ -1972,11 +1970,8 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
|
||||
if (srcdev->missing)
|
||||
fs_devices->missing_devices--;
|
||||
|
||||
if (srcdev->writeable) {
|
||||
if (srcdev->writeable)
|
||||
fs_devices->rw_devices--;
|
||||
/* zero out the old super if it is writable */
|
||||
btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
|
||||
}
|
||||
|
||||
if (srcdev->bdev)
|
||||
fs_devices->open_devices--;
|
||||
@ -1987,6 +1982,10 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
|
||||
|
||||
if (srcdev->writeable) {
|
||||
/* zero out the old super if it is writable */
|
||||
btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
|
||||
}
|
||||
call_rcu(&srcdev->rcu, free_device);
|
||||
|
||||
/*
|
||||
@ -2016,32 +2015,33 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
|
||||
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *tgtdev)
|
||||
{
|
||||
struct btrfs_device *next_device;
|
||||
|
||||
mutex_lock(&uuid_mutex);
|
||||
WARN_ON(!tgtdev);
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
|
||||
btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
|
||||
|
||||
if (tgtdev->bdev) {
|
||||
btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
|
||||
if (tgtdev->bdev)
|
||||
fs_info->fs_devices->open_devices--;
|
||||
}
|
||||
|
||||
fs_info->fs_devices->num_devices--;
|
||||
|
||||
next_device = list_entry(fs_info->fs_devices->devices.next,
|
||||
struct btrfs_device, dev_list);
|
||||
if (tgtdev->bdev == fs_info->sb->s_bdev)
|
||||
fs_info->sb->s_bdev = next_device->bdev;
|
||||
if (tgtdev->bdev == fs_info->fs_devices->latest_bdev)
|
||||
fs_info->fs_devices->latest_bdev = next_device->bdev;
|
||||
list_del_rcu(&tgtdev->dev_list);
|
||||
btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
|
||||
|
||||
call_rcu(&tgtdev->rcu, free_device);
|
||||
list_del_rcu(&tgtdev->dev_list);
|
||||
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
mutex_unlock(&uuid_mutex);
|
||||
|
||||
/*
|
||||
* The update_dev_time() within btrfs_scratch_superblocks()
|
||||
* may lead to a call to btrfs_show_devname() which will try
|
||||
* to hold device_list_mutex. And here this device
|
||||
* is already out of device list, so we don't have to hold
|
||||
* the device_list_mutex lock.
|
||||
*/
|
||||
btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
|
||||
call_rcu(&tgtdev->rcu, free_device);
|
||||
}
|
||||
|
||||
static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
|
||||
@ -2101,6 +2101,31 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup a device given by device id, or the path if the id is 0.
|
||||
*/
|
||||
int btrfs_find_device_by_devspec(struct btrfs_root *root, u64 devid,
|
||||
char *devpath,
|
||||
struct btrfs_device **device)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (devid) {
|
||||
ret = 0;
|
||||
*device = btrfs_find_device(root->fs_info, devid, NULL,
|
||||
NULL);
|
||||
if (!*device)
|
||||
ret = -ENOENT;
|
||||
} else {
|
||||
if (!devpath || !devpath[0])
|
||||
return -EINVAL;
|
||||
|
||||
ret = btrfs_find_device_missing_or_by_path(root, devpath,
|
||||
device);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* does all the dirty work required for changing file system's UUID.
|
||||
*/
|
||||
@ -2418,7 +2443,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
|
||||
|
||||
ret = btrfs_relocate_sys_chunks(root);
|
||||
if (ret < 0)
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
btrfs_handle_fs_error(root->fs_info, ret,
|
||||
"Failed to relocate sys chunks after "
|
||||
"device initialization. This can be fixed "
|
||||
"using the \"btrfs balance\" command.");
|
||||
@ -2663,7 +2688,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
else if (ret > 0) { /* Logic error or corruption */
|
||||
btrfs_std_error(root->fs_info, -ENOENT,
|
||||
btrfs_handle_fs_error(root->fs_info, -ENOENT,
|
||||
"Failed lookup while freeing chunk.");
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
@ -2671,7 +2696,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
|
||||
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
if (ret < 0)
|
||||
btrfs_std_error(root->fs_info, ret,
|
||||
btrfs_handle_fs_error(root->fs_info, ret,
|
||||
"Failed to delete chunk item.");
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
@ -2857,7 +2882,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
|
||||
chunk_offset);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
btrfs_std_error(root->fs_info, ret, NULL);
|
||||
btrfs_handle_fs_error(root->fs_info, ret, NULL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3402,6 +3427,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
|
||||
u32 count_meta = 0;
|
||||
u32 count_sys = 0;
|
||||
int chunk_reserved = 0;
|
||||
u64 bytes_used = 0;
|
||||
|
||||
/* step one make some room on all the devices */
|
||||
devices = &fs_info->fs_devices->devices;
|
||||
@ -3540,7 +3566,13 @@ again:
|
||||
goto loop;
|
||||
}
|
||||
|
||||
if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) {
|
||||
ASSERT(fs_info->data_sinfo);
|
||||
spin_lock(&fs_info->data_sinfo->lock);
|
||||
bytes_used = fs_info->data_sinfo->bytes_used;
|
||||
spin_unlock(&fs_info->data_sinfo->lock);
|
||||
|
||||
if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
|
||||
!chunk_reserved && !bytes_used) {
|
||||
trans = btrfs_start_transaction(chunk_root, 0);
|
||||
if (IS_ERR(trans)) {
|
||||
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
|
||||
@ -3632,7 +3664,7 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
|
||||
unset_balance_control(fs_info);
|
||||
ret = del_balance_item(fs_info->tree_root);
|
||||
if (ret)
|
||||
btrfs_std_error(fs_info, ret, NULL);
|
||||
btrfs_handle_fs_error(fs_info, ret, NULL);
|
||||
|
||||
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
|
||||
}
|
||||
@ -3693,10 +3725,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
|
||||
num_devices--;
|
||||
}
|
||||
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
|
||||
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
|
||||
if (num_devices == 1)
|
||||
allowed |= BTRFS_BLOCK_GROUP_DUP;
|
||||
else if (num_devices > 1)
|
||||
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
|
||||
if (num_devices > 1)
|
||||
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
|
||||
if (num_devices > 2)
|
||||
allowed |= BTRFS_BLOCK_GROUP_RAID5;
|
||||
@ -5278,7 +5308,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
|
||||
stripe_nr = div64_u64(stripe_nr, stripe_len);
|
||||
|
||||
stripe_offset = stripe_nr * stripe_len;
|
||||
BUG_ON(offset < stripe_offset);
|
||||
if (offset < stripe_offset) {
|
||||
btrfs_crit(fs_info, "stripe math has gone wrong, "
|
||||
"stripe_offset=%llu, offset=%llu, start=%llu, "
|
||||
"logical=%llu, stripe_len=%llu",
|
||||
stripe_offset, offset, em->start, logical,
|
||||
stripe_len);
|
||||
free_extent_map(em);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* stripe_offset is the offset of this block in its stripe*/
|
||||
stripe_offset = offset - stripe_offset;
|
||||
@ -5519,7 +5557,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
|
||||
&stripe_index);
|
||||
mirror_num = stripe_index + 1;
|
||||
}
|
||||
BUG_ON(stripe_index >= map->num_stripes);
|
||||
if (stripe_index >= map->num_stripes) {
|
||||
btrfs_crit(fs_info, "stripe index math went horribly wrong, "
|
||||
"got stripe_index=%u, num_stripes=%u",
|
||||
stripe_index, map->num_stripes);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
num_alloc_stripes = num_stripes;
|
||||
if (dev_replace_is_ongoing) {
|
||||
@ -6242,7 +6286,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
|
||||
"invalid chunk length %llu", length);
|
||||
return -EIO;
|
||||
}
|
||||
if (!is_power_of_2(stripe_len)) {
|
||||
if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
|
||||
btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
|
||||
stripe_len);
|
||||
return -EIO;
|
||||
|
@ -340,14 +340,14 @@ struct btrfs_raid_attr {
|
||||
};
|
||||
|
||||
extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
|
||||
|
||||
extern const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES];
|
||||
extern const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES];
|
||||
|
||||
struct map_lookup {
|
||||
u64 type;
|
||||
int io_align;
|
||||
int io_width;
|
||||
int stripe_len;
|
||||
u64 stripe_len;
|
||||
int sector_size;
|
||||
int num_stripes;
|
||||
int sub_stripes;
|
||||
@ -357,52 +357,6 @@ struct map_lookup {
|
||||
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
|
||||
(sizeof(struct btrfs_bio_stripe) * (n)))
|
||||
|
||||
/*
|
||||
* Restriper's general type filter
|
||||
*/
|
||||
#define BTRFS_BALANCE_DATA (1ULL << 0)
|
||||
#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
|
||||
#define BTRFS_BALANCE_METADATA (1ULL << 2)
|
||||
|
||||
#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
|
||||
BTRFS_BALANCE_SYSTEM | \
|
||||
BTRFS_BALANCE_METADATA)
|
||||
|
||||
#define BTRFS_BALANCE_FORCE (1ULL << 3)
|
||||
#define BTRFS_BALANCE_RESUME (1ULL << 4)
|
||||
|
||||
/*
|
||||
* Balance filters
|
||||
*/
|
||||
#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
|
||||
#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
|
||||
#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
|
||||
#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
|
||||
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
|
||||
#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
|
||||
#define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6)
|
||||
#define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7)
|
||||
#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10)
|
||||
|
||||
#define BTRFS_BALANCE_ARGS_MASK \
|
||||
(BTRFS_BALANCE_ARGS_PROFILES | \
|
||||
BTRFS_BALANCE_ARGS_USAGE | \
|
||||
BTRFS_BALANCE_ARGS_DEVID | \
|
||||
BTRFS_BALANCE_ARGS_DRANGE | \
|
||||
BTRFS_BALANCE_ARGS_VRANGE | \
|
||||
BTRFS_BALANCE_ARGS_LIMIT | \
|
||||
BTRFS_BALANCE_ARGS_LIMIT_RANGE | \
|
||||
BTRFS_BALANCE_ARGS_STRIPES_RANGE | \
|
||||
BTRFS_BALANCE_ARGS_USAGE_RANGE)
|
||||
|
||||
/*
|
||||
* Profile changing flags. When SOFT is set we won't relocate chunk if
|
||||
* it already has the target profile (even though it may be
|
||||
* half-filled).
|
||||
*/
|
||||
#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
|
||||
#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
|
||||
|
||||
struct btrfs_balance_args;
|
||||
struct btrfs_balance_progress;
|
||||
struct btrfs_balance_control {
|
||||
@ -445,13 +399,18 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
|
||||
struct btrfs_fs_devices **fs_devices_ret);
|
||||
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
|
||||
void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step);
|
||||
void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_device *device, struct btrfs_device *this_dev);
|
||||
int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
|
||||
char *device_path,
|
||||
struct btrfs_device **device);
|
||||
int btrfs_find_device_by_devspec(struct btrfs_root *root, u64 devid,
|
||||
char *devpath,
|
||||
struct btrfs_device **device);
|
||||
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
|
||||
const u64 *devid,
|
||||
const u8 *uuid);
|
||||
int btrfs_rm_device(struct btrfs_root *root, char *device_path);
|
||||
int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid);
|
||||
void btrfs_cleanup_fs_uuids(void);
|
||||
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
|
||||
int btrfs_grow_device(struct btrfs_trans_handle *trans,
|
||||
|
@ -23,6 +23,7 @@
|
||||
|
||||
#define BTRFS_IOCTL_MAGIC 0x94
|
||||
#define BTRFS_VOL_NAME_MAX 255
|
||||
#define BTRFS_LABEL_SIZE 256
|
||||
|
||||
/* this should be 4k */
|
||||
#define BTRFS_PATH_NAME_MAX 4087
|
||||
@ -33,14 +34,31 @@ struct btrfs_ioctl_vol_args {
|
||||
|
||||
#define BTRFS_DEVICE_PATH_NAME_MAX 1024
|
||||
|
||||
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
|
||||
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
|
||||
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
|
||||
#define BTRFS_DEVICE_SPEC_BY_ID (1ULL << 3)
|
||||
|
||||
#define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED \
|
||||
(BTRFS_SUBVOL_CREATE_ASYNC | \
|
||||
BTRFS_SUBVOL_RDONLY | \
|
||||
BTRFS_SUBVOL_QGROUP_INHERIT | \
|
||||
BTRFS_DEVICE_SPEC_BY_ID)
|
||||
|
||||
#define BTRFS_FSID_SIZE 16
|
||||
#define BTRFS_UUID_SIZE 16
|
||||
#define BTRFS_UUID_UNPARSED_SIZE 37
|
||||
|
||||
#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
|
||||
/*
|
||||
* flags definition for qgroup limits
|
||||
*
|
||||
* Used by:
|
||||
* struct btrfs_qgroup_limit.flags
|
||||
* struct btrfs_qgroup_limit_item.flags
|
||||
*/
|
||||
#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0)
|
||||
#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1)
|
||||
#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2)
|
||||
#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3)
|
||||
#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4)
|
||||
#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5)
|
||||
|
||||
struct btrfs_qgroup_limit {
|
||||
__u64 flags;
|
||||
@ -50,6 +68,14 @@ struct btrfs_qgroup_limit {
|
||||
__u64 rsv_excl;
|
||||
};
|
||||
|
||||
/*
|
||||
* flags definition for qgroup inheritance
|
||||
*
|
||||
* Used by:
|
||||
* struct btrfs_qgroup_inherit.flags
|
||||
*/
|
||||
#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
|
||||
|
||||
struct btrfs_qgroup_inherit {
|
||||
__u64 flags;
|
||||
__u64 num_qgroups;
|
||||
@ -64,6 +90,20 @@ struct btrfs_ioctl_qgroup_limit_args {
|
||||
struct btrfs_qgroup_limit lim;
|
||||
};
|
||||
|
||||
/*
|
||||
* flags for subvolumes
|
||||
*
|
||||
* Used by:
|
||||
* struct btrfs_ioctl_vol_args_v2.flags
|
||||
*
|
||||
* BTRFS_SUBVOL_RDONLY is also provided/consumed by the following ioctls:
|
||||
* - BTRFS_IOC_SUBVOL_GETFLAGS
|
||||
* - BTRFS_IOC_SUBVOL_SETFLAGS
|
||||
*/
|
||||
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
|
||||
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
|
||||
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
|
||||
|
||||
#define BTRFS_SUBVOL_NAME_MAX 4039
|
||||
struct btrfs_ioctl_vol_args_v2 {
|
||||
__s64 fd;
|
||||
@ -76,7 +116,10 @@ struct btrfs_ioctl_vol_args_v2 {
|
||||
};
|
||||
__u64 unused[4];
|
||||
};
|
||||
char name[BTRFS_SUBVOL_NAME_MAX + 1];
|
||||
union {
|
||||
char name[BTRFS_SUBVOL_NAME_MAX + 1];
|
||||
u64 devid;
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
@ -190,6 +233,37 @@ struct btrfs_ioctl_fs_info_args {
|
||||
__u64 reserved[122]; /* pad to 1k */
|
||||
};
|
||||
|
||||
/*
|
||||
* feature flags
|
||||
*
|
||||
* Used by:
|
||||
* struct btrfs_ioctl_feature_flags
|
||||
*/
|
||||
#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0)
|
||||
|
||||
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
|
||||
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
|
||||
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
|
||||
#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
|
||||
/*
|
||||
* some patches floated around with a second compression method
|
||||
* lets save that incompat here for when they do get in
|
||||
* Note we don't actually support it, we're just reserving the
|
||||
* number
|
||||
*/
|
||||
#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4)
|
||||
|
||||
/*
|
||||
* older kernels tried to do bigger metadata blocks, but the
|
||||
* code was pretty buggy. Lets not let them try anymore.
|
||||
*/
|
||||
#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
|
||||
|
||||
#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
|
||||
#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
|
||||
#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
|
||||
#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9)
|
||||
|
||||
struct btrfs_ioctl_feature_flags {
|
||||
__u64 compat_flags;
|
||||
__u64 compat_ro_flags;
|
||||
@ -254,6 +328,70 @@ struct btrfs_balance_progress {
|
||||
__u64 completed; /* # of chunks relocated so far */
|
||||
};
|
||||
|
||||
/*
|
||||
* flags definition for balance
|
||||
*
|
||||
* Restriper's general type filter
|
||||
*
|
||||
* Used by:
|
||||
* btrfs_ioctl_balance_args.flags
|
||||
* btrfs_balance_control.flags (internal)
|
||||
*/
|
||||
#define BTRFS_BALANCE_DATA (1ULL << 0)
|
||||
#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
|
||||
#define BTRFS_BALANCE_METADATA (1ULL << 2)
|
||||
|
||||
#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
|
||||
BTRFS_BALANCE_SYSTEM | \
|
||||
BTRFS_BALANCE_METADATA)
|
||||
|
||||
#define BTRFS_BALANCE_FORCE (1ULL << 3)
|
||||
#define BTRFS_BALANCE_RESUME (1ULL << 4)
|
||||
|
||||
/*
|
||||
* flags definitions for per-type balance args
|
||||
*
|
||||
* Balance filters
|
||||
*
|
||||
* Used by:
|
||||
* struct btrfs_balance_args
|
||||
*/
|
||||
#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
|
||||
#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
|
||||
#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
|
||||
#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
|
||||
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
|
||||
#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
|
||||
#define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6)
|
||||
#define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7)
|
||||
#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10)
|
||||
|
||||
#define BTRFS_BALANCE_ARGS_MASK \
|
||||
(BTRFS_BALANCE_ARGS_PROFILES | \
|
||||
BTRFS_BALANCE_ARGS_USAGE | \
|
||||
BTRFS_BALANCE_ARGS_DEVID | \
|
||||
BTRFS_BALANCE_ARGS_DRANGE | \
|
||||
BTRFS_BALANCE_ARGS_VRANGE | \
|
||||
BTRFS_BALANCE_ARGS_LIMIT | \
|
||||
BTRFS_BALANCE_ARGS_LIMIT_RANGE | \
|
||||
BTRFS_BALANCE_ARGS_STRIPES_RANGE | \
|
||||
BTRFS_BALANCE_ARGS_USAGE_RANGE)
|
||||
|
||||
/*
|
||||
* Profile changing flags. When SOFT is set we won't relocate chunk if
|
||||
* it already has the target profile (even though it may be
|
||||
* half-filled).
|
||||
*/
|
||||
#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
|
||||
#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
|
||||
|
||||
|
||||
/*
|
||||
* flags definition for balance state
|
||||
*
|
||||
* Used by:
|
||||
* struct btrfs_ioctl_balance_args.state
|
||||
*/
|
||||
#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
|
||||
#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
|
||||
#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
|
||||
@ -347,9 +485,45 @@ struct btrfs_ioctl_clone_range_args {
|
||||
__u64 dest_offset;
|
||||
};
|
||||
|
||||
/* flags for the defrag range ioctl */
|
||||
/*
|
||||
* flags definition for the defrag range ioctl
|
||||
*
|
||||
* Used by:
|
||||
* struct btrfs_ioctl_defrag_range_args.flags
|
||||
*/
|
||||
#define BTRFS_DEFRAG_RANGE_COMPRESS 1
|
||||
#define BTRFS_DEFRAG_RANGE_START_IO 2
|
||||
struct btrfs_ioctl_defrag_range_args {
|
||||
/* start of the defrag operation */
|
||||
__u64 start;
|
||||
|
||||
/* number of bytes to defrag, use (u64)-1 to say all */
|
||||
__u64 len;
|
||||
|
||||
/*
|
||||
* flags for the operation, which can include turning
|
||||
* on compression for this one defrag
|
||||
*/
|
||||
__u64 flags;
|
||||
|
||||
/*
|
||||
* any extent bigger than this will be considered
|
||||
* already defragged. Use 0 to take the kernel default
|
||||
* Use 1 to say every single extent must be rewritten
|
||||
*/
|
||||
__u32 extent_thresh;
|
||||
|
||||
/*
|
||||
* which compression method to use if turning on compression
|
||||
* for this defrag operation. If unspecified, zlib will
|
||||
* be used
|
||||
*/
|
||||
__u32 compress_type;
|
||||
|
||||
/* spare for later */
|
||||
__u32 unused[4];
|
||||
};
|
||||
|
||||
|
||||
#define BTRFS_SAME_DATA_DIFFERS 1
|
||||
/* For extent-same ioctl */
|
||||
@ -659,5 +833,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
|
||||
struct btrfs_ioctl_feature_flags[2])
|
||||
#define BTRFS_IOC_GET_SUPPORTED_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
|
||||
struct btrfs_ioctl_feature_flags[3])
|
||||
#define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \
|
||||
struct btrfs_ioctl_vol_args_v2)
|
||||
|
||||
#endif /* _UAPI_LINUX_BTRFS_H */
|
||||
|
966
include/uapi/linux/btrfs_tree.h
Normal file
966
include/uapi/linux/btrfs_tree.h
Normal file
@ -0,0 +1,966 @@
|
||||
#ifndef _BTRFS_CTREE_H_
|
||||
#define _BTRFS_CTREE_H_
|
||||
|
||||
/*
|
||||
* This header contains the structure definitions and constants used
|
||||
* by file system objects that can be retrieved using
|
||||
* the BTRFS_IOC_SEARCH_TREE ioctl. That means basically anything that
|
||||
* is needed to describe a leaf node's key or item contents.
|
||||
*/
|
||||
|
||||
/* holds pointers to all of the tree roots */
|
||||
#define BTRFS_ROOT_TREE_OBJECTID 1ULL
|
||||
|
||||
/* stores information about which extents are in use, and reference counts */
|
||||
#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
|
||||
|
||||
/*
|
||||
* chunk tree stores translations from logical -> physical block numbering
|
||||
* the super block points to the chunk tree
|
||||
*/
|
||||
#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
|
||||
|
||||
/*
|
||||
* stores information about which areas of a given device are in use.
|
||||
* one per device. The tree of tree roots points to the device tree
|
||||
*/
|
||||
#define BTRFS_DEV_TREE_OBJECTID 4ULL
|
||||
|
||||
/* one per subvolume, storing files and directories */
|
||||
#define BTRFS_FS_TREE_OBJECTID 5ULL
|
||||
|
||||
/* directory objectid inside the root tree */
|
||||
#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
|
||||
|
||||
/* holds checksums of all the data extents */
|
||||
#define BTRFS_CSUM_TREE_OBJECTID 7ULL
|
||||
|
||||
/* holds quota configuration and tracking */
|
||||
#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
|
||||
|
||||
/* for storing items that use the BTRFS_UUID_KEY* types */
|
||||
#define BTRFS_UUID_TREE_OBJECTID 9ULL
|
||||
|
||||
/* tracks free space in block groups. */
|
||||
#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
|
||||
|
||||
/* device stats in the device tree */
|
||||
#define BTRFS_DEV_STATS_OBJECTID 0ULL
|
||||
|
||||
/* for storing balance parameters in the root tree */
|
||||
#define BTRFS_BALANCE_OBJECTID -4ULL
|
||||
|
||||
/* orphan objectid for tracking unlinked/truncated files */
|
||||
#define BTRFS_ORPHAN_OBJECTID -5ULL
|
||||
|
||||
/* does write ahead logging to speed up fsyncs */
|
||||
#define BTRFS_TREE_LOG_OBJECTID -6ULL
|
||||
#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
|
||||
|
||||
/* for space balancing */
|
||||
#define BTRFS_TREE_RELOC_OBJECTID -8ULL
|
||||
#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
|
||||
|
||||
/*
|
||||
* extent checksums all have this objectid
|
||||
* this allows them to share the logging tree
|
||||
* for fsyncs
|
||||
*/
|
||||
#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
|
||||
|
||||
/* For storing free space cache */
|
||||
#define BTRFS_FREE_SPACE_OBJECTID -11ULL
|
||||
|
||||
/*
|
||||
* The inode number assigned to the special inode for storing
|
||||
* free ino cache
|
||||
*/
|
||||
#define BTRFS_FREE_INO_OBJECTID -12ULL
|
||||
|
||||
/* dummy objectid represents multiple objectids */
|
||||
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
|
||||
|
||||
/*
|
||||
* All files have objectids in this range.
|
||||
*/
|
||||
#define BTRFS_FIRST_FREE_OBJECTID 256ULL
|
||||
#define BTRFS_LAST_FREE_OBJECTID -256ULL
|
||||
#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
|
||||
|
||||
|
||||
/*
|
||||
* the device items go into the chunk tree. The key is in the form
|
||||
* [ 1 BTRFS_DEV_ITEM_KEY device_id ]
|
||||
*/
|
||||
#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
|
||||
|
||||
#define BTRFS_BTREE_INODE_OBJECTID 1
|
||||
|
||||
#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
|
||||
|
||||
#define BTRFS_DEV_REPLACE_DEVID 0ULL
|
||||
|
||||
/*
|
||||
* inode items have the data typically returned from stat and store other
|
||||
* info about object characteristics. There is one for every file and dir in
|
||||
* the FS
|
||||
*/
|
||||
#define BTRFS_INODE_ITEM_KEY 1
|
||||
#define BTRFS_INODE_REF_KEY 12
|
||||
#define BTRFS_INODE_EXTREF_KEY 13
|
||||
#define BTRFS_XATTR_ITEM_KEY 24
|
||||
#define BTRFS_ORPHAN_ITEM_KEY 48
|
||||
/* reserve 2-15 close to the inode for later flexibility */
|
||||
|
||||
/*
|
||||
* dir items are the name -> inode pointers in a directory. There is one
|
||||
* for every name in a directory.
|
||||
*/
|
||||
#define BTRFS_DIR_LOG_ITEM_KEY 60
|
||||
#define BTRFS_DIR_LOG_INDEX_KEY 72
|
||||
#define BTRFS_DIR_ITEM_KEY 84
|
||||
#define BTRFS_DIR_INDEX_KEY 96
|
||||
/*
|
||||
* extent data is for file data
|
||||
*/
|
||||
#define BTRFS_EXTENT_DATA_KEY 108
|
||||
|
||||
/*
 * extent csums are stored in a separate tree and hold csums for
 * an entire extent on disk.
 */
#define BTRFS_EXTENT_CSUM_KEY	128

/*
 * root items point to tree roots.  They are typically in the root
 * tree used by the super block to find all the other trees
 */
#define BTRFS_ROOT_ITEM_KEY	132

/*
 * root backrefs tie subvols and snapshots to the directory entries that
 * reference them
 */
#define BTRFS_ROOT_BACKREF_KEY	144

/*
 * root refs make a fast index for listing all of the snapshots and
 * subvolumes referenced by a given root.  They point directly to the
 * directory item in the root that references the subvol
 */
#define BTRFS_ROOT_REF_KEY	156

/*
 * extent items are in the extent map tree.  These record which blocks
 * are used, and how many references there are to each block
 */
#define BTRFS_EXTENT_ITEM_KEY	168

/*
 * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
 * the length, so we save the level in key->offset instead of the length.
 */
#define BTRFS_METADATA_ITEM_KEY	169

#define BTRFS_TREE_BLOCK_REF_KEY	176

#define BTRFS_EXTENT_DATA_REF_KEY	178

#define BTRFS_EXTENT_REF_V0_KEY		180

#define BTRFS_SHARED_BLOCK_REF_KEY	182

#define BTRFS_SHARED_DATA_REF_KEY	184
|
||||
|
||||
/*
 * block groups give us hints into the extent allocation trees.  Which
 * blocks are free etc etc
 */
#define BTRFS_BLOCK_GROUP_ITEM_KEY 192

/*
 * Every block group is represented in the free space tree by a free space info
 * item, which stores some accounting information. It is keyed on
 * (block_group_start, FREE_SPACE_INFO, block_group_length).
 */
#define BTRFS_FREE_SPACE_INFO_KEY 198

/*
 * A free space extent tracks an extent of space that is free in a block group.
 * It is keyed on (start, FREE_SPACE_EXTENT, length).
 */
#define BTRFS_FREE_SPACE_EXTENT_KEY 199

/*
 * When a block group becomes very fragmented, we convert it to use bitmaps
 * instead of extents. A free space bitmap is keyed on
 * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
 * (length / sectorsize) bits.
 */
#define BTRFS_FREE_SPACE_BITMAP_KEY 200

#define BTRFS_DEV_EXTENT_KEY	204
#define BTRFS_DEV_ITEM_KEY	216
#define BTRFS_CHUNK_ITEM_KEY	228
|
||||
|
||||
/*
 * Records the overall state of the qgroups.
 * There's only one instance of this key present,
 * (0, BTRFS_QGROUP_STATUS_KEY, 0)
 */
#define BTRFS_QGROUP_STATUS_KEY         240
/*
 * Records the currently used space of the qgroup.
 * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
 */
#define BTRFS_QGROUP_INFO_KEY           242
/*
 * Contains the user configured limits for the qgroup.
 * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
 */
#define BTRFS_QGROUP_LIMIT_KEY          244
/*
 * Records the child-parent relationship of qgroups. For
 * each relation, 2 keys are present:
 * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
 * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
 */
#define BTRFS_QGROUP_RELATION_KEY       246
|
||||
|
||||
/*
 * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
 */
#define BTRFS_BALANCE_ITEM_KEY	248

/*
 * The key type for tree items that are stored persistently, but do not need to
 * exist for extended period of time. The items can exist in any tree.
 *
 * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
 *
 * Existing items:
 *
 * - balance status item
 *   (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
 */
#define BTRFS_TEMPORARY_ITEM_KEY	248

/*
 * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY
 */
#define BTRFS_DEV_STATS_KEY		249

/*
 * The key type for tree items that are stored persistently and usually exist
 * for a long period, eg. filesystem lifetime. The item kinds can be status
 * information, stats or preference values. The item can exist in any tree.
 *
 * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
 *
 * Existing items:
 *
 * - device statistics, store IO stats in the device tree, one key for all
 *   stats
 *   (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
 */
#define BTRFS_PERSISTENT_ITEM_KEY	249

/*
 * Persistently stores the device replace state in the device tree.
 * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
 */
#define BTRFS_DEV_REPLACE_KEY	250
|
||||
|
||||
/*
 * Stores items that allow to quickly map UUIDs to something else.
 * These items are part of the filesystem UUID tree.
 * The key is built like this:
 * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
 */
#if BTRFS_UUID_SIZE != 16
#error "UUID items require BTRFS_UUID_SIZE == 16!"
#endif
/* for UUIDs assigned to subvols */
#define BTRFS_UUID_KEY_SUBVOL	251
/* for UUIDs assigned to received subvols */
#define BTRFS_UUID_KEY_RECEIVED_SUBVOL	252
|
||||
|
||||
/*
 * string items are for debugging.  They just store a short string of
 * data in the FS
 */
#define BTRFS_STRING_ITEM_KEY	253

/* 32 bytes in various csum fields */
#define BTRFS_CSUM_SIZE 32

/* csum types */
#define BTRFS_CSUM_TYPE_CRC32	0
|
||||
|
||||
/*
 * flags definitions for directory entry item type
 *
 * Used by:
 * struct btrfs_dir_item.type
 */
#define BTRFS_FT_UNKNOWN	0
#define BTRFS_FT_REG_FILE	1
#define BTRFS_FT_DIR		2
#define BTRFS_FT_CHRDEV		3
#define BTRFS_FT_BLKDEV		4
#define BTRFS_FT_FIFO		5
#define BTRFS_FT_SOCK		6
#define BTRFS_FT_SYMLINK	7
#define BTRFS_FT_XATTR		8
#define BTRFS_FT_MAX		9
|
||||
|
||||
/*
|
||||
* The key defines the order in the tree, and so it also defines (optimal)
|
||||
* block layout.
|
||||
*
|
||||
* objectid corresponds to the inode number.
|
||||
*
|
||||
* type tells us things about the object, and is a kind of stream selector.
|
||||
* so for a given inode, keys with type of 1 might refer to the inode data,
|
||||
* type of 2 may point to file data in the btree and type == 3 may point to
|
||||
* extents.
|
||||
*
|
||||
* offset is the starting byte offset for this key in the stream.
|
||||
*
|
||||
* btrfs_disk_key is in disk byte order. struct btrfs_key is always
|
||||
* in cpu native order. Otherwise they are identical and their sizes
|
||||
* should be the same (ie both packed)
|
||||
*/
|
||||
struct btrfs_disk_key {
|
||||
__le64 objectid;
|
||||
__u8 type;
|
||||
__le64 offset;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_key {
|
||||
__u64 objectid;
|
||||
__u8 type;
|
||||
__u64 offset;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_dev_item {
|
||||
/* the internal btrfs device id */
|
||||
__le64 devid;
|
||||
|
||||
/* size of the device */
|
||||
__le64 total_bytes;
|
||||
|
||||
/* bytes used */
|
||||
__le64 bytes_used;
|
||||
|
||||
/* optimal io alignment for this device */
|
||||
__le32 io_align;
|
||||
|
||||
/* optimal io width for this device */
|
||||
__le32 io_width;
|
||||
|
||||
/* minimal io size for this device */
|
||||
__le32 sector_size;
|
||||
|
||||
/* type and info about this device */
|
||||
__le64 type;
|
||||
|
||||
/* expected generation for this device */
|
||||
__le64 generation;
|
||||
|
||||
/*
|
||||
* starting byte of this partition on the device,
|
||||
* to allow for stripe alignment in the future
|
||||
*/
|
||||
__le64 start_offset;
|
||||
|
||||
/* grouping information for allocation decisions */
|
||||
__le32 dev_group;
|
||||
|
||||
/* seek speed 0-100 where 100 is fastest */
|
||||
__u8 seek_speed;
|
||||
|
||||
/* bandwidth 0-100 where 100 is fastest */
|
||||
__u8 bandwidth;
|
||||
|
||||
/* btrfs generated uuid for this device */
|
||||
__u8 uuid[BTRFS_UUID_SIZE];
|
||||
|
||||
/* uuid of FS who owns this device */
|
||||
__u8 fsid[BTRFS_UUID_SIZE];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_stripe {
|
||||
__le64 devid;
|
||||
__le64 offset;
|
||||
__u8 dev_uuid[BTRFS_UUID_SIZE];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_chunk {
|
||||
/* size of this chunk in bytes */
|
||||
__le64 length;
|
||||
|
||||
/* objectid of the root referencing this chunk */
|
||||
__le64 owner;
|
||||
|
||||
__le64 stripe_len;
|
||||
__le64 type;
|
||||
|
||||
/* optimal io alignment for this chunk */
|
||||
__le32 io_align;
|
||||
|
||||
/* optimal io width for this chunk */
|
||||
__le32 io_width;
|
||||
|
||||
/* minimal io size for this chunk */
|
||||
__le32 sector_size;
|
||||
|
||||
/* 2^16 stripes is quite a lot, a second limit is the size of a single
|
||||
* item in the btree
|
||||
*/
|
||||
__le16 num_stripes;
|
||||
|
||||
/* sub stripes only matter for raid10 */
|
||||
__le16 sub_stripes;
|
||||
struct btrfs_stripe stripe;
|
||||
/* additional stripes go here */
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_FREE_SPACE_EXTENT	1
#define BTRFS_FREE_SPACE_BITMAP	2
|
||||
|
||||
struct btrfs_free_space_entry {
|
||||
__le64 offset;
|
||||
__le64 bytes;
|
||||
__u8 type;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_free_space_header {
|
||||
struct btrfs_disk_key location;
|
||||
__le64 generation;
|
||||
__le64 num_entries;
|
||||
__le64 num_bitmaps;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_HEADER_FLAG_WRITTEN	(1ULL << 0)
#define BTRFS_HEADER_FLAG_RELOC		(1ULL << 1)

/* Super block flags */
/* Errors detected */
#define BTRFS_SUPER_FLAG_ERROR		(1ULL << 2)

#define BTRFS_SUPER_FLAG_SEEDING	(1ULL << 32)
#define BTRFS_SUPER_FLAG_METADUMP	(1ULL << 33)
|
||||
|
||||
|
||||
/*
|
||||
* items in the extent btree are used to record the objectid of the
|
||||
* owner of the block and the number of references
|
||||
*/
|
||||
|
||||
struct btrfs_extent_item {
|
||||
__le64 refs;
|
||||
__le64 generation;
|
||||
__le64 flags;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_extent_item_v0 {
|
||||
__le32 refs;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
|
||||
#define BTRFS_EXTENT_FLAG_DATA		(1ULL << 0)
#define BTRFS_EXTENT_FLAG_TREE_BLOCK	(1ULL << 1)

/* following flags only apply to tree blocks */

/* use full backrefs for extent pointers in the block */
#define BTRFS_BLOCK_FLAG_FULL_BACKREF	(1ULL << 8)

/*
 * this flag is only used internally by scrub and may be changed at any time
 * it is only declared here to avoid collisions
 */
#define BTRFS_EXTENT_FLAG_SUPER		(1ULL << 48)
|
||||
|
||||
struct btrfs_tree_block_info {
|
||||
struct btrfs_disk_key key;
|
||||
__u8 level;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_extent_data_ref {
|
||||
__le64 root;
|
||||
__le64 objectid;
|
||||
__le64 offset;
|
||||
__le32 count;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_shared_data_ref {
|
||||
__le32 count;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_extent_inline_ref {
|
||||
__u8 type;
|
||||
__le64 offset;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/* old style backrefs item */
|
||||
struct btrfs_extent_ref_v0 {
|
||||
__le64 root;
|
||||
__le64 generation;
|
||||
__le64 objectid;
|
||||
__le32 count;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
|
||||
/* dev extents record free space on individual devices. The owner
|
||||
* field points back to the chunk allocation mapping tree that allocated
|
||||
* the extent. The chunk tree uuid field is a way to double check the owner
|
||||
*/
|
||||
struct btrfs_dev_extent {
|
||||
__le64 chunk_tree;
|
||||
__le64 chunk_objectid;
|
||||
__le64 chunk_offset;
|
||||
__le64 length;
|
||||
__u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_inode_ref {
|
||||
__le64 index;
|
||||
__le16 name_len;
|
||||
/* name goes here */
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_inode_extref {
|
||||
__le64 parent_objectid;
|
||||
__le64 index;
|
||||
__le16 name_len;
|
||||
__u8 name[0];
|
||||
/* name goes here */
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_timespec {
|
||||
__le64 sec;
|
||||
__le32 nsec;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_inode_item {
|
||||
/* nfs style generation number */
|
||||
__le64 generation;
|
||||
/* transid that last touched this inode */
|
||||
__le64 transid;
|
||||
__le64 size;
|
||||
__le64 nbytes;
|
||||
__le64 block_group;
|
||||
__le32 nlink;
|
||||
__le32 uid;
|
||||
__le32 gid;
|
||||
__le32 mode;
|
||||
__le64 rdev;
|
||||
__le64 flags;
|
||||
|
||||
/* modification sequence number for NFS */
|
||||
__le64 sequence;
|
||||
|
||||
/*
|
||||
* a little future expansion, for more than this we can
|
||||
* just grow the inode item and version it
|
||||
*/
|
||||
__le64 reserved[4];
|
||||
struct btrfs_timespec atime;
|
||||
struct btrfs_timespec ctime;
|
||||
struct btrfs_timespec mtime;
|
||||
struct btrfs_timespec otime;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_dir_log_item {
|
||||
__le64 end;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_dir_item {
|
||||
struct btrfs_disk_key location;
|
||||
__le64 transid;
|
||||
__le16 data_len;
|
||||
__le16 name_len;
|
||||
__u8 type;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_ROOT_SUBVOL_RDONLY	(1ULL << 0)

/*
 * Internal in-memory flag that a subvolume has been marked for deletion but
 * still visible as a directory
 */
#define BTRFS_ROOT_SUBVOL_DEAD		(1ULL << 48)
|
||||
|
||||
struct btrfs_root_item {
|
||||
struct btrfs_inode_item inode;
|
||||
__le64 generation;
|
||||
__le64 root_dirid;
|
||||
__le64 bytenr;
|
||||
__le64 byte_limit;
|
||||
__le64 bytes_used;
|
||||
__le64 last_snapshot;
|
||||
__le64 flags;
|
||||
__le32 refs;
|
||||
struct btrfs_disk_key drop_progress;
|
||||
__u8 drop_level;
|
||||
__u8 level;
|
||||
|
||||
/*
|
||||
* The following fields appear after subvol_uuids+subvol_times
|
||||
* were introduced.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This generation number is used to test if the new fields are valid
|
||||
* and up to date while reading the root item. Every time the root item
|
||||
* is written out, the "generation" field is copied into this field. If
|
||||
* anyone ever mounted the fs with an older kernel, we will have
|
||||
* mismatching generation values here and thus must invalidate the
|
||||
* new fields. See btrfs_update_root and btrfs_find_last_root for
|
||||
* details.
|
||||
* the offset of generation_v2 is also used as the start for the memset
|
||||
* when invalidating the fields.
|
||||
*/
|
||||
__le64 generation_v2;
|
||||
__u8 uuid[BTRFS_UUID_SIZE];
|
||||
__u8 parent_uuid[BTRFS_UUID_SIZE];
|
||||
__u8 received_uuid[BTRFS_UUID_SIZE];
|
||||
__le64 ctransid; /* updated when an inode changes */
|
||||
__le64 otransid; /* trans when created */
|
||||
__le64 stransid; /* trans when sent. non-zero for received subvol */
|
||||
__le64 rtransid; /* trans when received. non-zero for received subvol */
|
||||
struct btrfs_timespec ctime;
|
||||
struct btrfs_timespec otime;
|
||||
struct btrfs_timespec stime;
|
||||
struct btrfs_timespec rtime;
|
||||
__le64 reserved[8]; /* for future */
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/*
|
||||
* this is used for both forward and backward root refs
|
||||
*/
|
||||
struct btrfs_root_ref {
|
||||
__le64 dirid;
|
||||
__le64 sequence;
|
||||
__le16 name_len;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_disk_balance_args {
|
||||
/*
|
||||
* profiles to operate on, single is denoted by
|
||||
* BTRFS_AVAIL_ALLOC_BIT_SINGLE
|
||||
*/
|
||||
__le64 profiles;
|
||||
|
||||
/*
|
||||
* usage filter
|
||||
* BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N'
|
||||
* BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max
|
||||
*/
|
||||
union {
|
||||
__le64 usage;
|
||||
struct {
|
||||
__le32 usage_min;
|
||||
__le32 usage_max;
|
||||
};
|
||||
};
|
||||
|
||||
/* devid filter */
|
||||
__le64 devid;
|
||||
|
||||
/* devid subset filter [pstart..pend) */
|
||||
__le64 pstart;
|
||||
__le64 pend;
|
||||
|
||||
/* btrfs virtual address space subset filter [vstart..vend) */
|
||||
__le64 vstart;
|
||||
__le64 vend;
|
||||
|
||||
/*
|
||||
* profile to convert to, single is denoted by
|
||||
* BTRFS_AVAIL_ALLOC_BIT_SINGLE
|
||||
*/
|
||||
__le64 target;
|
||||
|
||||
/* BTRFS_BALANCE_ARGS_* */
|
||||
__le64 flags;
|
||||
|
||||
/*
|
||||
* BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
|
||||
* BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum
|
||||
* and maximum
|
||||
*/
|
||||
union {
|
||||
__le64 limit;
|
||||
struct {
|
||||
__le32 limit_min;
|
||||
__le32 limit_max;
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* Process chunks that cross stripes_min..stripes_max devices,
|
||||
* BTRFS_BALANCE_ARGS_STRIPES_RANGE
|
||||
*/
|
||||
__le32 stripes_min;
|
||||
__le32 stripes_max;
|
||||
|
||||
__le64 unused[6];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/*
|
||||
* store balance parameters to disk so that balance can be properly
|
||||
* resumed after crash or unmount
|
||||
*/
|
||||
struct btrfs_balance_item {
|
||||
/* BTRFS_BALANCE_* */
|
||||
__le64 flags;
|
||||
|
||||
struct btrfs_disk_balance_args data;
|
||||
struct btrfs_disk_balance_args meta;
|
||||
struct btrfs_disk_balance_args sys;
|
||||
|
||||
__le64 unused[4];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
|
||||
|
||||
struct btrfs_file_extent_item {
|
||||
/*
|
||||
* transaction id that created this extent
|
||||
*/
|
||||
__le64 generation;
|
||||
/*
|
||||
* max number of bytes to hold this extent in ram
|
||||
* when we split a compressed extent we can't know how big
|
||||
* each of the resulting pieces will be. So, this is
|
||||
* an upper limit on the size of the extent in ram instead of
|
||||
* an exact limit.
|
||||
*/
|
||||
__le64 ram_bytes;
|
||||
|
||||
/*
|
||||
* 32 bits for the various ways we might encode the data,
|
||||
* including compression and encryption. If any of these
|
||||
* are set to something a given disk format doesn't understand
|
||||
* it is treated like an incompat flag for reading and writing,
|
||||
* but not for stat.
|
||||
*/
|
||||
__u8 compression;
|
||||
__u8 encryption;
|
||||
__le16 other_encoding; /* spare for later use */
|
||||
|
||||
/* are we inline data or a real extent? */
|
||||
__u8 type;
|
||||
|
||||
/*
|
||||
* disk space consumed by the extent, checksum blocks are included
|
||||
* in these numbers
|
||||
*
|
||||
* At this offset in the structure, the inline extent data start.
|
||||
*/
|
||||
__le64 disk_bytenr;
|
||||
__le64 disk_num_bytes;
|
||||
/*
|
||||
* the logical offset in file blocks (no csums)
|
||||
* this extent record is for. This allows a file extent to point
|
||||
* into the middle of an existing extent on disk, sharing it
|
||||
* between two snapshots (useful if some bytes in the middle of the
|
||||
* extent have changed
|
||||
*/
|
||||
__le64 offset;
|
||||
/*
|
||||
* the logical number of file blocks (no csums included). This
|
||||
* always reflects the size uncompressed and without encoding.
|
||||
*/
|
||||
__le64 num_bytes;
|
||||
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_csum_item {
|
||||
__u8 csum;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_dev_stats_item {
|
||||
/*
|
||||
* grow this item struct at the end for future enhancements and keep
|
||||
* the existing values unchanged
|
||||
*/
|
||||
__le64 values[BTRFS_DEV_STAT_VALUES_MAX];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS	0
#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID	1
#define BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED	0
#define BTRFS_DEV_REPLACE_ITEM_STATE_STARTED		1
#define BTRFS_DEV_REPLACE_ITEM_STATE_SUSPENDED		2
#define BTRFS_DEV_REPLACE_ITEM_STATE_FINISHED		3
#define BTRFS_DEV_REPLACE_ITEM_STATE_CANCELED		4
|
||||
|
||||
struct btrfs_dev_replace_item {
|
||||
/*
|
||||
* grow this item struct at the end for future enhancements and keep
|
||||
* the existing values unchanged
|
||||
*/
|
||||
__le64 src_devid;
|
||||
__le64 cursor_left;
|
||||
__le64 cursor_right;
|
||||
__le64 cont_reading_from_srcdev_mode;
|
||||
|
||||
__le64 replace_state;
|
||||
__le64 time_started;
|
||||
__le64 time_stopped;
|
||||
__le64 num_write_errors;
|
||||
__le64 num_uncorrectable_read_errors;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/* different types of block groups (and chunks) */
#define BTRFS_BLOCK_GROUP_DATA		(1ULL << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM	(1ULL << 1)
#define BTRFS_BLOCK_GROUP_METADATA	(1ULL << 2)
#define BTRFS_BLOCK_GROUP_RAID0		(1ULL << 3)
#define BTRFS_BLOCK_GROUP_RAID1		(1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP		(1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10	(1ULL << 6)
#define BTRFS_BLOCK_GROUP_RAID5         (1ULL << 7)
#define BTRFS_BLOCK_GROUP_RAID6         (1ULL << 8)
#define BTRFS_BLOCK_GROUP_RESERVED	(BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
					 BTRFS_SPACE_INFO_GLOBAL_RSV)
|
||||
|
||||
/*
 * RAID profile indices, numbered consecutively from 0.
 * BTRFS_NR_RAID_TYPES comes last and therefore equals the number of
 * profiles listed before it.
 */
enum btrfs_raid_types {
	BTRFS_RAID_RAID10,
	BTRFS_RAID_RAID1,
	BTRFS_RAID_DUP,
	BTRFS_RAID_RAID0,
	BTRFS_RAID_SINGLE,
	BTRFS_RAID_RAID5,
	BTRFS_RAID_RAID6,
	BTRFS_NR_RAID_TYPES
};
|
||||
|
||||
#define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
					 BTRFS_BLOCK_GROUP_SYSTEM |  \
					 BTRFS_BLOCK_GROUP_METADATA)

#define BTRFS_BLOCK_GROUP_PROFILE_MASK	(BTRFS_BLOCK_GROUP_RAID0 |   \
					 BTRFS_BLOCK_GROUP_RAID1 |   \
					 BTRFS_BLOCK_GROUP_RAID5 |   \
					 BTRFS_BLOCK_GROUP_RAID6 |   \
					 BTRFS_BLOCK_GROUP_DUP |     \
					 BTRFS_BLOCK_GROUP_RAID10)
#define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |   \
					 BTRFS_BLOCK_GROUP_RAID6)

/*
 * We need a bit for restriper to be able to tell when chunks of type
 * SINGLE are available.  This "extended" profile format is used in
 * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
 * (on-disk).  The corresponding on-disk bit in chunk.type is reserved
 * to avoid remappings between two formats in future.
 */
#define BTRFS_AVAIL_ALLOC_BIT_SINGLE	(1ULL << 48)

/*
 * A fake block group type that is used to communicate global block reserve
 * size to userspace via the SPACE_INFO ioctl.
 */
#define BTRFS_SPACE_INFO_GLOBAL_RSV	(1ULL << 49)

#define BTRFS_EXTENDED_PROFILE_MASK	(BTRFS_BLOCK_GROUP_PROFILE_MASK | \
					 BTRFS_AVAIL_ALLOC_BIT_SINGLE)
|
||||
|
||||
/*
 * Convert chunk-format flags to the "extended" profile format.
 *
 * If none of the BTRFS_BLOCK_GROUP_PROFILE_MASK bits are set in @flags,
 * BTRFS_AVAIL_ALLOC_BIT_SINGLE is OR-ed in; otherwise @flags is returned
 * unchanged.
 */
static inline __u64 chunk_to_extended(__u64 flags)
{
	if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)
		flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;

	return flags;
}
|
||||
/*
 * Convert "extended" profile flags back to chunk format by clearing
 * BTRFS_AVAIL_ALLOC_BIT_SINGLE; every other bit of @flags is preserved.
 */
static inline __u64 extended_to_chunk(__u64 flags)
{
	return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
}
|
||||
|
||||
struct btrfs_block_group_item {
|
||||
__le64 used;
|
||||
__le64 chunk_objectid;
|
||||
__le64 flags;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_free_space_info {
|
||||
__le32 extent_count;
|
||||
__le32 flags;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
|
||||
|
||||
#define BTRFS_QGROUP_LEVEL_SHIFT 48
|
||||
static inline __u64 btrfs_qgroup_level(__u64 qgroupid)
|
||||
{
|
||||
return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
|
||||
}
|
||||
|
||||
/*
 * is subvolume quota turned on?
 */
#define BTRFS_QGROUP_STATUS_FLAG_ON		(1ULL << 0)
/*
 * RESCAN is set during the initialization phase
 */
#define BTRFS_QGROUP_STATUS_FLAG_RESCAN		(1ULL << 1)
/*
 * Some qgroup entries are known to be out of date,
 * either because the configuration has changed in a way that
 * makes a rescan necessary, or because the fs has been mounted
 * with a non-qgroup-aware version.
 * Turning quota off and on again makes it inconsistent, too.
 */
#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT	(1ULL << 2)

#define BTRFS_QGROUP_STATUS_VERSION        1
|
||||
|
||||
struct btrfs_qgroup_status_item {
|
||||
__le64 version;
|
||||
/*
|
||||
* the generation is updated during every commit. As older
|
||||
* versions of btrfs are not aware of qgroups, it will be
|
||||
* possible to detect inconsistencies by checking the
|
||||
* generation on mount time
|
||||
*/
|
||||
__le64 generation;
|
||||
|
||||
/* flag definitions see above */
|
||||
__le64 flags;
|
||||
|
||||
/*
|
||||
* only used during scanning to record the progress
|
||||
* of the scan. It contains a logical address
|
||||
*/
|
||||
__le64 rescan;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_qgroup_info_item {
|
||||
__le64 generation;
|
||||
__le64 rfer;
|
||||
__le64 rfer_cmpr;
|
||||
__le64 excl;
|
||||
__le64 excl_cmpr;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_qgroup_limit_item {
|
||||
/*
|
||||
* only updated when any of the other values change
|
||||
*/
|
||||
__le64 flags;
|
||||
__le64 max_rfer;
|
||||
__le64 max_excl;
|
||||
__le64 rsv_rfer;
|
||||
__le64 rsv_excl;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#endif /* _BTRFS_CTREE_H_ */
|
Loading…
Reference in New Issue
Block a user