mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-15 16:53:54 +08:00
Merge branch 'restriper' of git://github.com/idryomov/btrfs-unstable into integration
This commit is contained in:
commit
27263e2832
205
fs/btrfs/ctree.h
205
fs/btrfs/ctree.h
@ -86,6 +86,9 @@ struct btrfs_ordered_sum;
|
||||
/* holds checksums of all the data extents */
|
||||
#define BTRFS_CSUM_TREE_OBJECTID 7ULL
|
||||
|
||||
/* for storing balance parameters in the root tree */
|
||||
#define BTRFS_BALANCE_OBJECTID -4ULL
|
||||
|
||||
/* orphan objectid for tracking unlinked/truncated files */
|
||||
#define BTRFS_ORPHAN_OBJECTID -5ULL
|
||||
|
||||
@ -692,6 +695,54 @@ struct btrfs_root_ref {
|
||||
__le16 name_len;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
struct btrfs_disk_balance_args {
|
||||
/*
|
||||
* profiles to operate on, single is denoted by
|
||||
* BTRFS_AVAIL_ALLOC_BIT_SINGLE
|
||||
*/
|
||||
__le64 profiles;
|
||||
|
||||
/* usage filter */
|
||||
__le64 usage;
|
||||
|
||||
/* devid filter */
|
||||
__le64 devid;
|
||||
|
||||
/* devid subset filter [pstart..pend) */
|
||||
__le64 pstart;
|
||||
__le64 pend;
|
||||
|
||||
/* btrfs virtual address space subset filter [vstart..vend) */
|
||||
__le64 vstart;
|
||||
__le64 vend;
|
||||
|
||||
/*
|
||||
* profile to convert to, single is denoted by
|
||||
* BTRFS_AVAIL_ALLOC_BIT_SINGLE
|
||||
*/
|
||||
__le64 target;
|
||||
|
||||
/* BTRFS_BALANCE_ARGS_* */
|
||||
__le64 flags;
|
||||
|
||||
__le64 unused[8];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/*
|
||||
* store balance parameters to disk so that balance can be properly
|
||||
* resumed after crash or unmount
|
||||
*/
|
||||
struct btrfs_balance_item {
|
||||
/* BTRFS_BALANCE_* */
|
||||
__le64 flags;
|
||||
|
||||
struct btrfs_disk_balance_args data;
|
||||
struct btrfs_disk_balance_args meta;
|
||||
struct btrfs_disk_balance_args sys;
|
||||
|
||||
__le64 unused[4];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_FILE_EXTENT_INLINE 0
|
||||
#define BTRFS_FILE_EXTENT_REG 1
|
||||
#define BTRFS_FILE_EXTENT_PREALLOC 2
|
||||
@ -751,14 +802,32 @@ struct btrfs_csum_item {
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/* different types of block groups (and chunks) */
|
||||
#define BTRFS_BLOCK_GROUP_DATA (1 << 0)
|
||||
#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
|
||||
#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
|
||||
#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3)
|
||||
#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
|
||||
#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
|
||||
#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
|
||||
#define BTRFS_NR_RAID_TYPES 5
|
||||
#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
|
||||
#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
|
||||
#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
|
||||
#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
|
||||
#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
|
||||
#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
|
||||
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
|
||||
#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
|
||||
#define BTRFS_NR_RAID_TYPES 5
|
||||
|
||||
#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
|
||||
BTRFS_BLOCK_GROUP_SYSTEM | \
|
||||
BTRFS_BLOCK_GROUP_METADATA)
|
||||
|
||||
#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
|
||||
BTRFS_BLOCK_GROUP_RAID1 | \
|
||||
BTRFS_BLOCK_GROUP_DUP | \
|
||||
BTRFS_BLOCK_GROUP_RAID10)
|
||||
/*
|
||||
* We need a bit for restriper to be able to tell when chunks of type
|
||||
* SINGLE are available. This "extended" profile format is used in
|
||||
* fs_info->avail_*_alloc_bits (in-memory) and balance item fields
|
||||
* (on-disk). The corresponding on-disk bit in chunk.type is reserved
|
||||
* to avoid remappings between two formats in future.
|
||||
*/
|
||||
#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
|
||||
|
||||
struct btrfs_block_group_item {
|
||||
__le64 used;
|
||||
@ -916,6 +985,7 @@ struct btrfs_block_group_cache {
|
||||
struct reloc_control;
|
||||
struct btrfs_device;
|
||||
struct btrfs_fs_devices;
|
||||
struct btrfs_balance_control;
|
||||
struct btrfs_delayed_root;
|
||||
struct btrfs_fs_info {
|
||||
u8 fsid[BTRFS_FSID_SIZE];
|
||||
@ -1132,12 +1202,23 @@ struct btrfs_fs_info {
|
||||
spinlock_t ref_cache_lock;
|
||||
u64 total_ref_cache_size;
|
||||
|
||||
/*
|
||||
* these three are in extended format (availability of single
|
||||
* chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
|
||||
* types are denoted by corresponding BTRFS_BLOCK_GROUP_* bits)
|
||||
*/
|
||||
u64 avail_data_alloc_bits;
|
||||
u64 avail_metadata_alloc_bits;
|
||||
u64 avail_system_alloc_bits;
|
||||
u64 data_alloc_profile;
|
||||
u64 metadata_alloc_profile;
|
||||
u64 system_alloc_profile;
|
||||
|
||||
/* restriper state */
|
||||
spinlock_t balance_lock;
|
||||
struct mutex balance_mutex;
|
||||
atomic_t balance_running;
|
||||
atomic_t balance_pause_req;
|
||||
atomic_t balance_cancel_req;
|
||||
struct btrfs_balance_control *balance_ctl;
|
||||
wait_queue_head_t balance_wait_q;
|
||||
|
||||
unsigned data_chunk_allocations;
|
||||
unsigned metadata_ratio;
|
||||
@ -1383,6 +1464,8 @@ struct btrfs_ioctl_defrag_range_args {
|
||||
#define BTRFS_DEV_ITEM_KEY 216
|
||||
#define BTRFS_CHUNK_ITEM_KEY 228
|
||||
|
||||
#define BTRFS_BALANCE_ITEM_KEY 248
|
||||
|
||||
/*
|
||||
* string items are for debugging. They just store a short string of
|
||||
* data in the FS
|
||||
@ -1413,6 +1496,7 @@ struct btrfs_ioctl_defrag_range_args {
|
||||
#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
|
||||
#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
|
||||
#define BTRFS_MOUNT_RECOVERY (1 << 18)
|
||||
#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
|
||||
|
||||
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
|
||||
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
|
||||
@ -2077,8 +2161,86 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
|
||||
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
|
||||
num_devices, 64);
|
||||
|
||||
/* struct btrfs_super_block */
|
||||
/* struct btrfs_balance_item */
|
||||
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
|
||||
|
||||
static inline void btrfs_balance_data(struct extent_buffer *eb,
|
||||
struct btrfs_balance_item *bi,
|
||||
struct btrfs_disk_balance_args *ba)
|
||||
{
|
||||
read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
|
||||
}
|
||||
|
||||
static inline void btrfs_set_balance_data(struct extent_buffer *eb,
|
||||
struct btrfs_balance_item *bi,
|
||||
struct btrfs_disk_balance_args *ba)
|
||||
{
|
||||
write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
|
||||
}
|
||||
|
||||
static inline void btrfs_balance_meta(struct extent_buffer *eb,
|
||||
struct btrfs_balance_item *bi,
|
||||
struct btrfs_disk_balance_args *ba)
|
||||
{
|
||||
read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
|
||||
}
|
||||
|
||||
static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
|
||||
struct btrfs_balance_item *bi,
|
||||
struct btrfs_disk_balance_args *ba)
|
||||
{
|
||||
write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
|
||||
}
|
||||
|
||||
static inline void btrfs_balance_sys(struct extent_buffer *eb,
|
||||
struct btrfs_balance_item *bi,
|
||||
struct btrfs_disk_balance_args *ba)
|
||||
{
|
||||
read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
|
||||
}
|
||||
|
||||
static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
|
||||
struct btrfs_balance_item *bi,
|
||||
struct btrfs_disk_balance_args *ba)
|
||||
{
|
||||
write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
|
||||
}
|
||||
|
||||
static inline void
|
||||
btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
|
||||
struct btrfs_disk_balance_args *disk)
|
||||
{
|
||||
memset(cpu, 0, sizeof(*cpu));
|
||||
|
||||
cpu->profiles = le64_to_cpu(disk->profiles);
|
||||
cpu->usage = le64_to_cpu(disk->usage);
|
||||
cpu->devid = le64_to_cpu(disk->devid);
|
||||
cpu->pstart = le64_to_cpu(disk->pstart);
|
||||
cpu->pend = le64_to_cpu(disk->pend);
|
||||
cpu->vstart = le64_to_cpu(disk->vstart);
|
||||
cpu->vend = le64_to_cpu(disk->vend);
|
||||
cpu->target = le64_to_cpu(disk->target);
|
||||
cpu->flags = le64_to_cpu(disk->flags);
|
||||
}
|
||||
|
||||
static inline void
|
||||
btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
|
||||
struct btrfs_balance_args *cpu)
|
||||
{
|
||||
memset(disk, 0, sizeof(*disk));
|
||||
|
||||
disk->profiles = cpu_to_le64(cpu->profiles);
|
||||
disk->usage = cpu_to_le64(cpu->usage);
|
||||
disk->devid = cpu_to_le64(cpu->devid);
|
||||
disk->pstart = cpu_to_le64(cpu->pstart);
|
||||
disk->pend = cpu_to_le64(cpu->pend);
|
||||
disk->vstart = cpu_to_le64(cpu->vstart);
|
||||
disk->vend = cpu_to_le64(cpu->vend);
|
||||
disk->target = cpu_to_le64(cpu->target);
|
||||
disk->flags = cpu_to_le64(cpu->flags);
|
||||
}
|
||||
|
||||
/* struct btrfs_super_block */
|
||||
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
|
||||
@ -2500,6 +2662,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
static inline void free_fs_info(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
kfree(fs_info->balance_ctl);
|
||||
kfree(fs_info->delayed_root);
|
||||
kfree(fs_info->extent_root);
|
||||
kfree(fs_info->tree_root);
|
||||
@ -2510,6 +2673,24 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
|
||||
kfree(fs_info->super_for_commit);
|
||||
kfree(fs_info);
|
||||
}
|
||||
/**
|
||||
* profile_is_valid - tests whether a given profile is valid and reduced
|
||||
* @flags: profile to validate
|
||||
* @extended: if true @flags is treated as an extended profile
|
||||
*/
|
||||
static inline int profile_is_valid(u64 flags, int extended)
|
||||
{
|
||||
u64 mask = ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
|
||||
|
||||
flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
|
||||
if (extended)
|
||||
mask &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
|
||||
|
||||
if (flags & mask)
|
||||
return 0;
|
||||
/* true if zero or exactly one bit set */
|
||||
return (flags & (~flags + 1)) == flags;
|
||||
}
|
||||
|
||||
/* root-item.c */
|
||||
int btrfs_find_root_ref(struct btrfs_root *tree_root,
|
||||
|
@ -2002,6 +2002,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
|
||||
init_rwsem(&fs_info->scrub_super_lock);
|
||||
fs_info->scrub_workers_refcnt = 0;
|
||||
|
||||
spin_lock_init(&fs_info->balance_lock);
|
||||
mutex_init(&fs_info->balance_mutex);
|
||||
atomic_set(&fs_info->balance_running, 0);
|
||||
atomic_set(&fs_info->balance_pause_req, 0);
|
||||
atomic_set(&fs_info->balance_cancel_req, 0);
|
||||
fs_info->balance_ctl = NULL;
|
||||
init_waitqueue_head(&fs_info->balance_wait_q);
|
||||
|
||||
sb->s_blocksize = 4096;
|
||||
sb->s_blocksize_bits = blksize_bits(4096);
|
||||
sb->s_bdi = &fs_info->bdi;
|
||||
@ -2321,9 +2329,6 @@ retry_root_backup:
|
||||
|
||||
fs_info->generation = generation;
|
||||
fs_info->last_trans_committed = generation;
|
||||
fs_info->data_alloc_profile = (u64)-1;
|
||||
fs_info->metadata_alloc_profile = (u64)-1;
|
||||
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
|
||||
|
||||
ret = btrfs_init_space_info(fs_info);
|
||||
if (ret) {
|
||||
@ -2426,6 +2431,10 @@ retry_root_backup:
|
||||
if (!err)
|
||||
err = btrfs_orphan_cleanup(fs_info->tree_root);
|
||||
up_read(&fs_info->cleanup_work_sem);
|
||||
|
||||
if (!err)
|
||||
err = btrfs_recover_balance(fs_info->tree_root);
|
||||
|
||||
if (err) {
|
||||
close_ctree(tree_root);
|
||||
return ERR_PTR(err);
|
||||
@ -2975,6 +2984,9 @@ int close_ctree(struct btrfs_root *root)
|
||||
fs_info->closing = 1;
|
||||
smp_mb();
|
||||
|
||||
/* pause restriper - we want to resume on mount */
|
||||
btrfs_pause_balance(root->fs_info);
|
||||
|
||||
btrfs_scrub_cancel(root);
|
||||
|
||||
/* wait for any defraggers to finish */
|
||||
|
@ -618,8 +618,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
|
||||
struct list_head *head = &info->space_info;
|
||||
struct btrfs_space_info *found;
|
||||
|
||||
flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
|
||||
BTRFS_BLOCK_GROUP_METADATA;
|
||||
flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(found, head, list) {
|
||||
@ -2999,9 +2998,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
|
||||
INIT_LIST_HEAD(&found->block_groups[i]);
|
||||
init_rwsem(&found->groups_sem);
|
||||
spin_lock_init(&found->lock);
|
||||
found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
|
||||
BTRFS_BLOCK_GROUP_SYSTEM |
|
||||
BTRFS_BLOCK_GROUP_METADATA);
|
||||
found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
|
||||
found->total_bytes = total_bytes;
|
||||
found->disk_total = total_bytes * factor;
|
||||
found->bytes_used = bytes_used;
|
||||
@ -3022,20 +3019,27 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
|
||||
|
||||
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
|
||||
{
|
||||
u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
|
||||
BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_RAID10 |
|
||||
BTRFS_BLOCK_GROUP_DUP);
|
||||
if (extra_flags) {
|
||||
if (flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
fs_info->avail_data_alloc_bits |= extra_flags;
|
||||
if (flags & BTRFS_BLOCK_GROUP_METADATA)
|
||||
fs_info->avail_metadata_alloc_bits |= extra_flags;
|
||||
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
fs_info->avail_system_alloc_bits |= extra_flags;
|
||||
}
|
||||
u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
|
||||
|
||||
/* chunk -> extended profile */
|
||||
if (extra_flags == 0)
|
||||
extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
|
||||
|
||||
if (flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
fs_info->avail_data_alloc_bits |= extra_flags;
|
||||
if (flags & BTRFS_BLOCK_GROUP_METADATA)
|
||||
fs_info->avail_metadata_alloc_bits |= extra_flags;
|
||||
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
fs_info->avail_system_alloc_bits |= extra_flags;
|
||||
}
|
||||
|
||||
/*
|
||||
* @flags: available profiles in extended format (see ctree.h)
|
||||
*
|
||||
* Returns reduced profile in chunk format. If profile changing is in
|
||||
* progress (either running or paused) picks the target profile (if it's
|
||||
* already available), otherwise falls back to plain reducing.
|
||||
*/
|
||||
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
|
||||
{
|
||||
/*
|
||||
@ -3046,6 +3050,34 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
|
||||
u64 num_devices = root->fs_info->fs_devices->rw_devices +
|
||||
root->fs_info->fs_devices->missing_devices;
|
||||
|
||||
/* pick restriper's target profile if it's available */
|
||||
spin_lock(&root->fs_info->balance_lock);
|
||||
if (root->fs_info->balance_ctl) {
|
||||
struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
|
||||
u64 tgt = 0;
|
||||
|
||||
if ((flags & BTRFS_BLOCK_GROUP_DATA) &&
|
||||
(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
|
||||
(flags & bctl->data.target)) {
|
||||
tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
|
||||
} else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
|
||||
(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
|
||||
(flags & bctl->sys.target)) {
|
||||
tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
|
||||
} else if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
|
||||
(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
|
||||
(flags & bctl->meta.target)) {
|
||||
tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
|
||||
}
|
||||
|
||||
if (tgt) {
|
||||
spin_unlock(&root->fs_info->balance_lock);
|
||||
flags = tgt;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
spin_unlock(&root->fs_info->balance_lock);
|
||||
|
||||
if (num_devices == 1)
|
||||
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
|
||||
if (num_devices < 4)
|
||||
@ -3065,22 +3097,25 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
|
||||
if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
|
||||
((flags & BTRFS_BLOCK_GROUP_RAID1) |
|
||||
(flags & BTRFS_BLOCK_GROUP_RAID10) |
|
||||
(flags & BTRFS_BLOCK_GROUP_DUP)))
|
||||
(flags & BTRFS_BLOCK_GROUP_DUP))) {
|
||||
flags &= ~BTRFS_BLOCK_GROUP_RAID0;
|
||||
}
|
||||
|
||||
out:
|
||||
/* extended -> chunk profile */
|
||||
flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
|
||||
return flags;
|
||||
}
|
||||
|
||||
static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
|
||||
{
|
||||
if (flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
flags |= root->fs_info->avail_data_alloc_bits &
|
||||
root->fs_info->data_alloc_profile;
|
||||
flags |= root->fs_info->avail_data_alloc_bits;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
flags |= root->fs_info->avail_system_alloc_bits &
|
||||
root->fs_info->system_alloc_profile;
|
||||
flags |= root->fs_info->avail_system_alloc_bits;
|
||||
else if (flags & BTRFS_BLOCK_GROUP_METADATA)
|
||||
flags |= root->fs_info->avail_metadata_alloc_bits &
|
||||
root->fs_info->metadata_alloc_profile;
|
||||
flags |= root->fs_info->avail_metadata_alloc_bits;
|
||||
|
||||
return btrfs_reduce_alloc_profile(root, flags);
|
||||
}
|
||||
|
||||
@ -3282,7 +3317,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
|
||||
int wait_for_alloc = 0;
|
||||
int ret = 0;
|
||||
|
||||
flags = btrfs_reduce_alloc_profile(extent_root, flags);
|
||||
BUG_ON(!profile_is_valid(flags, 0));
|
||||
|
||||
space_info = __find_space_info(extent_root->fs_info, flags);
|
||||
if (!space_info) {
|
||||
@ -6792,6 +6827,29 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
|
||||
u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
|
||||
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
|
||||
|
||||
if (root->fs_info->balance_ctl) {
|
||||
struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
|
||||
u64 tgt = 0;
|
||||
|
||||
/* pick restriper's target profile and return */
|
||||
if (flags & BTRFS_BLOCK_GROUP_DATA &&
|
||||
bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
|
||||
tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
|
||||
} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
|
||||
bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
|
||||
tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
|
||||
} else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
|
||||
bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
|
||||
tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
|
||||
}
|
||||
|
||||
if (tgt) {
|
||||
/* extended -> chunk profile */
|
||||
tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
|
||||
return tgt;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* we add in the count of missing devices because we want
|
||||
* to make sure that any RAID levels on a degraded FS
|
||||
@ -7466,6 +7524,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Drop the profile of a block group with chunk-format @flags from the
 * fs_info->avail_*_alloc_bits fields of every type (data, metadata, system)
 * set in @flags.  A chunk without any profile bit is accounted as
 * BTRFS_AVAIL_ALLOC_BIT_SINGLE.
 */
static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
	u64 profile = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
	u64 keep;

	/* chunk -> extended profile */
	if (!profile)
		profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;

	keep = ~profile;

	if (flags & BTRFS_BLOCK_GROUP_DATA)
		fs_info->avail_data_alloc_bits &= keep;
	if (flags & BTRFS_BLOCK_GROUP_METADATA)
		fs_info->avail_metadata_alloc_bits &= keep;
	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
		fs_info->avail_system_alloc_bits &= keep;
}
|
||||
|
||||
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 group_start)
|
||||
{
|
||||
@ -7476,6 +7550,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_key key;
|
||||
struct inode *inode;
|
||||
int ret;
|
||||
int index;
|
||||
int factor;
|
||||
|
||||
root = root->fs_info->extent_root;
|
||||
@ -7491,6 +7566,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
free_excluded_extents(root, block_group);
|
||||
|
||||
memcpy(&key, &block_group->key, sizeof(key));
|
||||
index = get_block_group_index(block_group);
|
||||
if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
|
||||
BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_RAID10))
|
||||
@ -7565,6 +7641,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
* are still on the list after taking the semaphore
|
||||
*/
|
||||
list_del_init(&block_group->list);
|
||||
if (list_empty(&block_group->space_info->block_groups[index]))
|
||||
clear_avail_alloc_bits(root->fs_info, block_group->flags);
|
||||
up_write(&block_group->space_info->groups_sem);
|
||||
|
||||
if (block_group->cached == BTRFS_CACHE_STARTED)
|
||||
|
226
fs/btrfs/ioctl.c
226
fs/btrfs/ioctl.c
@ -1203,13 +1203,21 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
mutex_lock(&root->fs_info->volume_mutex);
|
||||
if (root->fs_info->balance_ctl) {
|
||||
printk(KERN_INFO "btrfs: balance in progress\n");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
vol_args = memdup_user(arg, sizeof(*vol_args));
|
||||
if (IS_ERR(vol_args))
|
||||
return PTR_ERR(vol_args);
|
||||
if (IS_ERR(vol_args)) {
|
||||
ret = PTR_ERR(vol_args);
|
||||
goto out;
|
||||
}
|
||||
|
||||
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
|
||||
|
||||
mutex_lock(&root->fs_info->volume_mutex);
|
||||
sizestr = vol_args->name;
|
||||
devstr = strchr(sizestr, ':');
|
||||
if (devstr) {
|
||||
@ -1226,7 +1234,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
|
||||
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
|
||||
(unsigned long long)devid);
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
goto out_free;
|
||||
}
|
||||
if (!strcmp(sizestr, "max"))
|
||||
new_size = device->bdev->bd_inode->i_size;
|
||||
@ -1241,7 +1249,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
|
||||
new_size = memparse(sizestr, NULL);
|
||||
if (new_size == 0) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
goto out_free;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1250,7 +1258,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
|
||||
if (mod < 0) {
|
||||
if (new_size > old_size) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
goto out_free;
|
||||
}
|
||||
new_size = old_size - new_size;
|
||||
} else if (mod > 0) {
|
||||
@ -1259,11 +1267,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
|
||||
|
||||
if (new_size < 256 * 1024 * 1024) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
goto out_free;
|
||||
}
|
||||
if (new_size > device->bdev->bd_inode->i_size) {
|
||||
ret = -EFBIG;
|
||||
goto out_unlock;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
do_div(new_size, root->sectorsize);
|
||||
@ -1276,7 +1284,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_unlock;
|
||||
goto out_free;
|
||||
}
|
||||
ret = btrfs_grow_device(trans, device, new_size);
|
||||
btrfs_commit_transaction(trans, root);
|
||||
@ -1284,9 +1292,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
|
||||
ret = btrfs_shrink_device(device, new_size);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&root->fs_info->volume_mutex);
|
||||
out_free:
|
||||
kfree(vol_args);
|
||||
out:
|
||||
mutex_unlock(&root->fs_info->volume_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2052,14 +2061,25 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
mutex_lock(&root->fs_info->volume_mutex);
|
||||
if (root->fs_info->balance_ctl) {
|
||||
printk(KERN_INFO "btrfs: balance in progress\n");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
vol_args = memdup_user(arg, sizeof(*vol_args));
|
||||
if (IS_ERR(vol_args))
|
||||
return PTR_ERR(vol_args);
|
||||
if (IS_ERR(vol_args)) {
|
||||
ret = PTR_ERR(vol_args);
|
||||
goto out;
|
||||
}
|
||||
|
||||
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
|
||||
ret = btrfs_init_new_device(root, vol_args->name);
|
||||
|
||||
kfree(vol_args);
|
||||
out:
|
||||
mutex_unlock(&root->fs_info->volume_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2074,14 +2094,25 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
|
||||
if (root->fs_info->sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
mutex_lock(&root->fs_info->volume_mutex);
|
||||
if (root->fs_info->balance_ctl) {
|
||||
printk(KERN_INFO "btrfs: balance in progress\n");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
vol_args = memdup_user(arg, sizeof(*vol_args));
|
||||
if (IS_ERR(vol_args))
|
||||
return PTR_ERR(vol_args);
|
||||
if (IS_ERR(vol_args)) {
|
||||
ret = PTR_ERR(vol_args);
|
||||
goto out;
|
||||
}
|
||||
|
||||
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
|
||||
ret = btrfs_rm_device(root, vol_args->name);
|
||||
|
||||
kfree(vol_args);
|
||||
out:
|
||||
mutex_unlock(&root->fs_info->volume_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3034,6 +3065,163 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
|
||||
struct btrfs_ioctl_balance_args *bargs)
|
||||
{
|
||||
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
|
||||
|
||||
bargs->flags = bctl->flags;
|
||||
|
||||
if (atomic_read(&fs_info->balance_running))
|
||||
bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
|
||||
if (atomic_read(&fs_info->balance_pause_req))
|
||||
bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
|
||||
if (atomic_read(&fs_info->balance_cancel_req))
|
||||
bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;
|
||||
|
||||
memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
|
||||
memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
|
||||
memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
|
||||
|
||||
if (lock) {
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
} else {
|
||||
memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Start or resume a balance.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 * @arg:  user pointer to a struct btrfs_ioctl_balance_args, or NULL to
 *        balance everything without filters
 *
 * With BTRFS_BALANCE_RESUME set in the user supplied flags, the existing
 * fs_info->balance_ctl is reused (-ENOTCONN if there is none).  Otherwise a
 * new balance control is allocated, unless one already exists
 * (-EINPROGRESS).  volume_mutex and balance_mutex are both held across the
 * call to btrfs_balance().
 *
 * Returns -EPERM, -EROFS, -ENOTCONN, -EINPROGRESS, -ENOMEM, the error from
 * memdup_user(), -EFAULT if the arguments cannot be copied back to @arg,
 * or otherwise whatever btrfs_balance() returned.
 */
static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *bargs = NULL;
	struct btrfs_balance_control *bctl;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	mutex_lock(&fs_info->volume_mutex);
	mutex_lock(&fs_info->balance_mutex);

	if (arg) {
		bargs = memdup_user(arg, sizeof(*bargs));
		if (IS_ERR(bargs)) {
			ret = PTR_ERR(bargs);
			/* bargs holds an error pointer: skip the kfree() */
			goto unlock;
		}

		if (bargs->flags & BTRFS_BALANCE_RESUME) {
			bctl = fs_info->balance_ctl;
			if (!bctl) {
				ret = -ENOTCONN;
				goto free_args;
			}

			spin_lock(&fs_info->balance_lock);
			bctl->flags |= BTRFS_BALANCE_RESUME;
			spin_unlock(&fs_info->balance_lock);

			goto run;
		}
	}

	if (fs_info->balance_ctl) {
		ret = -EINPROGRESS;
		goto free_args;
	}

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl) {
		ret = -ENOMEM;
		goto free_args;
	}

	bctl->fs_info = fs_info;
	if (arg) {
		bctl->flags = bargs->flags;

		memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
		memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
		memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
	} else {
		/* balance everything - no filters */
		bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
	}

run:
	ret = btrfs_balance(bctl, bargs);
	/*
	 * bctl is freed in __cancel_balance or in free_fs_info if
	 * restriper was paused all the way until unmount
	 */
	if (arg && copy_to_user(arg, bargs, sizeof(*bargs)))
		ret = -EFAULT;

free_args:
	kfree(bargs);
unlock:
	mutex_unlock(&fs_info->balance_mutex);
	mutex_unlock(&fs_info->volume_mutex);
	return ret;
}
|
||||
|
||||
static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
|
||||
{
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
switch (cmd) {
|
||||
case BTRFS_BALANCE_CTL_PAUSE:
|
||||
return btrfs_pause_balance(root->fs_info);
|
||||
case BTRFS_BALANCE_CTL_CANCEL:
|
||||
return btrfs_cancel_balance(root->fs_info);
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
|
||||
void __user *arg)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_ioctl_balance_args *bargs;
|
||||
int ret = 0;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
if (!fs_info->balance_ctl) {
|
||||
ret = -ENOTCONN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
|
||||
if (!bargs) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
update_ioctl_balance_args(fs_info, 1, bargs);
|
||||
|
||||
if (copy_to_user(arg, bargs, sizeof(*bargs)))
|
||||
ret = -EFAULT;
|
||||
|
||||
kfree(bargs);
|
||||
out:
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
long btrfs_ioctl(struct file *file, unsigned int
|
||||
cmd, unsigned long arg)
|
||||
{
|
||||
@ -3078,7 +3266,7 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
case BTRFS_IOC_DEV_INFO:
|
||||
return btrfs_ioctl_dev_info(root, argp);
|
||||
case BTRFS_IOC_BALANCE:
|
||||
return btrfs_balance(root->fs_info->dev_root);
|
||||
return btrfs_ioctl_balance(root, NULL);
|
||||
case BTRFS_IOC_CLONE:
|
||||
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
|
||||
case BTRFS_IOC_CLONE_RANGE:
|
||||
@ -3110,6 +3298,12 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
return btrfs_ioctl_scrub_cancel(root, argp);
|
||||
case BTRFS_IOC_SCRUB_PROGRESS:
|
||||
return btrfs_ioctl_scrub_progress(root, argp);
|
||||
case BTRFS_IOC_BALANCE_V2:
|
||||
return btrfs_ioctl_balance(root, argp);
|
||||
case BTRFS_IOC_BALANCE_CTL:
|
||||
return btrfs_ioctl_balance_ctl(root, arg);
|
||||
case BTRFS_IOC_BALANCE_PROGRESS:
|
||||
return btrfs_ioctl_balance_progress(root, argp);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
@ -109,6 +109,55 @@ struct btrfs_ioctl_fs_info_args {
|
||||
__u64 reserved[124]; /* pad to 1k */
|
||||
};
|
||||
|
||||
/* balance control ioctl modes */
|
||||
#define BTRFS_BALANCE_CTL_PAUSE 1
|
||||
#define BTRFS_BALANCE_CTL_CANCEL 2
|
||||
|
||||
/*
|
||||
* this is packed, because it should be exactly the same as its disk
|
||||
* byte order counterpart (struct btrfs_disk_balance_args)
|
||||
*/
|
||||
struct btrfs_balance_args {
|
||||
__u64 profiles;
|
||||
__u64 usage;
|
||||
__u64 devid;
|
||||
__u64 pstart;
|
||||
__u64 pend;
|
||||
__u64 vstart;
|
||||
__u64 vend;
|
||||
|
||||
__u64 target;
|
||||
|
||||
__u64 flags;
|
||||
|
||||
__u64 unused[8];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/* report balance progress to userspace */
|
||||
struct btrfs_balance_progress {
|
||||
__u64 expected; /* estimated # of chunks that will be
|
||||
* relocated to fulfill the request */
|
||||
__u64 considered; /* # of chunks we have considered so far */
|
||||
__u64 completed; /* # of chunks relocated so far */
|
||||
};
|
||||
|
||||
#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
|
||||
#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
|
||||
#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
|
||||
|
||||
struct btrfs_ioctl_balance_args {
|
||||
__u64 flags; /* in/out */
|
||||
__u64 state; /* out */
|
||||
|
||||
struct btrfs_balance_args data; /* in/out */
|
||||
struct btrfs_balance_args meta; /* in/out */
|
||||
struct btrfs_balance_args sys; /* in/out */
|
||||
|
||||
struct btrfs_balance_progress stat; /* out */
|
||||
|
||||
__u64 unused[72]; /* pad to 1k */
|
||||
};
|
||||
|
||||
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
|
||||
struct btrfs_ioctl_ino_lookup_args {
|
||||
__u64 treeid;
|
||||
@ -272,6 +321,11 @@ struct btrfs_ioctl_logical_ino_args {
|
||||
struct btrfs_ioctl_dev_info_args)
|
||||
#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
|
||||
struct btrfs_ioctl_fs_info_args)
|
||||
#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
|
||||
struct btrfs_ioctl_balance_args)
|
||||
#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
|
||||
#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
|
||||
struct btrfs_ioctl_balance_args)
|
||||
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
|
||||
struct btrfs_ioctl_ino_path_args)
|
||||
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
|
||||
|
@ -164,8 +164,9 @@ enum {
|
||||
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
|
||||
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
|
||||
Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
|
||||
Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
|
||||
Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err,
|
||||
Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
|
||||
Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
|
||||
Opt_err,
|
||||
};
|
||||
|
||||
static match_table_t tokens = {
|
||||
@ -200,6 +201,7 @@ static match_table_t tokens = {
|
||||
{Opt_inode_cache, "inode_cache"},
|
||||
{Opt_no_space_cache, "nospace_cache"},
|
||||
{Opt_recovery, "recovery"},
|
||||
{Opt_skip_balance, "skip_balance"},
|
||||
{Opt_err, NULL},
|
||||
};
|
||||
|
||||
@ -398,6 +400,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
|
||||
printk(KERN_INFO "btrfs: enabling auto recovery");
|
||||
btrfs_set_opt(info->mount_opt, RECOVERY);
|
||||
break;
|
||||
case Opt_skip_balance:
|
||||
btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
|
||||
break;
|
||||
case Opt_err:
|
||||
printk(KERN_INFO "btrfs: unrecognized mount option "
|
||||
"'%s'\n", p);
|
||||
@ -723,6 +728,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
|
||||
seq_puts(seq, ",autodefrag");
|
||||
if (btrfs_test_opt(root, INODE_MAP_CACHE))
|
||||
seq_puts(seq, ",inode_cache");
|
||||
if (btrfs_test_opt(root, SKIP_BALANCE))
|
||||
seq_puts(seq, ",skip_balance");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <linux/random.h>
|
||||
#include <linux/iocontext.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <asm/div64.h>
|
||||
#include "compat.h"
|
||||
#include "ctree.h"
|
||||
@ -1282,7 +1283,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
|
||||
bool clear_super = false;
|
||||
|
||||
mutex_lock(&uuid_mutex);
|
||||
mutex_lock(&root->fs_info->volume_mutex);
|
||||
|
||||
all_avail = root->fs_info->avail_data_alloc_bits |
|
||||
root->fs_info->avail_system_alloc_bits |
|
||||
@ -1452,7 +1452,6 @@ error_close:
|
||||
if (bdev)
|
||||
blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
|
||||
out:
|
||||
mutex_unlock(&root->fs_info->volume_mutex);
|
||||
mutex_unlock(&uuid_mutex);
|
||||
return ret;
|
||||
error_undo:
|
||||
@ -1629,7 +1628,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
|
||||
}
|
||||
|
||||
filemap_write_and_wait(bdev->bd_inode->i_mapping);
|
||||
mutex_lock(&root->fs_info->volume_mutex);
|
||||
|
||||
devices = &root->fs_info->fs_devices->devices;
|
||||
/*
|
||||
@ -1757,8 +1755,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
|
||||
ret = btrfs_relocate_sys_chunks(root);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&root->fs_info->volume_mutex);
|
||||
|
||||
return ret;
|
||||
error:
|
||||
blkdev_put(bdev, FMODE_EXCL);
|
||||
@ -1766,7 +1763,7 @@ error:
|
||||
mutex_unlock(&uuid_mutex);
|
||||
up_write(&sb->s_umount);
|
||||
}
|
||||
goto out;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
|
||||
@ -2077,6 +2074,362 @@ error:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int insert_balance_item(struct btrfs_root *root,
|
||||
struct btrfs_balance_control *bctl)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_balance_item *item;
|
||||
struct btrfs_disk_balance_args disk_bargs;
|
||||
struct btrfs_path *path;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_key key;
|
||||
int ret, err;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
if (IS_ERR(trans)) {
|
||||
btrfs_free_path(path);
|
||||
return PTR_ERR(trans);
|
||||
}
|
||||
|
||||
key.objectid = BTRFS_BALANCE_OBJECTID;
|
||||
key.type = BTRFS_BALANCE_ITEM_KEY;
|
||||
key.offset = 0;
|
||||
|
||||
ret = btrfs_insert_empty_item(trans, root, path, &key,
|
||||
sizeof(*item));
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
|
||||
|
||||
memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
|
||||
|
||||
btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
|
||||
btrfs_set_balance_data(leaf, item, &disk_bargs);
|
||||
btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
|
||||
btrfs_set_balance_meta(leaf, item, &disk_bargs);
|
||||
btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
|
||||
btrfs_set_balance_sys(leaf, item, &disk_bargs);
|
||||
|
||||
btrfs_set_balance_flags(leaf, item, bctl->flags);
|
||||
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
err = btrfs_commit_transaction(trans, root);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int del_balance_item(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
int ret, err;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
if (IS_ERR(trans)) {
|
||||
btrfs_free_path(path);
|
||||
return PTR_ERR(trans);
|
||||
}
|
||||
|
||||
key.objectid = BTRFS_BALANCE_OBJECTID;
|
||||
key.type = BTRFS_BALANCE_ITEM_KEY;
|
||||
key.offset = 0;
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > 0) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
err = btrfs_commit_transaction(trans, root);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a heuristic used to reduce the number of chunks balanced on
|
||||
* resume after balance was interrupted.
|
||||
*/
|
||||
static void update_balance_args(struct btrfs_balance_control *bctl)
|
||||
{
|
||||
/*
|
||||
* Turn on soft mode for chunk types that were being converted.
|
||||
*/
|
||||
if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
|
||||
bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
|
||||
if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
|
||||
bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
|
||||
if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
|
||||
bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
|
||||
|
||||
/*
|
||||
* Turn on usage filter if is not already used. The idea is
|
||||
* that chunks that we have already balanced should be
|
||||
* reasonably full. Don't do it for chunks that are being
|
||||
* converted - that will keep us from relocating unconverted
|
||||
* (albeit full) chunks.
|
||||
*/
|
||||
if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
|
||||
!(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
|
||||
bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
|
||||
bctl->data.usage = 90;
|
||||
}
|
||||
if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
|
||||
!(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
|
||||
bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
|
||||
bctl->sys.usage = 90;
|
||||
}
|
||||
if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
|
||||
!(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
|
||||
bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
|
||||
bctl->meta.usage = 90;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Should be called with both balance and volume mutexes held to
|
||||
* serialize other volume operations (add_dev/rm_dev/resize) with
|
||||
* restriper. Same goes for unset_balance_control.
|
||||
*/
|
||||
static void set_balance_control(struct btrfs_balance_control *bctl)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = bctl->fs_info;
|
||||
|
||||
BUG_ON(fs_info->balance_ctl);
|
||||
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
fs_info->balance_ctl = bctl;
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
}
|
||||
|
||||
static void unset_balance_control(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
|
||||
|
||||
BUG_ON(!fs_info->balance_ctl);
|
||||
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
fs_info->balance_ctl = NULL;
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
|
||||
kfree(bctl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Balance filters. Return 1 if chunk should be filtered out
|
||||
* (should not be balanced).
|
||||
*/
|
||||
/*
 * Profiles filter: returns 0 (keep the chunk) if the profile of the
 * chunk is one of bargs->profiles, 1 (filter it out) otherwise.
 *
 * @chunk_profile may carry other bits as well, only the ones in
 * BTRFS_BLOCK_GROUP_PROFILE_MASK are looked at.
 */
static int chunk_profiles_filter(u64 chunk_profile,
				 struct btrfs_balance_args *bargs)
{
	u64 profile = chunk_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK;

	/* no profile bit at all is represented by the "single" bit */
	if (!profile)
		profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;

	return (bargs->profiles & profile) ? 0 : 1;
}
|
||||
|
||||
/*
 * Return @factor percent of @num.  A @factor of zero or less yields 0,
 * a @factor of 100 or more yields @num unchanged.
 */
static u64 div_factor_fine(u64 num, int factor)
{
	u64 scaled;

	if (factor > 0 && factor < 100) {
		scaled = num * factor;
		do_div(scaled, 100);
		return scaled;
	}

	/* out of range percentages are clamped */
	return (factor <= 0) ? 0 : num;
}
|
||||
|
||||
/*
 * Usage filter: returns 0 (keep the chunk) if the bytes used in the
 * block group at @chunk_offset are below bargs->usage percent of the
 * block group's key.offset (presumably its length - confirm), and
 * 1 (filter it out) otherwise.
 */
static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
			      struct btrfs_balance_args *bargs)
{
	struct btrfs_block_group_cache *bg;
	u64 used;
	u64 limit;
	int filtered;

	bg = btrfs_lookup_block_group(fs_info, chunk_offset);

	used = btrfs_block_group_used(&bg->item);
	limit = div_factor_fine(bg->key.offset, bargs->usage);
	filtered = (used < limit) ? 0 : 1;

	/* drop the reference taken by the lookup */
	btrfs_put_block_group(bg);

	return filtered;
}
|
||||
|
||||
static int chunk_devid_filter(struct extent_buffer *leaf,
|
||||
struct btrfs_chunk *chunk,
|
||||
struct btrfs_balance_args *bargs)
|
||||
{
|
||||
struct btrfs_stripe *stripe;
|
||||
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_stripes; i++) {
|
||||
stripe = btrfs_stripe_nr(chunk, i);
|
||||
if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* [pstart, pend) */
|
||||
static int chunk_drange_filter(struct extent_buffer *leaf,
|
||||
struct btrfs_chunk *chunk,
|
||||
u64 chunk_offset,
|
||||
struct btrfs_balance_args *bargs)
|
||||
{
|
||||
struct btrfs_stripe *stripe;
|
||||
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
|
||||
u64 stripe_offset;
|
||||
u64 stripe_length;
|
||||
int factor;
|
||||
int i;
|
||||
|
||||
if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
|
||||
return 0;
|
||||
|
||||
if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP |
|
||||
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))
|
||||
factor = 2;
|
||||
else
|
||||
factor = 1;
|
||||
factor = num_stripes / factor;
|
||||
|
||||
for (i = 0; i < num_stripes; i++) {
|
||||
stripe = btrfs_stripe_nr(chunk, i);
|
||||
if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
|
||||
continue;
|
||||
|
||||
stripe_offset = btrfs_stripe_offset(leaf, stripe);
|
||||
stripe_length = btrfs_chunk_length(leaf, chunk);
|
||||
do_div(stripe_length, factor);
|
||||
|
||||
if (stripe_offset < bargs->pend &&
|
||||
stripe_offset + stripe_length > bargs->pstart)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* [vstart, vend) */
|
||||
static int chunk_vrange_filter(struct extent_buffer *leaf,
|
||||
struct btrfs_chunk *chunk,
|
||||
u64 chunk_offset,
|
||||
struct btrfs_balance_args *bargs)
|
||||
{
|
||||
if (chunk_offset < bargs->vend &&
|
||||
chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
|
||||
/* at least part of the chunk is inside this vrange */
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Soft convert filter: returns 1 (filter the chunk out) if a conversion
 * was requested and the chunk already has the target profile, 0 (keep
 * it) otherwise.
 *
 * @chunk_profile may carry other bits as well, only the ones in
 * BTRFS_BLOCK_GROUP_PROFILE_MASK are looked at.
 */
static int chunk_soft_convert_filter(u64 chunk_profile,
				     struct btrfs_balance_args *bargs)
{
	u64 profile;

	if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
		return 0;

	profile = chunk_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK;

	/* no profile bit at all is represented by the "single" bit */
	if (!profile)
		profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;

	return (bargs->target & profile) ? 1 : 0;
}
|
||||
|
||||
/*
 * Decide whether @chunk, found in @leaf and starting at @chunk_offset,
 * has to be balanced according to the balance control of the
 * filesystem.
 *
 * Returns 1 if the chunk should be balanced, 0 if one of the enabled
 * filters rejected it.
 */
static int should_balance_chunk(struct btrfs_root *root,
				struct extent_buffer *leaf,
				struct btrfs_chunk *chunk, u64 chunk_offset)
{
	struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
	struct btrfs_balance_args *bargs = NULL;
	u64 chunk_type = btrfs_chunk_type(leaf, chunk);
	u64 wanted;

	/*
	 * type filter - the two masks are ANDed directly, so this relies
	 * on the balance type bits lining up with the block group ones
	 */
	wanted = bctl->flags & BTRFS_BALANCE_TYPE_MASK;
	if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) & wanted))
		return 0;

	/* pick the set of args that goes with this type of chunk */
	if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
		bargs = &bctl->data;
	else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
		bargs = &bctl->sys;
	else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
		bargs = &bctl->meta;

	/* profiles filter */
	if (bargs->flags & BTRFS_BALANCE_ARGS_PROFILES)
		if (chunk_profiles_filter(chunk_type, bargs))
			return 0;

	/* usage filter */
	if (bargs->flags & BTRFS_BALANCE_ARGS_USAGE)
		if (chunk_usage_filter(bctl->fs_info, chunk_offset, bargs))
			return 0;

	/* devid filter */
	if (bargs->flags & BTRFS_BALANCE_ARGS_DEVID)
		if (chunk_devid_filter(leaf, chunk, bargs))
			return 0;

	/* drange filter, makes sense only with devid filter */
	if (bargs->flags & BTRFS_BALANCE_ARGS_DRANGE)
		if (chunk_drange_filter(leaf, chunk, chunk_offset, bargs))
			return 0;

	/* vrange filter */
	if (bargs->flags & BTRFS_BALANCE_ARGS_VRANGE)
		if (chunk_vrange_filter(leaf, chunk, chunk_offset, bargs))
			return 0;

	/* soft profile changing mode */
	if (bargs->flags & BTRFS_BALANCE_ARGS_SOFT)
		if (chunk_soft_convert_filter(chunk_type, bargs))
			return 0;

	return 1;
}
|
||||
|
||||
static u64 div_factor(u64 num, int factor)
|
||||
{
|
||||
if (factor == 10)
|
||||
@ -2086,29 +2439,28 @@ static u64 div_factor(u64 num, int factor)
|
||||
return num;
|
||||
}
|
||||
|
||||
int btrfs_balance(struct btrfs_root *dev_root)
|
||||
static int __btrfs_balance(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
int ret;
|
||||
struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
|
||||
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
|
||||
struct btrfs_root *chunk_root = fs_info->chunk_root;
|
||||
struct btrfs_root *dev_root = fs_info->dev_root;
|
||||
struct list_head *devices;
|
||||
struct btrfs_device *device;
|
||||
u64 old_size;
|
||||
u64 size_to_free;
|
||||
struct btrfs_chunk *chunk;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_key found_key;
|
||||
|
||||
if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
mutex_lock(&dev_root->fs_info->volume_mutex);
|
||||
dev_root = dev_root->fs_info->dev_root;
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct extent_buffer *leaf;
|
||||
int slot;
|
||||
int ret;
|
||||
int enospc_errors = 0;
|
||||
bool counting = true;
|
||||
|
||||
/* step one make some room on all the devices */
|
||||
devices = &fs_info->fs_devices->devices;
|
||||
list_for_each_entry(device, devices, dev_list) {
|
||||
old_size = device->total_bytes;
|
||||
size_to_free = div_factor(old_size, 1);
|
||||
@ -2137,11 +2489,23 @@ int btrfs_balance(struct btrfs_root *dev_root)
|
||||
ret = -ENOMEM;
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* zero out stat counters */
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
memset(&bctl->stat, 0, sizeof(bctl->stat));
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
again:
|
||||
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
|
||||
key.offset = (u64)-1;
|
||||
key.type = BTRFS_CHUNK_ITEM_KEY;
|
||||
|
||||
while (1) {
|
||||
if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
|
||||
atomic_read(&fs_info->balance_cancel_req)) {
|
||||
ret = -ECANCELED;
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
@ -2151,15 +2515,19 @@ int btrfs_balance(struct btrfs_root *dev_root)
|
||||
* failed
|
||||
*/
|
||||
if (ret == 0)
|
||||
break;
|
||||
BUG(); /* FIXME break ? */
|
||||
|
||||
ret = btrfs_previous_item(chunk_root, path, 0,
|
||||
BTRFS_CHUNK_ITEM_KEY);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, slot);
|
||||
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
|
||||
path->slots[0]);
|
||||
if (found_key.objectid != key.objectid)
|
||||
break;
|
||||
|
||||
@ -2167,22 +2535,375 @@ int btrfs_balance(struct btrfs_root *dev_root)
|
||||
if (found_key.offset == 0)
|
||||
break;
|
||||
|
||||
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
|
||||
|
||||
if (!counting) {
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
bctl->stat.considered++;
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
}
|
||||
|
||||
ret = should_balance_chunk(chunk_root, leaf, chunk,
|
||||
found_key.offset);
|
||||
btrfs_release_path(path);
|
||||
if (!ret)
|
||||
goto loop;
|
||||
|
||||
if (counting) {
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
bctl->stat.expected++;
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
goto loop;
|
||||
}
|
||||
|
||||
ret = btrfs_relocate_chunk(chunk_root,
|
||||
chunk_root->root_key.objectid,
|
||||
found_key.objectid,
|
||||
found_key.offset);
|
||||
if (ret && ret != -ENOSPC)
|
||||
goto error;
|
||||
if (ret == -ENOSPC) {
|
||||
enospc_errors++;
|
||||
} else {
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
bctl->stat.completed++;
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
}
|
||||
loop:
|
||||
key.offset = found_key.offset - 1;
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
if (counting) {
|
||||
btrfs_release_path(path);
|
||||
counting = false;
|
||||
goto again;
|
||||
}
|
||||
error:
|
||||
btrfs_free_path(path);
|
||||
mutex_unlock(&dev_root->fs_info->volume_mutex);
|
||||
if (enospc_errors) {
|
||||
printk(KERN_INFO "btrfs: %d enospc errors during balance\n",
|
||||
enospc_errors);
|
||||
if (!ret)
|
||||
ret = -ENOSPC;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int balance_need_close(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
/* cancel requested || normal exit path */
|
||||
return atomic_read(&fs_info->balance_cancel_req) ||
|
||||
(atomic_read(&fs_info->balance_pause_req) == 0 &&
|
||||
atomic_read(&fs_info->balance_cancel_req) == 0);
|
||||
}
|
||||
|
||||
static void __cancel_balance(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
int ret;
|
||||
|
||||
unset_balance_control(fs_info);
|
||||
ret = del_balance_item(fs_info->tree_root);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
|
||||
struct btrfs_ioctl_balance_args *bargs);
|
||||
|
||||
/*
|
||||
* Should be called with both balance and volume mutexes held
|
||||
*/
|
||||
int btrfs_balance(struct btrfs_balance_control *bctl,
|
||||
struct btrfs_ioctl_balance_args *bargs)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = bctl->fs_info;
|
||||
u64 allowed;
|
||||
int ret;
|
||||
|
||||
if (btrfs_fs_closing(fs_info) ||
|
||||
atomic_read(&fs_info->balance_pause_req) ||
|
||||
atomic_read(&fs_info->balance_cancel_req)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* In case of mixed groups both data and meta should be picked,
|
||||
* and identical options should be given for both of them.
|
||||
*/
|
||||
allowed = btrfs_super_incompat_flags(fs_info->super_copy);
|
||||
if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
|
||||
(bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) {
|
||||
if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
|
||||
!(bctl->flags & BTRFS_BALANCE_METADATA) ||
|
||||
memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
|
||||
printk(KERN_ERR "btrfs: with mixed groups data and "
|
||||
"metadata balance options must be the same\n");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Profile changing sanity checks. Skip them if a simple
|
||||
* balance is requested.
|
||||
*/
|
||||
if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) &
|
||||
BTRFS_BALANCE_ARGS_CONVERT))
|
||||
goto do_balance;
|
||||
|
||||
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
|
||||
if (fs_info->fs_devices->num_devices == 1)
|
||||
allowed |= BTRFS_BLOCK_GROUP_DUP;
|
||||
else if (fs_info->fs_devices->num_devices < 4)
|
||||
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
|
||||
else
|
||||
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_RAID10);
|
||||
|
||||
if (!profile_is_valid(bctl->data.target, 1) ||
|
||||
bctl->data.target & ~allowed) {
|
||||
printk(KERN_ERR "btrfs: unable to start balance with target "
|
||||
"data profile %llu\n",
|
||||
(unsigned long long)bctl->data.target);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (!profile_is_valid(bctl->meta.target, 1) ||
|
||||
bctl->meta.target & ~allowed) {
|
||||
printk(KERN_ERR "btrfs: unable to start balance with target "
|
||||
"metadata profile %llu\n",
|
||||
(unsigned long long)bctl->meta.target);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (!profile_is_valid(bctl->sys.target, 1) ||
|
||||
bctl->sys.target & ~allowed) {
|
||||
printk(KERN_ERR "btrfs: unable to start balance with target "
|
||||
"system profile %llu\n",
|
||||
(unsigned long long)bctl->sys.target);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (bctl->data.target & BTRFS_BLOCK_GROUP_DUP) {
|
||||
printk(KERN_ERR "btrfs: dup for data is not allowed\n");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* allow to reduce meta or sys integrity only if force set */
|
||||
allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_RAID10;
|
||||
if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
|
||||
(fs_info->avail_system_alloc_bits & allowed) &&
|
||||
!(bctl->sys.target & allowed)) ||
|
||||
((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
|
||||
(fs_info->avail_metadata_alloc_bits & allowed) &&
|
||||
!(bctl->meta.target & allowed))) {
|
||||
if (bctl->flags & BTRFS_BALANCE_FORCE) {
|
||||
printk(KERN_INFO "btrfs: force reducing metadata "
|
||||
"integrity\n");
|
||||
} else {
|
||||
printk(KERN_ERR "btrfs: balance will reduce metadata "
|
||||
"integrity, use force if you want this\n");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
do_balance:
|
||||
ret = insert_balance_item(fs_info->tree_root, bctl);
|
||||
if (ret && ret != -EEXIST)
|
||||
goto out;
|
||||
|
||||
if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
|
||||
BUG_ON(ret == -EEXIST);
|
||||
set_balance_control(bctl);
|
||||
} else {
|
||||
BUG_ON(ret != -EEXIST);
|
||||
spin_lock(&fs_info->balance_lock);
|
||||
update_balance_args(bctl);
|
||||
spin_unlock(&fs_info->balance_lock);
|
||||
}
|
||||
|
||||
atomic_inc(&fs_info->balance_running);
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
|
||||
ret = __btrfs_balance(fs_info);
|
||||
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
atomic_dec(&fs_info->balance_running);
|
||||
|
||||
if (bargs) {
|
||||
memset(bargs, 0, sizeof(*bargs));
|
||||
update_ioctl_balance_args(fs_info, 0, bargs);
|
||||
}
|
||||
|
||||
if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
|
||||
balance_need_close(fs_info)) {
|
||||
__cancel_balance(fs_info);
|
||||
}
|
||||
|
||||
wake_up(&fs_info->balance_wait_q);
|
||||
|
||||
return ret;
|
||||
out:
|
||||
if (bctl->flags & BTRFS_BALANCE_RESUME)
|
||||
__cancel_balance(fs_info);
|
||||
else
|
||||
kfree(bctl);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int balance_kthread(void *data)
|
||||
{
|
||||
struct btrfs_balance_control *bctl =
|
||||
(struct btrfs_balance_control *)data;
|
||||
struct btrfs_fs_info *fs_info = bctl->fs_info;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&fs_info->volume_mutex);
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
|
||||
set_balance_control(bctl);
|
||||
|
||||
if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
|
||||
printk(KERN_INFO "btrfs: force skipping balance\n");
|
||||
} else {
|
||||
printk(KERN_INFO "btrfs: continuing balance\n");
|
||||
ret = btrfs_balance(bctl, NULL);
|
||||
}
|
||||
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
mutex_unlock(&fs_info->volume_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_recover_balance(struct btrfs_root *tree_root)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
struct btrfs_balance_control *bctl;
|
||||
struct btrfs_balance_item *item;
|
||||
struct btrfs_disk_balance_args disk_bargs;
|
||||
struct btrfs_path *path;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_key key;
|
||||
int ret;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
|
||||
if (!bctl) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
key.objectid = BTRFS_BALANCE_OBJECTID;
|
||||
key.type = BTRFS_BALANCE_ITEM_KEY;
|
||||
key.offset = 0;
|
||||
|
||||
ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out_bctl;
|
||||
if (ret > 0) { /* ret = -ENOENT; */
|
||||
ret = 0;
|
||||
goto out_bctl;
|
||||
}
|
||||
|
||||
leaf = path->nodes[0];
|
||||
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
|
||||
|
||||
bctl->fs_info = tree_root->fs_info;
|
||||
bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
|
||||
|
||||
btrfs_balance_data(leaf, item, &disk_bargs);
|
||||
btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
|
||||
btrfs_balance_meta(leaf, item, &disk_bargs);
|
||||
btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
|
||||
btrfs_balance_sys(leaf, item, &disk_bargs);
|
||||
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
|
||||
|
||||
tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
|
||||
if (IS_ERR(tsk))
|
||||
ret = PTR_ERR(tsk);
|
||||
else
|
||||
goto out;
|
||||
|
||||
out_bctl:
|
||||
kfree(bctl);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
if (!fs_info->balance_ctl) {
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
return -ENOTCONN;
|
||||
}
|
||||
|
||||
if (atomic_read(&fs_info->balance_running)) {
|
||||
atomic_inc(&fs_info->balance_pause_req);
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
|
||||
wait_event(fs_info->balance_wait_q,
|
||||
atomic_read(&fs_info->balance_running) == 0);
|
||||
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
/* we are good with balance_ctl ripped off from under us */
|
||||
BUG_ON(atomic_read(&fs_info->balance_running));
|
||||
atomic_dec(&fs_info->balance_pause_req);
|
||||
} else {
|
||||
ret = -ENOTCONN;
|
||||
}
|
||||
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
if (!fs_info->balance_ctl) {
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
return -ENOTCONN;
|
||||
}
|
||||
|
||||
atomic_inc(&fs_info->balance_cancel_req);
|
||||
/*
|
||||
* if we are running just wait and return, balance item is
|
||||
* deleted in btrfs_balance in this case
|
||||
*/
|
||||
if (atomic_read(&fs_info->balance_running)) {
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
wait_event(fs_info->balance_wait_q,
|
||||
atomic_read(&fs_info->balance_running) == 0);
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
} else {
|
||||
/* __cancel_balance needs volume_mutex */
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
mutex_lock(&fs_info->volume_mutex);
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
|
||||
if (fs_info->balance_ctl)
|
||||
__cancel_balance(fs_info);
|
||||
|
||||
mutex_unlock(&fs_info->volume_mutex);
|
||||
}
|
||||
|
||||
BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
|
||||
atomic_dec(&fs_info->balance_cancel_req);
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* shrinking a device means finding all of the device extents past
|
||||
* the new size, and then following the back refs to the chunks.
|
||||
@ -2756,8 +3477,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
|
||||
alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
|
||||
(fs_info->metadata_alloc_profile &
|
||||
fs_info->avail_metadata_alloc_bits);
|
||||
fs_info->avail_metadata_alloc_bits;
|
||||
alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
|
||||
|
||||
ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
|
||||
@ -2767,8 +3487,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
|
||||
sys_chunk_offset = chunk_offset + chunk_size;
|
||||
|
||||
alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
|
||||
(fs_info->system_alloc_profile &
|
||||
fs_info->avail_system_alloc_bits);
|
||||
fs_info->avail_system_alloc_bits;
|
||||
alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
|
||||
|
||||
ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
|
||||
@ -2955,12 +3674,8 @@ again:
|
||||
}
|
||||
}
|
||||
if (rw & REQ_DISCARD) {
|
||||
if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
|
||||
BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_DUP |
|
||||
BTRFS_BLOCK_GROUP_RAID10)) {
|
||||
if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK)
|
||||
stripes_required = map->num_stripes;
|
||||
}
|
||||
}
|
||||
if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
|
||||
stripes_allocated < stripes_required) {
|
||||
@ -2984,10 +3699,7 @@ again:
|
||||
|
||||
if (rw & REQ_DISCARD)
|
||||
*length = min_t(u64, em->len - offset, *length);
|
||||
else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
|
||||
BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_RAID10 |
|
||||
BTRFS_BLOCK_GROUP_DUP)) {
|
||||
else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
|
||||
/* we limit the length of each bio to what fits in a stripe */
|
||||
*length = min_t(u64, em->len - offset,
|
||||
map->stripe_len - stripe_offset);
|
||||
|
@ -186,6 +186,51 @@ struct map_lookup {
|
||||
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
|
||||
(sizeof(struct btrfs_bio_stripe) * (n)))
|
||||
|
||||
/*
|
||||
* Restriper's general type filter
|
||||
*/
|
||||
#define BTRFS_BALANCE_DATA (1ULL << 0)
|
||||
#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
|
||||
#define BTRFS_BALANCE_METADATA (1ULL << 2)
|
||||
|
||||
#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
|
||||
BTRFS_BALANCE_SYSTEM | \
|
||||
BTRFS_BALANCE_METADATA)
|
||||
|
||||
#define BTRFS_BALANCE_FORCE (1ULL << 3)
|
||||
#define BTRFS_BALANCE_RESUME (1ULL << 4)
|
||||
|
||||
/*
|
||||
* Balance filters
|
||||
*/
|
||||
#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
|
||||
#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
|
||||
#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
|
||||
#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
|
||||
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
|
||||
|
||||
/*
|
||||
* Profile changing flags. When SOFT is set we won't relocate chunk if
|
||||
* it already has the target profile (even though it may be
|
||||
* half-filled).
|
||||
*/
|
||||
#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
|
||||
#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
|
||||
|
||||
struct btrfs_balance_args;
|
||||
struct btrfs_balance_progress;
|
||||
struct btrfs_balance_control {
|
||||
struct btrfs_fs_info *fs_info;
|
||||
|
||||
struct btrfs_balance_args data;
|
||||
struct btrfs_balance_args meta;
|
||||
struct btrfs_balance_args sys;
|
||||
|
||||
u64 flags;
|
||||
|
||||
struct btrfs_balance_progress stat;
|
||||
};
|
||||
|
||||
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
|
||||
u64 end, u64 *length);
|
||||
|
||||
@ -228,7 +273,11 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
|
||||
u8 *uuid, u8 *fsid);
|
||||
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
|
||||
int btrfs_init_new_device(struct btrfs_root *root, char *path);
|
||||
int btrfs_balance(struct btrfs_root *dev_root);
|
||||
int btrfs_balance(struct btrfs_balance_control *bctl,
|
||||
struct btrfs_ioctl_balance_args *bargs);
|
||||
int btrfs_recover_balance(struct btrfs_root *tree_root);
|
||||
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
|
||||
int find_free_dev_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_device *device, u64 num_bytes,
|
||||
|
Loading…
Reference in New Issue
Block a user