From 9b4eaf43f4b0207b5d1ca8b8d22df88ea9e142fe Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 3 Jun 2014 11:35:59 +0800 Subject: [PATCH 01/20] btrfs: rename add_device_membership to btrfs_kobj_add_device Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index df39458f1487..1395efbffa2d 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -605,7 +605,7 @@ static void init_feature_attrs(void) } } -static int add_device_membership(struct btrfs_fs_info *fs_info) +static int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info) { int error = 0; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; @@ -666,7 +666,7 @@ int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) if (error) goto failure; - error = add_device_membership(fs_info); + error = btrfs_kobj_add_device(fs_info); if (error) goto failure; From 99994cde9c59c2b8bb67d46d531b26cc73e39747 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 3 Jun 2014 11:36:00 +0800 Subject: [PATCH 02/20] btrfs: dev delete should remove sysfs entry when we delete the device from the mounted btrfs, we would need its corresponding sysfs enty to be removed as well. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/sysfs.c | 20 ++++++++++++++++++++ fs/btrfs/sysfs.h | 2 ++ fs/btrfs/volumes.c | 4 ++++ 3 files changed, 26 insertions(+) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 1395efbffa2d..401677b11045 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -605,6 +605,26 @@ static void init_feature_attrs(void) } } +int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, + struct btrfs_device *one_device) +{ + struct hd_struct *disk; + struct kobject *disk_kobj; + + if (!fs_info->device_dir_kobj) + return -EINVAL; + + if (one_device) { + disk = one_device->bdev->bd_part; + disk_kobj = &part_to_dev(disk)->kobj; + + sysfs_remove_link(fs_info->device_dir_kobj, + disk_kobj->name); + } + + return 0; +} + static int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info) { int error = 0; diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index 9ab576318a84..529554e5f9de 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h @@ -66,4 +66,6 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags); extern const char * const btrfs_feature_set_names[3]; extern struct kobj_type space_info_ktype; extern struct kobj_type btrfs_raid_ktype; +int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, + struct btrfs_device *one_device); #endif /* _BTRFS_SYSFS_H_ */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c83b24251e53..be2c8e285afb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -40,6 +40,7 @@ #include "rcu-string.h" #include "math.h" #include "dev-replace.h" +#include "sysfs.h" static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1680,6 +1681,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev) device->fs_devices->open_devices--; + /* remove sysfs entry */ + btrfs_kobj_rm_device(root->fs_info, device); + call_rcu(&device->rcu, free_device); num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; From 0d39376aa28eba6d63d0120ccc399735842abc8e Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 3 Jun 2014 11:36:01 +0800 Subject: [PATCH 03/20] btrfs: dev add should add its sysfs entry we would need the device links to be created, when device is added. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/sysfs.c | 12 +++++++++--- fs/btrfs/sysfs.h | 2 ++ fs/btrfs/volumes.c | 5 +++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 401677b11045..78699364f537 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -625,14 +625,17 @@ int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, return 0; } -static int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info) +int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info, + struct btrfs_device *one_device) { int error = 0; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *dev; - fs_info->device_dir_kobj = kobject_create_and_add("devices", + if (!fs_info->device_dir_kobj) + fs_info->device_dir_kobj = kobject_create_and_add("devices", &fs_info->super_kobj); + if (!fs_info->device_dir_kobj) return -ENOMEM; @@ -643,6 +646,9 @@ static int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info) if (!dev->bdev) continue; + if (one_device && one_device != dev) + continue; + disk = dev->bdev->bd_part; disk_kobj = &part_to_dev(disk)->kobj; @@ -686,7 +692,7 @@ int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) if (error) goto failure; - error = btrfs_kobj_add_device(fs_info); + error = btrfs_kobj_add_device(fs_info, NULL); if (error) goto failure; diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index 529554e5f9de..ac46df37504c 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h @@ -66,6 +66,8 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags); extern const char * const btrfs_feature_set_names[3]; extern struct kobj_type space_info_ktype; extern struct kobj_type btrfs_raid_ktype; +int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info, + struct btrfs_device *one_device); int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_device *one_device); #endif /* _BTRFS_SYSFS_H_ */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index be2c8e285afb..19b67e969b52 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2147,6 +2147,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); btrfs_set_super_num_devices(root->fs_info->super_copy, total_bytes + 1); + + /* add sysfs device entry */ + btrfs_kobj_add_device(root->fs_info, device); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (seeding_dev) { @@ -2209,6 +2213,7 @@ error_trans: unlock_chunks(root); btrfs_end_transaction(trans, root); rcu_string_free(device->name); + btrfs_kobj_rm_device(root->fs_info, device); kfree(device); error: blkdev_put(bdev, FMODE_EXCL); From 49c6f736f34f901117c20960ebd7d5e60f12fcac Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 3 Jun 2014 11:36:02 +0800 Subject: [PATCH 04/20] btrfs: dev replace should replace the sysfs entry when we replace the device its corresponding sysfs entry has to be replaced as well Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/dev-replace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 2af6e66fe788..eea26e1b2fda 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -36,6 +36,7 @@ #include "check-integrity.h" #include "rcu-string.h" #include "dev-replace.h" +#include "sysfs.h" static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, int scrub_ret); @@ -562,6 +563,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, fs_info->fs_devices->latest_bdev = tgt_device->bdev; list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); + /* replace the sysfs entry */ + btrfs_kobj_rm_device(fs_info, src_device); + btrfs_kobj_add_device(fs_info, tgt_device); + btrfs_rm_dev_replace_blocked(fs_info); btrfs_rm_dev_replace_srcdev(fs_info, src_device); From b2373f255cacdc1ea4da25e75a5a78949ffd9d66 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 3 Jun 2014 11:36:03 +0800 Subject: [PATCH 05/20] btrfs: create sprout should rename fsid on the sysfs as well Creating sprout will change the fsid of the mounted root. do the same on the sysfs as well. reproducer: mount /dev/sdb /btrfs (seed disk) btrfs dev add /dev/sdc /btrfs mount -o rw,remount /btrfs btrfs dev del /dev/sdb /btrfs mount /dev/sdb /btrfs Error: kobject_add_internal failed for fe350492-dc28-4051-a601-e017b17e6145 with -EEXIST, don't try to register things with the same name in the same directory. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 9 +++++++++ include/uapi/linux/btrfs.h | 1 + 2 files changed, 10 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 19b67e969b52..6ca0d9cc46f9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2154,6 +2154,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (seeding_dev) { + char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; ret = init_first_rw_device(trans, root, device); if (ret) { btrfs_abort_transaction(trans, root, ret); @@ -2164,6 +2165,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) btrfs_abort_transaction(trans, root, ret); goto error_trans; } + + /* Sprouting would change fsid of the mounted root, + * so rename the fsid on the sysfs + */ + snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", + root->fs_info->fsid); + if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) + goto error_trans; } else { ret = btrfs_add_device(trans, root, device); if (ret) { diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 6f9c38ce45c7..2f47824e7a36 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -38,6 +38,7 @@ struct btrfs_ioctl_vol_args { #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) #define BTRFS_FSID_SIZE 16 #define BTRFS_UUID_SIZE 16 +#define BTRFS_UUID_UNPARSED_SIZE 37 #define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) From 5588383ece6127909df5b9d601d562fe5b9fe38a Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 24 Jun 2014 15:39:16 +0800 Subject: [PATCH 06/20] Btrfs: fix crash when mounting raid5 btrfs with missing disks The reproducer is $ mkfs.btrfs D1 D2 D3 -mraid5 $ mkfs.ext4 D2 && mkfs.ext4 D3 $ mount D1 /btrfs -odegraded ------------------- [ 87.672992] ------------[ cut here ]------------ [ 87.673845] kernel BUG at fs/btrfs/raid56.c:1828! ... [ 87.673845] RIP: 0010:[] [] __raid_recover_end_io+0x4ae/0x4d0 ... [ 87.673845] Call Trace: [ 87.673845] [] ? mempool_free+0x36/0xa0 [ 87.673845] [] raid_recover_end_io+0x75/0xa0 [ 87.673845] [] bio_endio+0x5b/0xa0 [ 87.673845] [] bio_endio_nodec+0x12/0x20 [ 87.673845] [] end_workqueue_fn+0x41/0x50 [ 87.673845] [] normal_work_helper+0xca/0x2c0 [ 87.673845] [] process_one_work+0x1eb/0x530 [ 87.673845] [] ? process_one_work+0x189/0x530 [ 87.673845] [] worker_thread+0x11b/0x4f0 [ 87.673845] [] ? rescuer_thread+0x290/0x290 [ 87.673845] [] kthread+0xe4/0x100 [ 87.673845] [] ? kthread_create_on_node+0x220/0x220 [ 87.673845] [] ret_from_fork+0x7c/0xb0 [ 87.673845] [] ? kthread_create_on_node+0x220/0x220 ------------------- It's because that we miscalculate @rbio->bbio->error so that it doesn't reach maximum of tolerable errors while it should have. Signed-off-by: Liu Bo Tested-by: Satoru Takeuchi Signed-off-by: Chris Mason --- fs/btrfs/raid56.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 4055291a523e..4a88f073fdd7 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1956,9 +1956,10 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) * pages are going to be uptodate. */ for (stripe = 0; stripe < bbio->num_stripes; stripe++) { - if (rbio->faila == stripe || - rbio->failb == stripe) + if (rbio->faila == stripe || rbio->failb == stripe) { + atomic_inc(&rbio->bbio->error); continue; + } for (pagenr = 0; pagenr < nr_pages; pagenr++) { struct page *p; From c39aa7056f50fa7b49265ea35c0f3eddb7c7be68 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 25 Jun 2014 00:00:44 +0900 Subject: [PATCH 07/20] btrfs compression: reuse recently used workspace Add compression `workspace' in free_workspace() to `idle_workspace' list head, instead of tail. So we have better chances to reuse most recently used `workspace'. Signed-off-by: Sergey Senozhatsky Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 92371c414228..1daea0b47187 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -821,7 +821,7 @@ static void free_workspace(int type, struct list_head *workspace) spin_lock(workspace_lock); if (*num_workspace < num_online_cpus()) { - list_add_tail(workspace, idle_workspace); + list_add(workspace, idle_workspace); (*num_workspace)++; spin_unlock(workspace_lock); goto wake; From 46c4e71e9b02a649c722f06569f5b6575da02dba Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 24 Jun 2014 17:48:28 +0100 Subject: [PATCH 08/20] Btrfs: assert send doesn't attempt to start transactions When starting a transaction just assert that current->journal_info doesn't contain a send transaction stub, since send isn't supposed to start transactions and when it finishes (either successfully or not) it's supposed to set current->journal_info to NULL. This is motivated by the change titled: Btrfs: fix crash when starting transaction Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 511839c04f11..a32ad65b6cdc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -386,11 +386,13 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, bool reloc_reserved = false; int ret; + /* Send isn't supposed to start transactions. */ + ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB); + if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) return ERR_PTR(-EROFS); - if (current->journal_info && - current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) { + if (current->journal_info) { WARN_ON(type & TRANS_EXTWRITERS); h = current->journal_info; h->use_count++; From 472b909ff6f4884d235ef7b9d3847fad5efafbff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 25 Jun 2014 13:45:41 -0700 Subject: [PATCH 09/20] btrfs: only unlock block in verify_parent_transid if we locked it This is a regression from my patch a26e8c9f75b0bfd8cccc9e8f110737b136eb5994, we need to only unlock the block if we were the one who locked it. Otherwise this will trip BUG_ON()'s in locking.c Thanks, cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8bb4aa19898f..f00165de6fbf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -369,7 +369,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, out: unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, &cached_state, GFP_NOFS); - btrfs_tree_read_unlock_blocking(eb); + if (need_lock) + btrfs_tree_read_unlock_blocking(eb); return ret; } From 3cc79392558f1789e5e1d2fce44b681980f403c3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 25 Jun 2014 22:36:02 +0100 Subject: [PATCH 10/20] Btrfs: atomically set inode->i_flags in btrfs_update_iflags This change is based on the corresponding recent change for ext4: ext4: atomically set inode->i_flags in ext4_set_inode_flags() That has the following commit message that applies to btrfs as well: "Use cmpxchg() to atomically set i_flags instead of clearing out the S_IMMUTABLE, S_APPEND, etc. flags and then setting them from the EXT4_IMMUTABLE_FL, EXT4_APPEND_FL flags, since this opens up a race where an immutable file has the immutable flag cleared for a brief window of time." Replacing EXT4_IMMUTABLE_FL and EXT4_APPEND_FL with BTRFS_INODE_IMMUTABLE and BTRFS_INODE_APPEND, respectively. Reviewed-by: David Sterba Reviewed-by: Satoru Takeuchi Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6ea15469c63f..02dc64bbf1cb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -136,19 +136,22 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags) void btrfs_update_iflags(struct inode *inode) { struct btrfs_inode *ip = BTRFS_I(inode); - - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + unsigned int new_fl = 0; if (ip->flags & BTRFS_INODE_SYNC) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; if (ip->flags & BTRFS_INODE_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (ip->flags & BTRFS_INODE_APPEND) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (ip->flags & BTRFS_INODE_NOATIME) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (ip->flags & BTRFS_INODE_DIRSYNC) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; + + set_mask_bits(&inode->i_flags, + S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC, + new_fl); } /* From 5f3164813b90f7dbcb5c3ab9006906222ce471b7 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 26 Jun 2014 11:08:16 +0800 Subject: [PATCH 11/20] Btrfs: fix race between balance recovery and root deletion Balance recovery is called when RW mounting or remounting from RO to RW, it is called to finish roots merging. When doing balance recovery, relocation root's corresponding fs root(whose root refs is 0) might be destroyed by cleaner thread, this will make btrfs fail to mount. Fix this problem by holding @cleaner_mutex when doing balance recovery. Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ fs/btrfs/super.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f00165de6fbf..08e65e9cf2aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2905,7 +2905,9 @@ retry_root_backup: if (ret) goto fail_qgroup; + mutex_lock(&fs_info->cleaner_mutex); ret = btrfs_recover_relocation(tree_root); + mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { printk(KERN_WARNING "BTRFS: failed to recover relocation\n"); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4662d92a4b73..b6ebde231de7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1467,7 +1467,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) goto restore; /* recover relocation */ + mutex_lock(&fs_info->cleaner_mutex); ret = btrfs_recover_relocation(root); + mutex_unlock(&fs_info->cleaner_mutex); if (ret) goto restore; From 2aa06a35d06a34b3109bdbf1d653de1695dc8f12 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 27 Jun 2014 16:50:31 -0500 Subject: [PATCH 12/20] btrfs: fix nossd and ssd_spread mount option regression The commit 0780253 btrfs: Cleanup the btrfs_parse_options for remount. broke ssd options quite badly; it stopped making ssd_spread imply ssd, and it made "nossd" unsettable. Put things back at least as well as they were before (though ssd mount option handling is still pretty odd: # mount -o "nossd,ssd_spread" works?) Reported-by: Roman Mamedov Signed-off-by: Eric Sandeen Signed-off-by: Chris Mason --- fs/btrfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b6ebde231de7..0927e463afca 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -522,9 +522,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_ssd_spread: btrfs_set_and_info(root, SSD_SPREAD, "use spread ssd allocation scheme"); + btrfs_set_opt(info->mount_opt, SSD); break; case Opt_nossd: - btrfs_clear_and_info(root, NOSSD, + btrfs_set_and_info(root, NOSSD, "not using ssd allocation scheme"); btrfs_clear_opt(info->mount_opt, SSD); break; From e755f780865221252ef3321215c9796b78e7b1c5 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 30 Jun 2014 17:12:47 +0800 Subject: [PATCH 13/20] btrfs: fix null pointer dereference in clone_fs_devices when name is null when one of the device path is missing btrfs_device name is null. So this patch will check for that. stack: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: [] strlen+0x0/0x30 [] ? clone_fs_devices+0xaa/0x160 [btrfs] [] btrfs_init_new_device+0x317/0xca0 [btrfs] [] ? __kmalloc_track_caller+0x15a/0x1a0 [] btrfs_ioctl+0xaa3/0x2860 [btrfs] [] ? handle_mm_fault+0x48c/0x9c0 [] ? __blkdev_put+0x171/0x180 [] ? __do_page_fault+0x4ac/0x590 [] ? blkdev_put+0x106/0x110 [] ? mntput+0x35/0x40 [] do_vfs_ioctl+0x460/0x4a0 [] ? ____fput+0xe/0x10 [] ? task_work_run+0xb3/0xd0 [] SyS_ioctl+0x57/0x90 [] ? do_page_fault+0xe/0x10 [] system_call_fastpath+0x16/0x1b reproducer: mkfs.btrfs -draid1 -mraid1 /dev/sdg1 /dev/sdg2 btrfstune -S 1 /dev/sdg1 modprobe -r btrfs && modprobe btrfs mount -o degraded /dev/sdg1 /btrfs btrfs dev add /dev/sdg3 /btrfs Signed-off-by: Anand Jain Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6ca0d9cc46f9..6104676857f5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -555,12 +555,14 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) * This is ok to do without rcu read locked because we hold the * uuid mutex so nothing we touch in here is going to disappear. */ - name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); - if (!name) { - kfree(device); - goto error; + if (orig_dev->name) { + name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); + if (!name) { + kfree(device); + goto error; + } + rcu_assign_pointer(device->name, name); } - rcu_assign_pointer(device->name, name); list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; From 0aeb8a6e67cddeac1d42cf64795fde0641a1cffb Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 30 Jun 2014 17:12:48 +0800 Subject: [PATCH 14/20] btrfs: fix null pointer dereference in btrfs_show_devname when name is null dev->name is null but missing flag is not set. Strictly speaking the missing flag should have been set, but there are more places where code just checks if name is null. For now this patch does the same. stack: BUG: unable to handle kernel NULL pointer dereference at 0000000000000064 IP: [] btrfs_show_devname+0x58/0xf0 [btrfs] [] show_vfsmnt+0x39/0x130 [] m_show+0x16/0x20 [] seq_read+0x296/0x390 [] vfs_read+0x9d/0x160 [] SyS_read+0x49/0x90 [] system_call_fastpath+0x16/0x1b reproducer: mkfs.btrfs -draid1 -mraid1 /dev/sdg1 /dev/sdg2 btrfstune -S 1 /dev/sdg1 modprobe -r btrfs && modprobe btrfs mount -o degraded /dev/sdg1 /btrfs btrfs dev add /dev/sdg3 /btrfs Signed-off-by: Anand Jain Signed-off-by: Chris Mason --- fs/btrfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0927e463afca..8e16bca69c56 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1811,6 +1811,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) list_for_each_entry(dev, head, dev_list) { if (dev->missing) continue; + if (!dev->name) + continue; if (!first_dev || dev->devid < first_dev->devid) first_dev = dev; } From 14f5979633a67de81b9bd4a36a0eb99125728f9b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sun, 29 Jun 2014 21:45:40 +0100 Subject: [PATCH 15/20] Btrfs: fix use-after-free when cloning a trailing file hole The transaction handle was being used after being freed. Cc: Chris Mason Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 02dc64bbf1cb..2a99f4987bb1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3142,7 +3142,6 @@ out: static void clone_update_extent_map(struct inode *inode, const struct btrfs_trans_handle *trans, const struct btrfs_path *path, - struct btrfs_file_extent_item *fi, const u64 hole_offset, const u64 hole_len) { @@ -3157,7 +3156,11 @@ static void clone_update_extent_map(struct inode *inode, return; } - if (fi) { + if (path) { + struct btrfs_file_extent_item *fi; + + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); btrfs_extent_item_to_extent_map(inode, path, fi, false, em); em->generation = -1; if (btrfs_file_extent_type(path->nodes[0], fi) == @@ -3511,18 +3514,15 @@ process_slot: btrfs_item_ptr_offset(leaf, slot), size); inode_add_bytes(inode, datal); - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); } /* If we have an implicit hole (NO_HOLES feature). */ if (drop_start < new_key.offset) clone_update_extent_map(inode, trans, - path, NULL, drop_start, + NULL, drop_start, new_key.offset - drop_start); - clone_update_extent_map(inode, trans, path, - extent, 0, 0); + clone_update_extent_map(inode, trans, path, 0, 0); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); @@ -3565,12 +3565,10 @@ process_slot: btrfs_end_transaction(trans, root); goto out; } + clone_update_extent_map(inode, trans, NULL, last_dest_end, + destoff + len - last_dest_end); ret = clone_finish_inode_update(trans, inode, destoff + len, destoff, olen); - if (ret) - goto out; - clone_update_extent_map(inode, trans, path, NULL, last_dest_end, - destoff + len - last_dest_end); } out: From 0a4eaea892a479aeebccde65986b27cfb6e33a78 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 20 Jun 2014 11:31:44 +0200 Subject: [PATCH 16/20] btrfs: remove stale comment from btrfs_flush_all_pending_stuffs Commit fcebe4562dec83b3f8d3088d77584727b09130b2 (Btrfs: rework qgroup accounting) removed the qgroup accounting after delayed refs. Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a32ad65b6cdc..98e76a333596 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1617,11 +1617,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, int ret; ret = btrfs_run_delayed_items(trans, root); - /* - * running the delayed items may have added new refs. account - * them now so that they hinder processing of more delayed refs - * as little as possible. - */ if (ret) return ret; From 130d5b415a091e493ac1508b9d27bbb85ba7b8c0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 20 Jun 2014 11:43:20 +0200 Subject: [PATCH 17/20] btrfs: use E2BIG instead of EIO if compression does not help Return codes got updated in 60e1975acb48fc3d74a3422b21dde74c977ac3d5 (btrfs: return errno instead of -1 from compression) lzo wrapper returns E2BIG in this case, do the same for zlib. Signed-off-by: David Sterba --- fs/btrfs/zlib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 4f196314c0c1..b67d8fc81277 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -136,7 +136,7 @@ static int zlib_compress_pages(struct list_head *ws, if (workspace->def_strm.total_in > 8192 && workspace->def_strm.total_in < workspace->def_strm.total_out) { - ret = -EIO; + ret = -E2BIG; goto out; } /* we need another page for writing out. Test this From d288db5dc0110c8e0732d099aaf7a05e2ea0e0c8 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 2 Jul 2014 16:58:01 +0800 Subject: [PATCH 18/20] Btrfs: fix race of using total_bytes_pinned This percpu counter @total_bytes_pinned is introduced to skip unnecessary operations of 'commit transaction', it accounts for those space we may free but are stuck in delayed refs. And we zero out @space_info->total_bytes_pinned every transaction period so we have a better idea of how much space we'll actually free up by committing this transaction. However, we do the 'zero out' part a little earlier, before we actually unpin space, so we end up returning ENOSPC when we actually have free space that's just unpinned from committing transaction. xfstests/generic/074 complained then. This fixes it by actually accounting the percpu pinned number when 'unpin', and since it's protected by space_info->lock, the race is gone now. Signed-off-by: Liu Bo Reviewed-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 99c253918208..813537f362f9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5678,7 +5678,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_caching_control *next; struct btrfs_caching_control *caching_ctl; struct btrfs_block_group_cache *cache; - struct btrfs_space_info *space_info; down_write(&fs_info->commit_root_sem); @@ -5701,9 +5700,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, up_write(&fs_info->commit_root_sem); - list_for_each_entry_rcu(space_info, &fs_info->space_info, list) - percpu_counter_set(&space_info->total_bytes_pinned, 0); - update_global_block_rsv(fs_info); } @@ -5741,6 +5737,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) spin_lock(&cache->lock); cache->pinned -= len; space_info->bytes_pinned -= len; + percpu_counter_add(&space_info->total_bytes_pinned, -len); if (cache->ro) { space_info->bytes_readonly += len; readonly = true; From be2c765dff9e5584965f78853c2addd2bb926946 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 2 Jul 2014 10:20:48 -0700 Subject: [PATCH 19/20] Btrfs: fix btrfs_print_leaf for skinny metadata We wouldn't actuall print the extent information if we had a skinny metadata item, this fixes that. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/print-tree.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 6efd70d3b64f..9626b4ad3b9a 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -54,7 +54,7 @@ static void print_extent_data_ref(struct extent_buffer *eb, btrfs_extent_data_ref_count(eb, ref)); } -static void print_extent_item(struct extent_buffer *eb, int slot) +static void print_extent_item(struct extent_buffer *eb, int slot, int type) { struct btrfs_extent_item *ei; struct btrfs_extent_inline_ref *iref; @@ -63,7 +63,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot) struct btrfs_disk_key key; unsigned long end; unsigned long ptr; - int type; u32 item_size = btrfs_item_size_nr(eb, slot); u64 flags; u64 offset; @@ -88,7 +87,8 @@ static void print_extent_item(struct extent_buffer *eb, int slot) btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei), flags); - if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + if ((type == BTRFS_EXTENT_ITEM_KEY) && + flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { struct btrfs_tree_block_info *info; info = (struct btrfs_tree_block_info *)(ei + 1); btrfs_tree_block_key(eb, info, &key); @@ -223,7 +223,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: - print_extent_item(l, i); + case BTRFS_METADATA_ITEM_KEY: + print_extent_item(l, i, type); break; case BTRFS_TREE_BLOCK_REF_KEY: printk(KERN_INFO "\t\ttree block backref\n"); From abdd2e80a57e5f7278f47913315065f0a3d78d20 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 24 Jun 2014 17:46:58 +0100 Subject: [PATCH 20/20] Btrfs: fix crash when starting transaction Often when starting a transaction we commit the currently running transaction, which can end up writing block group caches when the current process has its journal_info set to NULL (and not to a transaction). This makes our assertion at btrfs_check_data_free_space() (current_journal != NULL) fail, resulting in a crash/hang. Therefore fix it by setting journal_info. Two different traces of this issue follow below. 1) [51502.241936] BTRFS: assertion failed: current->journal_info, file: fs/btrfs/extent-tree.c, line: 3670 [51502.242213] ------------[ cut here ]------------ [51502.242493] kernel BUG at fs/btrfs/ctree.h:3964! [51502.242669] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC (...) [51502.244010] Call Trace: [51502.244010] [] btrfs_check_data_free_space+0x395/0x3a0 [btrfs] [51502.244010] [] btrfs_write_dirty_block_groups+0x4ac/0x640 [btrfs] [51502.244010] [] commit_cowonly_roots+0x164/0x226 [btrfs] [51502.244010] [] btrfs_commit_transaction+0x4ed/0xab0 [btrfs] [51502.244010] [] ? _raw_spin_unlock+0x2b/0x40 [51502.244010] [] start_transaction+0x459/0x620 [btrfs] [51502.244010] [] btrfs_start_transaction+0x1b/0x20 [btrfs] [51502.244010] [] __unlink_start_trans+0x31/0xe0 [btrfs] [51502.244010] [] btrfs_unlink+0x37/0xc0 [btrfs] [51502.244010] [] ? do_unlinkat+0x114/0x2a0 [51502.244010] [] vfs_unlink+0xcc/0x150 [51502.244010] [] do_unlinkat+0x260/0x2a0 [51502.244010] [] ? filp_close+0x64/0x90 [51502.244010] [] ? trace_hardirqs_on_caller+0x16/0x1e0 [51502.244010] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [51502.244010] [] SyS_unlinkat+0x1b/0x40 [51502.244010] [] system_call_fastpath+0x16/0x1b [51502.244010] Code: 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 89 f1 48 c7 c2 71 13 36 a0 48 89 fe 31 c0 48 c7 c7 b8 43 36 a0 48 89 e5 e8 5d b0 32 e1 <0f> 0b 0f 1f 44 00 00 55 b9 11 00 00 00 48 89 e5 41 55 49 89 f5 [51502.244010] RIP [] assfail.constprop.88+0x1e/0x20 [btrfs] 2) [25405.097230] BTRFS: assertion failed: current->journal_info, file: fs/btrfs/extent-tree.c, line: 3670 [25405.097488] ------------[ cut here ]------------ [25405.097767] kernel BUG at fs/btrfs/ctree.h:3964! [25405.097940] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC (...) [25405.100008] Call Trace: [25405.100008] [] btrfs_check_data_free_space+0x395/0x3a0 [btrfs] [25405.100008] [] btrfs_write_dirty_block_groups+0x4ac/0x640 [btrfs] [25405.100008] [] commit_cowonly_roots+0x164/0x226 [btrfs] [25405.100008] [] btrfs_commit_transaction+0x4ed/0xab0 [btrfs] [25405.100008] [] ? bit_waitqueue+0xc0/0xc0 [25405.100008] [] start_transaction+0x459/0x620 [btrfs] [25405.100008] [] btrfs_start_transaction+0x1b/0x20 [btrfs] [25405.100008] [] btrfs_create+0x47/0x210 [btrfs] [25405.100008] [] ? btrfs_permission+0x3c/0x80 [btrfs] [25405.100008] [] vfs_create+0x9b/0x130 [25405.100008] [] do_last+0x849/0xe20 [25405.100008] [] ? link_path_walk+0x79/0x820 [25405.100008] [] path_openat+0xc5/0x690 [25405.100008] [] ? trace_hardirqs_on+0xd/0x10 [25405.100008] [] ? __alloc_fd+0x32/0x1d0 [25405.100008] [] do_filp_open+0x43/0xa0 [25405.100008] [] ? __alloc_fd+0x151/0x1d0 [25405.100008] [] do_sys_open+0x13c/0x230 [25405.100008] [] ? trace_hardirqs_on_caller+0x16/0x1e0 [25405.100008] [] SyS_open+0x22/0x30 [25405.100008] [] system_call_fastpath+0x16/0x1b [25405.100008] Code: 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 89 f1 48 c7 c2 51 13 36 a0 48 89 fe 31 c0 48 c7 c7 d0 43 36 a0 48 89 e5 e8 6d b5 32 e1 <0f> 0b 0f 1f 44 00 00 55 b9 11 00 00 00 48 89 e5 41 55 49 89 f5 [25405.100008] RIP [] assfail.constprop.88+0x1e/0x20 [btrfs] Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 98e76a333596..5f379affdf23 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -493,6 +493,7 @@ again: smp_mb(); if (cur_trans->state >= TRANS_STATE_BLOCKED && may_wait_transaction(root, type)) { + current->journal_info = h; btrfs_commit_transaction(h, root); goto again; }