Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "Dave had a small collection of fixes to the new free space tree code,
  one of which was keeping our sysfs files more up to date with feature
  bits as different things get enabled (lzo, raid5/6, etc.).

  I should have kept the sysfs stuff for rc3, since we always manage to
  trip over something.  This time it was a GFP_KERNEL allocation made
  from a context that is NOFS only.  Instead of rebasing it out I've put
  a revert in, and we'll fix it properly for rc3.

  Otherwise, Filipe fixed a btrfs DIO race and Qu Wenruo fixed up a
  use-after-free in our tracepoints that Dave Jones reported."

* 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Revert "btrfs: synchronize incompat feature bits with sysfs files"
  btrfs: don't use GFP_HIGHMEM for free-space-tree bitmap kzalloc
  btrfs: sysfs: check initialization state before updating features
  Revert "btrfs: clear PF_NOFREEZE in cleaner_kthread()"
  btrfs: async-thread: Fix a use-after-free error for trace
  Btrfs: fix race between fsync and lockless direct IO writes
  btrfs: add free space tree to the cow-only list
  btrfs: add free space tree to lockdep classes
  btrfs: tweak free space tree bitmap allocation
  btrfs: tests: switch to GFP_KERNEL
  btrfs: synchronize incompat feature bits with sysfs files
  btrfs: sysfs: introduce helper for syncing bits with sysfs files
  btrfs: sysfs: add free-space-tree bit attribute
  btrfs: sysfs: fix typo in compat_ro attribute definition
This commit is contained in:
Linus Torvalds 2016-01-29 15:46:49 -08:00
commit d3f71ae711
11 changed files with 113 additions and 32 deletions

View File

@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
list_add_tail(&work->ordered_list, &wq->ordered_list); list_add_tail(&work->ordered_list, &wq->ordered_list);
spin_unlock_irqrestore(&wq->list_lock, flags); spin_unlock_irqrestore(&wq->list_lock, flags);
} }
queue_work(wq->normal_wq, &work->normal_work);
trace_btrfs_work_queued(work); trace_btrfs_work_queued(work);
queue_work(wq->normal_wq, &work->normal_work);
} }
void btrfs_queue_work(struct btrfs_workqueue *wq, void btrfs_queue_work(struct btrfs_workqueue *wq,

View File

@ -182,6 +182,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
{ .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" }, { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
{ .id = 0, .name_stem = "tree" }, { .id = 0, .name_stem = "tree" },
}; };
@ -1787,7 +1788,6 @@ static int cleaner_kthread(void *arg)
int again; int again;
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
set_freezable();
do { do {
again = 0; again = 0;

View File

@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
static unsigned long *alloc_bitmap(u32 bitmap_size) static unsigned long *alloc_bitmap(u32 bitmap_size)
{ {
void *mem;
/*
* The allocation size varies, observed numbers were < 4K up to 16K.
* Using vmalloc unconditionally would be too heavy, we'll try
* contiguous allocations first.
*/
if (bitmap_size <= PAGE_SIZE)
return kzalloc(bitmap_size, GFP_NOFS);
mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
if (mem)
return mem;
return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO, return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL); PAGE_KERNEL);
} }
@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
ret = 0; ret = 0;
out: out:
vfree(bitmap); kvfree(bitmap);
if (ret) if (ret)
btrfs_abort_transaction(trans, root, ret); btrfs_abort_transaction(trans, root, ret);
return ret; return ret;
@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
ret = 0; ret = 0;
out: out:
vfree(bitmap); kvfree(bitmap);
if (ret) if (ret)
btrfs_abort_transaction(trans, root, ret); btrfs_abort_transaction(trans, root, ret);
return ret; return ret;

View File

@ -7116,21 +7116,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, /*
ins.offset, ins.offset, ins.offset, 0); * Create the ordered extent before the extent map. This is to avoid
if (IS_ERR(em)) { * races with the fast fsync path that would lead to it logging file
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); * extent items that point to disk extents that were not yet written to.
return em; * The fast fsync path collects ordered extents into a local list and
} * then collects all the new extent maps, so we must create the ordered
* extent first and make sure the fast fsync path collects any new
* ordered extents after collecting new extent maps as well.
* The fsync path simply can not rely on inode_dio_wait() because it
* causes deadlock with AIO.
*/
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
ins.offset, ins.offset, 0); ins.offset, ins.offset, 0);
if (ret) { if (ret) {
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
free_extent_map(em);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
ins.offset, ins.offset, ins.offset, 0);
if (IS_ERR(em)) {
struct btrfs_ordered_extent *oe;
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
oe = btrfs_lookup_ordered_extent(inode, start);
ASSERT(oe);
if (WARN_ON(!oe))
return em;
set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
btrfs_remove_ordered_extent(inode, oe);
/* Once for our lookup and once for the ordered extents tree. */
btrfs_put_ordered_extent(oe);
btrfs_put_ordered_extent(oe);
}
return em; return em;
} }

View File

@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
root_objectid == BTRFS_TREE_LOG_OBJECTID || root_objectid == BTRFS_TREE_LOG_OBJECTID ||
root_objectid == BTRFS_CSUM_TREE_OBJECTID || root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
root_objectid == BTRFS_UUID_TREE_OBJECTID || root_objectid == BTRFS_UUID_TREE_OBJECTID ||
root_objectid == BTRFS_QUOTA_TREE_OBJECTID) root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
return 1; return 1;
return 0; return 0;
} }

View File

@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA); BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES); BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
static struct attribute *btrfs_supported_feature_attrs[] = { static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(mixed_backref), BTRFS_FEAT_ATTR_PTR(mixed_backref),
@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(raid56), BTRFS_FEAT_ATTR_PTR(raid56),
BTRFS_FEAT_ATTR_PTR(skinny_metadata), BTRFS_FEAT_ATTR_PTR(skinny_metadata),
BTRFS_FEAT_ATTR_PTR(no_holes), BTRFS_FEAT_ATTR_PTR(no_holes),
BTRFS_FEAT_ATTR_PTR(free_space_tree),
NULL NULL
}; };
@ -780,6 +782,39 @@ failure:
return error; return error;
} }
/*
* Change per-fs features in /sys/fs/btrfs/UUID/features to match current
* values in superblock. Call after any changes to incompat/compat_ro flags
*/
void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
u64 bit, enum btrfs_feature_set set)
{
struct btrfs_fs_devices *fs_devs;
struct kobject *fsid_kobj;
u64 features;
int ret;
if (!fs_info)
return;
features = get_features(fs_info, set);
ASSERT(bit & supported_feature_masks[set]);
fs_devs = fs_info->fs_devices;
fsid_kobj = &fs_devs->fsid_kobj;
if (!fsid_kobj->state_initialized)
return;
/*
* FIXME: this is too heavy to update just one value, ideally we'd like
* to use sysfs_update_group but some refactoring is needed first.
*/
sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
}
static int btrfs_init_debugfs(void) static int btrfs_init_debugfs(void)
{ {
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS

View File

@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \ #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature) BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
#define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \ #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature) BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
#define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \ #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature) BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
struct kobject *parent); struct kobject *parent);
int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs); int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs); void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
u64 bit, enum btrfs_feature_set set);
#endif /* _BTRFS_SYSFS_H_ */ #endif /* _BTRFS_SYSFS_H_ */

View File

@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void) struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
{ {
struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
GFP_NOFS); GFP_KERNEL);
if (!fs_info) if (!fs_info)
return fs_info; return fs_info;
fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
GFP_NOFS); GFP_KERNEL);
if (!fs_info->fs_devices) { if (!fs_info->fs_devices) {
kfree(fs_info); kfree(fs_info);
return NULL; return NULL;
} }
fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block), fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
GFP_NOFS); GFP_KERNEL);
if (!fs_info->super_copy) { if (!fs_info->super_copy) {
kfree(fs_info->fs_devices); kfree(fs_info->fs_devices);
kfree(fs_info); kfree(fs_info);
@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
{ {
struct btrfs_block_group_cache *cache; struct btrfs_block_group_cache *cache;
cache = kzalloc(sizeof(*cache), GFP_NOFS); cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (!cache) if (!cache)
return NULL; return NULL;
cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
GFP_NOFS); GFP_KERNEL);
if (!cache->free_space_ctl) { if (!cache->free_space_ctl) {
kfree(cache); kfree(cache);
return NULL; return NULL;

View File

@ -94,7 +94,7 @@ static int test_find_delalloc(void)
* test. * test.
*/ */
for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) { for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
if (!page) { if (!page) {
test_msg("Failed to allocate test page\n"); test_msg("Failed to allocate test page\n");
ret = -ENOMEM; ret = -ENOMEM;
@ -113,7 +113,7 @@ static int test_find_delalloc(void)
* |--- delalloc ---| * |--- delalloc ---|
* |--- search ---| * |--- search ---|
*/ */
set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS); set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
start = 0; start = 0;
end = 0; end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@ -144,7 +144,7 @@ static int test_find_delalloc(void)
test_msg("Couldn't find the locked page\n"); test_msg("Couldn't find the locked page\n");
goto out_bits; goto out_bits;
} }
set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS); set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
start = test_start; start = test_start;
end = 0; end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@ -199,7 +199,7 @@ static int test_find_delalloc(void)
* *
* We are re-using our test_start from above since it works out well. * We are re-using our test_start from above since it works out well.
*/ */
set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS); set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
start = test_start; start = test_start;
end = 0; end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@ -262,7 +262,7 @@ static int test_find_delalloc(void)
} }
ret = 0; ret = 0;
out_bits: out_bits:
clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS); clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
out: out:
if (locked_page) if (locked_page)
page_cache_release(locked_page); page_cache_release(locked_page);
@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
test_msg("Running extent buffer bitmap tests\n"); test_msg("Running extent buffer bitmap tests\n");
bitmap = kmalloc(len, GFP_NOFS); bitmap = kmalloc(len, GFP_KERNEL);
if (!bitmap) { if (!bitmap) {
test_msg("Couldn't allocate test bitmap\n"); test_msg("Couldn't allocate test bitmap\n");
return -ENOMEM; return -ENOMEM;

View File

@ -974,7 +974,7 @@ static int test_extent_accounting(void)
(BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
NULL, GFP_NOFS); NULL, GFP_KERNEL);
if (ret) { if (ret) {
test_msg("clear_extent_bit returned %d\n", ret); test_msg("clear_extent_bit returned %d\n", ret);
goto out; goto out;
@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
BTRFS_MAX_EXTENT_SIZE+8191, BTRFS_MAX_EXTENT_SIZE+8191,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
NULL, GFP_NOFS); NULL, GFP_KERNEL);
if (ret) { if (ret) {
test_msg("clear_extent_bit returned %d\n", ret); test_msg("clear_extent_bit returned %d\n", ret);
goto out; goto out;
@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
NULL, GFP_NOFS); NULL, GFP_KERNEL);
if (ret) { if (ret) {
test_msg("clear_extent_bit returned %d\n", ret); test_msg("clear_extent_bit returned %d\n", ret);
goto out; goto out;
@ -1096,7 +1096,7 @@ out:
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
NULL, GFP_NOFS); NULL, GFP_KERNEL);
iput(inode); iput(inode);
btrfs_free_dummy_root(root); btrfs_free_dummy_root(root);
return ret; return ret;

View File

@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *inode,
struct btrfs_path *path, struct btrfs_path *path,
struct list_head *logged_list, struct list_head *logged_list,
struct btrfs_log_ctx *ctx) struct btrfs_log_ctx *ctx,
const u64 start,
const u64 end)
{ {
struct extent_map *em, *n; struct extent_map *em, *n;
struct list_head extents; struct list_head extents;
@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
} }
list_sort(NULL, &extents, extent_cmp); list_sort(NULL, &extents, extent_cmp);
/*
* Collect any new ordered extents within the range. This is to
* prevent logging file extent items without waiting for the disk
* location they point to being written. We do this only to deal
* with races against concurrent lockless direct IO writes.
*/
btrfs_get_logged_extents(inode, logged_list, start, end);
process: process:
while (!list_empty(&extents)) { while (!list_empty(&extents)) {
em = list_entry(extents.next, struct extent_map, list); em = list_entry(extents.next, struct extent_map, list);
@ -4701,7 +4709,7 @@ log_extents:
goto out_unlock; goto out_unlock;
} }
ret = btrfs_log_changed_extents(trans, root, inode, dst_path, ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
&logged_list, ctx); &logged_list, ctx, start, end);
if (ret) { if (ret) {
err = ret; err = ret;
goto out_unlock; goto out_unlock;