for-5.9-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAl8jAKwACgkQxWXV+ddt
 WDtFvQ/7BIMM6cn+k/LoiK6cTTpq9DKTMoK64XzXJsOiY4ey6pXE0iSyVyn3rC6k
 C+wAafdd7UPGPnI5z7L1lJOI7cE/X3PmADmAWB6WhARp19B2SmKfkF+jFAr+T4dE
 OZw5lNqHSGv/aByBq8qegrAhWjpRR3VZtCCGW5KvN/strx7MC7t9wFZAB0zIsdKX
 aK37VKYhoc+MOF1ikUDn4lRSIjqQYJetjvgC6Yt9dLfx+5oLOK8tpm1XkifN/1xs
 HrRR9EpDTKlfJFDee1O+0gof6cKWTqFsbup1EFTrDbkA11zx8r6itBGY5G8P3zMh
 JCsVOOJeDLecp1cz1ZWFpyBgrEAN7uHTY0hZbCZgN/dKbSKmv51iujdXB+dDOtxF
 cSPywc0NxmftvBbweInwBfsA54BHI0XxCCA0U1yA8xgxPmBE15t81b7F56zmCRke
 mSJxAP1dcX8gmL3mzEOUUuKkVbFJ0lIMi2YVkM1lud8Vn4xaWU9HzXlzEvkh7At0
 tqlb+LHzaxxVU2m6/6W/KEuiXW1S7/q4nX87wvyMLnylHAaSlA+UtAp3t1q92rdJ
 3VGzyvbgBRT2H+22DgCkrPTRlhOifeeuXT3nOwehY4AVkENYQrENb7FmqvppCEtl
 v7yTBxxe4zPEjc8dm7o9RBYaVESVFXVQtpCHwz0D+p+adzIYmVM=
 =HNGC
 -----END PGP SIGNATURE-----

Merge tag 'for-5.9-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "We don't have any big feature updates this time, there are lots of
  small enhacements or fixes. A highlight perhaps is the parallel fsync
  performance improvements, numbers below.

  Regarding the dio/iomap that was reverted last time, the required API
  changes are likely to land in the upcoming cycle, the btrfs part will
  be updated afterwards.

  User visible changes:

   - new mount option rescue= to group all recovery-related mount
     options so we don't have many specific options, currently
     introducing only aliases for existing options, future extensions
     are in development to allow read-only mount with partially damaged
     structures:
      - usebackuproot is an alias for rescue=usebackuproot
      - nologreplay is an alias for rescue=nologreplay

   - start deprecation of mount option inode_cache, removal scheduled to
     v5.11

   - removed deprecated mount options alloc_start and subvolrootid

   - device stats corruption counter gets incremented when a checksum
     mismatch is found

   - qgroup information exported in /sys/fs/btrfs/<UUID>/qgroups/<id>
     using sysfs

   - add link /sys/fs/btrfs/<UUID>/bdi pointing to the associated
     backing dev info

   - FS_INFO ioctl enhancements:
      - add flags to request/describe newly added items
      - new item: numeric checksum type and checksum size
      - new item: generation
      - new item: metadata_uuid

   - seed device: with one new read-write device added, print the new
     device information in /proc/mounts

   - balance: detect cancellation by Ctrl-C in existing cancellation
     points

  Performance improvements:

   - optimized versions of various helpers on little-endian
     architectures, where we don't have to do LE/BE conversion from
     on-disk format

   - tree-log/fsync optimizations leading to lower max latency reported
     by dbench, reduced by about 12%

   - all chunk tree leaves are prefetched at mount time, can improve
     mount time on large (terabyte-sized) filesystems

   - speed up parallel fsync of files with reflinked/deduped extents,
     with jobs 16 to 1024 the throughput gets improved roughly by 50% on
     average and runtime decreased roughly by 30% on average, notable
     outlier is 128 jobs with +121.2% on throughput and -54.6% runtime

   - another speed up of parallel fsync, reduce number of checksum tree
     lookups and contention, the improvements start to show up with 2
     tasks with +20% throughput and -16% runtime up to 64 with +200%
     throughput and -66% runtime

  Core:

   - umount-time qgroup leak checker

   - qgroups
      - add a way to unreserve partial range after failure, avoiding
        some EDQUOT errors
      - improved flushing logic when EDQUOT is hit

   - possible EINTR interruption caused by failed reservations after
     transaction start is better handled and documented

   - transaction abort errors are unified to EROFS in case it's not the
     original reason of abort or we don't have other way to determine
     the reason

  Fixes:

   - make truncate succeed on a NOCOW file even if data space is
     exhausted

   - fix cancelling balance on filesystem with exhausted metadata space

   - anon block device:
      - preallocate anon bdev when subvolume is created to report
        failure early
      - shorten time the anon bdev id is allocated
      - don't allocate anon bdev for internal roots

   - minor memory leak in ref-verify

   - refuse invalid combinations of compression and NOCOW file flags

   - lockdep fixes, updating the device locks

   - remove obsolete fallback logic for block group profile adjustments
     when switching from 1 to more devices, causing allocation of
     unwanted block groups

  Other cleanups, refactoring, simplifications:

   - conversions from struct inode to struct btrfs_inode in internal
     functions

   - removal of unused struct members"

* tag 'for-5.9-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (151 commits)
  btrfs: do not set the full sync flag on the inode during page release
  btrfs: release old extent maps during page release
  btrfs: fix race between page release and a fast fsync
  btrfs: open-code remount flag setting in btrfs_remount
  btrfs: if we're restriping, use the target restripe profile
  btrfs: don't adjust bg flags and use default allocation profiles
  btrfs: fix lockdep splat from btrfs_dump_space_info
  btrfs: move the chunk_mutex in btrfs_read_chunk_tree
  btrfs: open device without device_list_mutex
  btrfs: sysfs: use NOFS for device creation
  btrfs: return EROFS for BTRFS_FS_STATE_ERROR cases
  btrfs: document special case error codes for fs errors
  btrfs: don't WARN if we abort a transaction with EROFS
  btrfs: reduce contention on log trees when logging checksums
  btrfs: remove done label in writepage_delalloc
  btrfs: add comments for btrfs_reserve_flush_enum
  btrfs: relocation: review the call sites which can be interrupted by signal
  btrfs: avoid possible signal interruption of btrfs_drop_snapshot() on relocation tree
  btrfs: relocation: allow signal to cancel balance
  btrfs: raid56: remove out label in __raid56_parity_recover
  ...
This commit is contained in:
Linus Torvalds 2020-08-03 09:41:48 -07:00
commit 6dec9f406c
47 changed files with 1913 additions and 1225 deletions

View File

@ -65,11 +65,8 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
spin_lock(&fs_info->balance_lock);
target = get_restripe_target(fs_info, flags);
if (target) {
/* Pick target profile only if it's already available */
if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
spin_unlock(&fs_info->balance_lock);
return extended_to_chunk(target);
}
spin_unlock(&fs_info->balance_lock);
return extended_to_chunk(target);
}
spin_unlock(&fs_info->balance_lock);
@ -118,12 +115,12 @@ u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
void btrfs_get_block_group(struct btrfs_block_group *cache)
{
atomic_inc(&cache->count);
refcount_inc(&cache->refs);
}
void btrfs_put_block_group(struct btrfs_block_group *cache)
{
if (atomic_dec_and_test(&cache->count)) {
if (refcount_dec_and_test(&cache->refs)) {
WARN_ON(cache->pinned > 0);
WARN_ON(cache->reserved > 0);
@ -1111,7 +1108,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
if (ret < 0)
goto out;
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
/*
@ -1143,8 +1139,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
remove_em = (atomic_read(&block_group->frozen) == 0);
spin_unlock(&block_group->lock);
mutex_unlock(&fs_info->chunk_mutex);
if (remove_em) {
struct extent_map_tree *em_tree;
@ -1532,21 +1526,70 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
spin_unlock(&fs_info->unused_bgs_lock);
}
static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
struct btrfs_path *path)
{
struct extent_map_tree *em_tree;
struct extent_map *em;
struct btrfs_block_group_item bg;
struct extent_buffer *leaf;
int slot;
u64 flags;
int ret = 0;
slot = path->slots[0];
leaf = path->nodes[0];
em_tree = &fs_info->mapping_tree;
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, key->objectid, key->offset);
read_unlock(&em_tree->lock);
if (!em) {
btrfs_err(fs_info,
"logical %llu len %llu found bg but no related chunk",
key->objectid, key->offset);
return -ENOENT;
}
if (em->start != key->objectid || em->len != key->offset) {
btrfs_err(fs_info,
"block group %llu len %llu mismatch with chunk %llu len %llu",
key->objectid, key->offset, em->start, em->len);
ret = -EUCLEAN;
goto out_free_em;
}
read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot),
sizeof(bg));
flags = btrfs_stack_block_group_flags(&bg) &
BTRFS_BLOCK_GROUP_TYPE_MASK;
if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
btrfs_err(fs_info,
"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
key->objectid, key->offset, flags,
(BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type));
ret = -EUCLEAN;
}
out_free_em:
free_extent_map(em);
return ret;
}
static int find_first_block_group(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_key *key)
{
struct btrfs_root *root = fs_info->extent_root;
int ret = 0;
int ret;
struct btrfs_key found_key;
struct extent_buffer *leaf;
struct btrfs_block_group_item bg;
u64 flags;
int slot;
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
if (ret < 0)
goto out;
return ret;
while (1) {
slot = path->slots[0];
@ -1563,49 +1606,10 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
if (found_key.objectid >= key->objectid &&
found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
struct extent_map_tree *em_tree;
struct extent_map *em;
em_tree = &root->fs_info->mapping_tree;
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, found_key.objectid,
found_key.offset);
read_unlock(&em_tree->lock);
if (!em) {
btrfs_err(fs_info,
"logical %llu len %llu found bg but no related chunk",
found_key.objectid, found_key.offset);
ret = -ENOENT;
} else if (em->start != found_key.objectid ||
em->len != found_key.offset) {
btrfs_err(fs_info,
"block group %llu len %llu mismatch with chunk %llu len %llu",
found_key.objectid, found_key.offset,
em->start, em->len);
ret = -EUCLEAN;
} else {
read_extent_buffer(leaf, &bg,
btrfs_item_ptr_offset(leaf, slot),
sizeof(bg));
flags = btrfs_stack_block_group_flags(&bg) &
BTRFS_BLOCK_GROUP_TYPE_MASK;
if (flags != (em->map_lookup->type &
BTRFS_BLOCK_GROUP_TYPE_MASK)) {
btrfs_err(fs_info,
"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
found_key.objectid,
found_key.offset, flags,
(BTRFS_BLOCK_GROUP_TYPE_MASK &
em->map_lookup->type));
ret = -EUCLEAN;
} else {
ret = 0;
}
}
free_extent_map(em);
goto out;
ret = read_bg_from_eb(fs_info, &found_key, path);
break;
}
path->slots[0]++;
}
out:
@ -1657,19 +1661,12 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
return -EIO;
map = em->map_lookup;
data_stripe_length = em->len;
data_stripe_length = em->orig_block_len;
io_stripe_size = map->stripe_len;
if (map->type & BTRFS_BLOCK_GROUP_RAID10)
data_stripe_length = div_u64(data_stripe_length,
map->num_stripes / map->sub_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
data_stripe_length = div_u64(data_stripe_length, map->num_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
data_stripe_length = div_u64(data_stripe_length,
nr_data_stripes(map));
/* For RAID5/6 adjust to a full IO stripe length */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
io_stripe_size = map->stripe_len * nr_data_stripes(map);
}
buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
if (!buf) {
@ -1748,25 +1745,12 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
return ret;
while (nr--) {
u64 start, len;
if (logical[nr] > cache->start + cache->length)
continue;
if (logical[nr] + stripe_len <= cache->start)
continue;
start = logical[nr];
if (start < cache->start) {
start = cache->start;
len = (logical[nr] + stripe_len) - start;
} else {
len = min_t(u64, stripe_len,
cache->start + cache->length - start);
}
u64 len = min_t(u64, stripe_len,
cache->start + cache->length - logical[nr]);
cache->bytes_super += len;
ret = btrfs_add_excluded_extent(fs_info, start, len);
ret = btrfs_add_excluded_extent(fs_info, logical[nr],
len);
if (ret) {
kfree(logical);
return ret;
@ -1818,7 +1802,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
atomic_set(&cache->count, 1);
refcount_set(&cache->refs, 1);
spin_lock_init(&cache->lock);
init_rwsem(&cache->data_rwsem);
INIT_LIST_HEAD(&cache->list);
@ -2207,54 +2191,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
return 0;
}
static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 num_devices;
u64 stripped;
/*
* if restripe for this chunk_type is on pick target profile and
* return, otherwise do the usual balance
*/
stripped = get_restripe_target(fs_info, flags);
if (stripped)
return extended_to_chunk(stripped);
num_devices = fs_info->fs_devices->rw_devices;
stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
if (num_devices == 1) {
stripped |= BTRFS_BLOCK_GROUP_DUP;
stripped = flags & ~stripped;
/* turn raid0 into single device chunks */
if (flags & BTRFS_BLOCK_GROUP_RAID0)
return stripped;
/* turn mirroring into duplication */
if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
BTRFS_BLOCK_GROUP_RAID10))
return stripped | BTRFS_BLOCK_GROUP_DUP;
} else {
/* they already had raid on here, just return */
if (flags & stripped)
return flags;
stripped |= BTRFS_BLOCK_GROUP_DUP;
stripped = flags & ~stripped;
/* switch duplicated blocks with raid1 */
if (flags & BTRFS_BLOCK_GROUP_DUP)
return stripped | BTRFS_BLOCK_GROUP_RAID1;
/* this is drive concat, leave it alone */
}
return flags;
}
/*
* Mark one block group RO, can be called several times for the same block
* group.
@ -2300,7 +2236,7 @@ again:
* If we are changing raid levels, try to allocate a
* corresponding block group with the new raid level.
*/
alloc_flags = update_block_group_flags(fs_info, cache->flags);
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
if (alloc_flags != cache->flags) {
ret = btrfs_chunk_alloc(trans, alloc_flags,
CHUNK_ALLOC_FORCE);
@ -2327,7 +2263,7 @@ again:
ret = inc_block_group_ro(cache, 0);
out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = update_block_group_flags(fs_info, cache->flags);
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
mutex_lock(&fs_info->chunk_mutex);
check_system_chunk(trans, alloc_flags);
mutex_unlock(&fs_info->chunk_mutex);
@ -2521,7 +2457,8 @@ again:
num_pages *= 16;
num_pages *= PAGE_SIZE;
ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
num_pages);
if (ret)
goto out_put;
@ -3392,7 +3329,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
ASSERT(list_empty(&block_group->dirty_list));
ASSERT(list_empty(&block_group->io_list));
ASSERT(list_empty(&block_group->bg_list));
ASSERT(atomic_read(&block_group->count) == 1);
ASSERT(refcount_read(&block_group->refs) == 1);
btrfs_put_block_group(block_group);
spin_lock(&info->block_group_cache_lock);
@ -3447,7 +3384,6 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
spin_unlock(&block_group->lock);
if (cleanup) {
mutex_lock(&fs_info->chunk_mutex);
em_tree = &fs_info->mapping_tree;
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, block_group->start,
@ -3455,7 +3391,6 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
BUG_ON(!em); /* logic error, can't happen */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
mutex_unlock(&fs_info->chunk_mutex);
/* once for us and once for the tree */
free_extent_map(em);

View File

@ -114,8 +114,7 @@ struct btrfs_block_group {
/* For block groups in the same raid type */
struct list_head list;
/* Usage count */
atomic_t count;
refcount_t refs;
/*
* List of struct btrfs_free_clusters for this block group.

View File

@ -151,6 +151,17 @@ struct btrfs_inode {
*/
u64 last_unlink_trans;
/*
* The id/generation of the last transaction where this inode was
* either the source or the destination of a clone/dedupe operation.
* Used when logging an inode to know if there are shared extents that
* need special care when logging checksum items, to avoid duplicate
* checksum items in a log (which can lead to a corruption where we end
* up with missing checksum ranges after log replay).
* Protected by the vfs inode lock.
*/
u64 last_reflink_trans;
/*
* Number of bytes outstanding that are going to need csums. This is
* used in ENOSPC accounting.

View File

@ -631,10 +631,8 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
int pass;
selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
if (NULL == selected_super) {
pr_info("btrfsic: error, kmalloc failed!\n");
if (!selected_super)
return -ENOMEM;
}
list_for_each_entry(device, dev_head, dev_list) {
int i;
@ -795,7 +793,6 @@ static int btrfsic_process_superblock_dev_mirror(
if (NULL == superblock_tmp) {
superblock_tmp = btrfsic_block_alloc();
if (NULL == superblock_tmp) {
pr_info("btrfsic: error, kmalloc failed!\n");
ret = -1;
goto out;
}
@ -921,9 +918,7 @@ static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
struct btrfsic_stack_frame *sf;
sf = kzalloc(sizeof(*sf), GFP_NOFS);
if (NULL == sf)
pr_info("btrfsic: alloc memory failed!\n");
else
if (sf)
sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
return sf;
}
@ -1313,7 +1308,6 @@ static int btrfsic_create_link_to_next_block(
if (NULL == l) {
l = btrfsic_block_link_alloc();
if (NULL == l) {
pr_info("btrfsic: error, kmalloc failed!\n");
btrfsic_release_block_ctx(next_block_ctx);
*next_blockp = NULL;
return -1;
@ -1470,7 +1464,6 @@ static int btrfsic_handle_extent_data(
mirror_num,
&block_was_created);
if (NULL == next_block) {
pr_info("btrfsic: error, kmalloc failed!\n");
btrfsic_release_block_ctx(&next_block_ctx);
return -1;
}
@ -2013,7 +2006,6 @@ again:
block = btrfsic_block_alloc();
if (NULL == block) {
pr_info("btrfsic: error, kmalloc failed!\n");
btrfsic_release_block_ctx(&block_ctx);
goto continue_loop;
}
@ -2234,7 +2226,6 @@ static int btrfsic_process_written_superblock(
mirror_num,
&was_created);
if (NULL == next_block) {
pr_info("btrfsic: error, kmalloc failed!\n");
btrfsic_release_block_ctx(&tmp_next_block_ctx);
return -1;
}
@ -2542,10 +2533,8 @@ static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
&state->block_link_hashtable);
if (NULL == l) {
l = btrfsic_block_link_alloc();
if (NULL == l) {
pr_info("btrfsic: error, kmalloc failed!\n");
if (!l)
return NULL;
}
l->block_ref_to = next_block;
l->block_ref_from = from_block;
@ -2589,10 +2578,9 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
struct btrfsic_dev_state *dev_state;
block = btrfsic_block_alloc();
if (NULL == block) {
pr_info("btrfsic: error, kmalloc failed!\n");
if (!block)
return NULL;
}
dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev->bd_dev);
if (NULL == dev_state) {
pr_info("btrfsic: error, lookup dev_state failed!\n");
@ -2797,10 +2785,8 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
return -1;
}
state = kvzalloc(sizeof(*state), GFP_KERNEL);
if (!state) {
pr_info("btrfs check-integrity: allocation failed!\n");
if (!state)
return -ENOMEM;
}
if (!btrfsic_is_initialized) {
mutex_init(&btrfsic_mutex);
@ -2829,7 +2815,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
ds = btrfsic_dev_state_alloc();
if (NULL == ds) {
pr_info("btrfs check-integrity: kmalloc() failed!\n");
mutex_unlock(&btrfsic_mutex);
return -ENOMEM;
}

View File

@ -172,18 +172,17 @@ static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * csum_size;
}
static int check_compressed_csum(struct btrfs_inode *inode,
struct compressed_bio *cb,
static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
u64 disk_start)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int ret;
struct page *page;
unsigned long i;
char *kaddr;
u8 csum[BTRFS_CSUM_SIZE];
struct compressed_bio *cb = bio->bi_private;
u8 *cb_sum = cb->sums;
if (inode->flags & BTRFS_INODE_NODATASUM)
@ -201,15 +200,15 @@ static int check_compressed_csum(struct btrfs_inode *inode,
if (memcmp(&csum, cb_sum, csum_size)) {
btrfs_print_data_csum_error(inode, disk_start,
csum, cb_sum, cb->mirror_num);
ret = -EIO;
goto fail;
if (btrfs_io_bio(bio)->device)
btrfs_dev_stat_inc_and_print(
btrfs_io_bio(bio)->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
return -EIO;
}
cb_sum += csum_size;
}
ret = 0;
fail:
return ret;
return 0;
}
/* when we finish reading compressed pages from the disk, we
@ -244,7 +243,6 @@ static void end_compressed_bio_read(struct bio *bio)
* Record the correct mirror_num in cb->orig_bio so that
* read-repair can work properly.
*/
ASSERT(btrfs_io_bio(cb->orig_bio));
btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
cb->mirror_num = mirror;
@ -256,7 +254,7 @@ static void end_compressed_bio_read(struct bio *bio)
goto csum_failed;
inode = cb->inode;
ret = check_compressed_csum(BTRFS_I(inode), cb,
ret = check_compressed_csum(BTRFS_I(inode), bio,
(u64)bio->bi_iter.bi_sector << 9);
if (ret)
goto csum_failed;
@ -405,7 +403,7 @@ out:
* This also checksums the file bytes and gets things ready for
* the end io hooks.
*/
blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
@ -413,7 +411,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned int write_flags,
struct cgroup_subsys_state *blkcg_css)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio = NULL;
struct compressed_bio *cb;
unsigned long bytes_left;
@ -421,7 +419,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
struct page *page;
u64 first_byte = disk_start;
blk_status_t ret;
int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
WARN_ON(!PAGE_ALIGNED(start));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@ -429,7 +427,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
return BLK_STS_RESOURCE;
refcount_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
cb->inode = &inode->vfs_inode;
cb->start = start;
cb->len = len;
cb->mirror_num = 0;
@ -455,7 +453,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
int submit = 0;
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
page->mapping = inode->vfs_inode.i_mapping;
if (bio->bi_iter.bi_size)
submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
0);

View File

@ -8,6 +8,8 @@
#include <linux/sizes.h>
struct btrfs_inode;
/*
* We want to make sure that amount of RAM required to uncompress an extent is
* reasonable, so we limit the total size in ram of a compressed extent to
@ -88,7 +90,7 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
unsigned long total_out, u64 disk_start,
struct bio *bio);
blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,

View File

@ -1501,6 +1501,22 @@ static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
return 0;
}
#ifdef __LITTLE_ENDIAN
/*
* Compare two keys, on little-endian the disk order is same as CPU order and
* we can avoid the conversion.
*/
static int comp_keys(const struct btrfs_disk_key *disk_key,
const struct btrfs_key *k2)
{
const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
return btrfs_comp_cpu_keys(k1, k2);
}
#else
/*
* compare two keys in a memcmp fashion
*/
@ -1513,6 +1529,7 @@ static int comp_keys(const struct btrfs_disk_key *disk,
return btrfs_comp_cpu_keys(&k1, k2);
}
#endif
/*
* same as comp_keys only with two btrfs_key's

View File

@ -545,11 +545,6 @@ enum {
* (device replace, resize, device add/delete, balance)
*/
BTRFS_FS_EXCL_OP,
/*
* To info transaction_kthread we need an immediate commit so it
* doesn't need to wait for commit_interval
*/
BTRFS_FS_NEED_ASYNC_COMMIT,
/*
* Indicate that balance has been set up from the ioctl and is in the
* main phase. The fs_info::balance_ctl is initialized.
@ -779,6 +774,7 @@ struct btrfs_fs_info {
u32 thread_pool_size;
struct kobject *space_info_kobj;
struct kobject *qgroups_kobj;
u64 total_pinned;
@ -1011,6 +1007,8 @@ enum {
BTRFS_ROOT_DEAD_TREE,
/* The root has a log tree. Used only for subvolume roots. */
BTRFS_ROOT_HAS_LOG_TREE,
/* Qgroup flushing is in progress */
BTRFS_ROOT_QGROUP_FLUSHING,
};
/*
@ -1059,8 +1057,10 @@ struct btrfs_root {
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
struct list_head log_ctxs[2];
/* Used only for log trees of subvolumes, not for the log root tree */
atomic_t log_writers;
atomic_t log_commit[2];
/* Used only for log trees of subvolumes, not for the log root tree */
atomic_t log_batch;
int log_transid;
/* No matter the commit succeeds or not*/
@ -1075,7 +1075,6 @@ struct btrfs_root {
u64 highest_objectid;
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
@ -1162,6 +1161,7 @@ struct btrfs_root {
spinlock_t qgroup_meta_rsv_lock;
u64 qgroup_meta_rsv_pertrans;
u64 qgroup_meta_rsv_prealloc;
wait_queue_head_t qgroup_flush_wait;
/* Number of active swapfiles */
atomic_t nr_swapfiles;
@ -1277,18 +1277,18 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
BTRFS_MOUNT_##opt)
#define btrfs_set_and_info(fs_info, opt, fmt, args...) \
{ \
do { \
if (!btrfs_test_opt(fs_info, opt)) \
btrfs_info(fs_info, fmt, ##args); \
btrfs_set_opt(fs_info->mount_opt, opt); \
}
} while (0)
#define btrfs_clear_and_info(fs_info, opt, fmt, args...) \
{ \
do { \
if (btrfs_test_opt(fs_info, opt)) \
btrfs_info(fs_info, fmt, ##args); \
btrfs_clear_opt(fs_info->mount_opt, opt); \
}
} while (0)
/*
* Requests for changes that need to be done during transaction commit.
@ -1895,6 +1895,52 @@ BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
#ifdef __LITTLE_ENDIAN
/*
* Optimized helpers for little-endian architectures where CPU and on-disk
* structures have the same endianness and we can skip conversions.
*/
static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu_key,
const struct btrfs_disk_key *disk_key)
{
memcpy(cpu_key, disk_key, sizeof(struct btrfs_key));
}
static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk_key,
const struct btrfs_key *cpu_key)
{
memcpy(disk_key, cpu_key, sizeof(struct btrfs_key));
}
static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb,
struct btrfs_key *cpu_key, int nr)
{
struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key;
btrfs_node_key(eb, disk_key, nr);
}
static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb,
struct btrfs_key *cpu_key, int nr)
{
struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key;
btrfs_item_key(eb, disk_key, nr);
}
static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb,
const struct btrfs_dir_item *item,
struct btrfs_key *cpu_key)
{
struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key;
btrfs_dir_item_key(eb, item, disk_key);
}
#else
static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
const struct btrfs_disk_key *disk)
{
@ -1936,6 +1982,8 @@ static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb,
btrfs_disk_key_to_cpu(key, &disk_key);
}
#endif
/* struct btrfs_header */
BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
@ -2232,7 +2280,8 @@ static inline unsigned int leaf_data_end(const struct extent_buffer *leaf)
}
/* struct btrfs_file_extent_item */
BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item,
type, 8);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr,
struct btrfs_file_extent_item, disk_bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset,
@ -2241,6 +2290,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation,
struct btrfs_file_extent_item, generation, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
struct btrfs_file_extent_item, num_bytes, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes,
struct btrfs_file_extent_item, ram_bytes, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes,
struct btrfs_file_extent_item, disk_num_bytes, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression,
@ -2257,6 +2308,7 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
return BTRFS_FILE_EXTENT_INLINE_DATA_START + datasize;
}
BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
disk_bytenr, 64);
BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
@ -2508,16 +2560,46 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
/*
* Different levels for to flush space when doing space reservations.
*
* The higher the level, the more methods we try to reclaim space.
*/
enum btrfs_reserve_flush_enum {
/* If we are in the transaction, we can't flush anything.*/
BTRFS_RESERVE_NO_FLUSH,
/*
* Flushing delalloc may cause deadlock somewhere, in this
* case, use FLUSH LIMIT
* Flush space by:
* - Running delayed inode items
* - Allocating a new chunk
*/
BTRFS_RESERVE_FLUSH_LIMIT,
/*
* Flush space by:
* - Running delayed inode items
* - Running delayed refs
* - Running delalloc and waiting for ordered extents
* - Allocating a new chunk
*/
BTRFS_RESERVE_FLUSH_EVICT,
/*
* Flush space by above mentioned methods and by:
* - Running delayed iputs
* - Commiting transaction
*
* Can be interruped by fatal signal.
*/
BTRFS_RESERVE_FLUSH_ALL,
/*
* Pretty much the same as FLUSH_ALL, but can also steal space from
* global rsv.
*
* Can be interruped by fatal signal.
*/
BTRFS_RESERVE_FLUSH_ALL_STEAL,
};
@ -2831,8 +2913,8 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig);
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
u64 file_start, int contig);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
@ -2875,7 +2957,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
unsigned int extra_bits,
struct extent_state **cached_state);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
@ -2928,7 +3010,7 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
struct btrfs_trans_handle *trans, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started, unsigned long *nr_written,
struct writeback_control *wbc);
int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
@ -2962,7 +3044,7 @@ void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
int skip_pinned);
extern const struct file_operations btrfs_file_operations;
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
struct btrfs_root *root, struct btrfs_inode *inode,
struct btrfs_path *path, u64 start, u64 end,
u64 *drop_end, int drop_cache,
int replace_extent,
@ -2978,10 +3060,13 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 start, u64 end);
int btrfs_release_file(struct inode *inode, struct file *file);
int btrfs_dirty_pages(struct inode *inode, struct page **pages,
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes);
void btrfs_check_nocow_unlock(struct btrfs_inode *inode);
/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
@ -3194,7 +3279,7 @@ do { \
/* Report first abort since mount */ \
if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
&((trans)->fs_info->fs_state))) { \
if ((errno) != -EIO) { \
if ((errno) != -EIO && (errno) != -EROFS) { \
WARN(1, KERN_DEBUG \
"BTRFS: Transaction aborted (error %d)\n", \
(errno)); \
@ -3378,7 +3463,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *cow);

View File

@ -237,10 +237,10 @@ commit_trans:
return 0;
}
int btrfs_check_data_free_space(struct inode *inode,
int btrfs_check_data_free_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret;
/* align the range */
@ -248,14 +248,14 @@ int btrfs_check_data_free_space(struct inode *inode,
round_down(start, fs_info->sectorsize);
start = round_down(start, fs_info->sectorsize);
ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
ret = btrfs_alloc_data_chunk_ondemand(inode, len);
if (ret < 0)
return ret;
/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
if (ret < 0)
btrfs_free_reserved_data_space_noquota(inode, start, len);
btrfs_free_reserved_data_space_noquota(fs_info, len);
else
ret = 0;
return ret;
@ -269,16 +269,12 @@ int btrfs_check_data_free_space(struct inode *inode,
* which we can't sleep and is sure it won't affect qgroup reserved space.
* Like clear_bit_hook().
*/
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info,
u64 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_space_info *data_sinfo;
/* Make sure the range is aligned to sectorsize */
len = round_up(start + len, fs_info->sectorsize) -
round_down(start, fs_info->sectorsize);
start = round_down(start, fs_info->sectorsize);
ASSERT(IS_ALIGNED(len, fs_info->sectorsize));
data_sinfo = fs_info->data_sinfo;
spin_lock(&data_sinfo->lock);
@ -293,17 +289,17 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
* This one will handle the per-inode data rsv map for accurate reserved
* space framework.
*/
void btrfs_free_reserved_data_space(struct inode *inode,
void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
/* Make sure the range is aligned to sectorsize */
len = round_up(start + len, root->fs_info->sectorsize) -
round_down(start, root->fs_info->sectorsize);
start = round_down(start, root->fs_info->sectorsize);
len = round_up(start + len, fs_info->sectorsize) -
round_down(start, fs_info->sectorsize);
start = round_down(start, fs_info->sectorsize);
btrfs_free_reserved_data_space_noquota(inode, start, len);
btrfs_free_reserved_data_space_noquota(fs_info, len);
btrfs_qgroup_free_data(inode, reserved, start, len);
}
@ -557,7 +553,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
* Return 0 for success
* Return <0 for error(-ENOSPC or -EQUOT)
*/
int btrfs_delalloc_reserve_space(struct inode *inode,
int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len)
{
int ret;
@ -565,7 +561,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
ret = btrfs_check_data_free_space(inode, reserved, start, len);
if (ret < 0)
return ret;
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
ret = btrfs_delalloc_reserve_metadata(inode, len);
if (ret < 0)
btrfs_free_reserved_data_space(inode, *reserved, start, len);
return ret;
@ -583,10 +579,10 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
* list if there are no delalloc bytes left.
* Also it will handle the qgroup reserved space.
*/
void btrfs_delalloc_release_space(struct inode *inode,
void btrfs_delalloc_release_space(struct btrfs_inode *inode,
struct extent_changeset *reserved,
u64 start, u64 len, bool qgroup_free)
{
btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
btrfs_delalloc_release_metadata(inode, len, qgroup_free);
btrfs_free_reserved_data_space(inode, reserved, start, len);
}

View File

@ -6,18 +6,18 @@
struct extent_changeset;
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
int btrfs_check_data_free_space(struct inode *inode,
int btrfs_check_data_free_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
void btrfs_free_reserved_data_space(struct inode *inode,
void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
void btrfs_delalloc_release_space(struct inode *inode,
void btrfs_delalloc_release_space(struct btrfs_inode *inode,
struct extent_changeset *reserved,
u64 start, u64 len, bool qgroup_free);
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info,
u64 len);
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free);
int btrfs_delalloc_reserve_space(struct inode *inode,
int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
#endif /* BTRFS_DELALLOC_SPACE_H */

View File

@ -1116,6 +1116,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
mutex_init(&root->log_mutex);
mutex_init(&root->ordered_extent_mutex);
mutex_init(&root->delalloc_mutex);
init_waitqueue_head(&root->qgroup_flush_wait);
init_waitqueue_head(&root->log_writer_wait);
init_waitqueue_head(&root->log_commit_wait[0]);
init_waitqueue_head(&root->log_commit_wait[1]);
@ -1141,10 +1142,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
if (!dummy)
root->defrag_trans_start = fs_info->generation;
else
root->defrag_trans_start = 0;
root->root_key.objectid = objectid;
root->anon_dev = 0;
@ -1395,7 +1392,12 @@ alloc_fail:
goto out;
}
static int btrfs_init_fs_root(struct btrfs_root *root)
/*
* Initialize subvolume root in-memory structure
*
* @anon_dev: anonymous device to attach to the root, if zero, allocate new
*/
static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
{
int ret;
unsigned int nofs_flag;
@ -1428,9 +1430,20 @@ static int btrfs_init_fs_root(struct btrfs_root *root)
spin_lock_init(&root->ino_cache_lock);
init_waitqueue_head(&root->ino_cache_wait);
ret = get_anon_bdev(&root->anon_dev);
if (ret)
goto fail;
/*
* Don't assign anonymous block device to roots that are not exposed to
* userspace, the id pool is limited to 1M
*/
if (is_fstree(root->root_key.objectid) &&
btrfs_root_refs(&root->root_item) > 0) {
if (!anon_dev) {
ret = get_anon_bdev(&root->anon_dev);
if (ret)
goto fail;
} else {
root->anon_dev = anon_dev;
}
}
mutex_lock(&root->objectid_mutex);
ret = btrfs_find_highest_objectid(root,
@ -1534,8 +1547,27 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
}
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref)
/*
* Get an in-memory reference of a root structure.
*
* For essential trees like root/extent tree, we grab it from fs_info directly.
* For subvolume trees, we check the cached filesystem roots first. If not
* found, then read it from disk and add it to cached fs roots.
*
* Caller should release the root by calling btrfs_put_root() after the usage.
*
* NOTE: Reloc and log trees can't be read by this function as they share the
* same root objectid.
*
* @objectid: root id
* @anon_dev: preallocated anonymous block device number for new roots,
* pass 0 for new allocation.
* @check_ref: whether to check root item references, If true, return -ENOENT
* for orphan roots
*/
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
u64 objectid, dev_t anon_dev,
bool check_ref)
{
struct btrfs_root *root;
struct btrfs_path *path;
@ -1564,6 +1596,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
/* Shouldn't get preallocated anon_dev for cached roots */
ASSERT(!anon_dev);
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
btrfs_put_root(root);
return ERR_PTR(-ENOENT);
@ -1583,7 +1617,7 @@ again:
goto fail;
}
ret = btrfs_init_fs_root(root);
ret = btrfs_init_fs_root(root, anon_dev);
if (ret)
goto fail;
@ -1616,6 +1650,33 @@ fail:
return ERR_PTR(ret);
}
/*
* Get in-memory reference of a root structure
*
* @objectid: tree objectid
* @check_ref: if set, verify that the tree exists and the item has at least
* one reference
*/
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref)
{
return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
}
/*
* Get in-memory reference of a root structure, created as new, optionally pass
* the anonymous block device id
*
* @objectid: tree objectid
* @anon_dev: if zero, allocate a new anonymous block device or use the
* parameter value
*/
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, dev_t anon_dev)
{
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
}
static int btrfs_congested_fn(void *congested_data, int bdi_bits)
{
struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
@ -1749,7 +1810,6 @@ static int transaction_kthread(void *arg)
now = ktime_get_seconds();
if (cur->state < TRANS_STATE_COMMIT_START &&
!test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
(now < cur->start_time ||
now - cur->start_time < fs_info->commit_interval)) {
spin_unlock(&fs_info->trans_lock);
@ -2001,8 +2061,7 @@ void btrfs_put_root(struct btrfs_root *root)
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
btrfs_drew_lock_destroy(&root->snapshot_lock);
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
free_root_extent_buffers(root);
kfree(root->free_ino_ctl);
kfree(root->free_ino_pinned);
#ifdef CONFIG_BTRFS_DEBUG
@ -4058,6 +4117,11 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
ASSERT(list_empty(&fs_info->delayed_iputs));
set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
if (btrfs_check_quota_leak(fs_info)) {
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
btrfs_err(fs_info, "qgroup reserved space leaked");
}
btrfs_free_qgroup_config(fs_info);
ASSERT(list_empty(&fs_info->delalloc_roots));

View File

@ -67,6 +67,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, dev_t anon_dev);
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);

View File

@ -233,14 +233,11 @@ bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
struct extent_state **cached_state);
/* This should be reworked in the future and put elsewhere. */
int get_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record **failrec);
struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start);
int set_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record *failrec);
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
u64 end);
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
struct io_failure_record **failrec_ret);
int free_io_failure(struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree,
struct io_failure_record *rec);

View File

@ -5298,7 +5298,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
goto out;
}
trans = btrfs_start_transaction(tree_root, 0);
/*
* Use join to avoid potential EINTR from transaction start. See
* wait_reserve_ticket and the whole reservation callchain.
*/
if (for_reloc)
trans = btrfs_join_transaction(tree_root);
else
trans = btrfs_start_transaction(tree_root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out_free;
@ -5466,6 +5473,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
}
}
/*
* This subvolume is going to be completely dropped, and won't be
* recorded as dirty roots, thus pertrans meta rsv will not be freed at
* commit transaction time. So free it here manually.
*/
btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
btrfs_qgroup_free_meta_all_pertrans(root);
if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
btrfs_add_dropped_root(trans, root);
else

View File

@ -2018,15 +2018,14 @@ out:
return err;
}
void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned clear_bits,
unsigned long page_ops)
{
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
NULL);
clear_extent_bit(&inode->io_tree, start, end, clear_bits, 1, 0, NULL);
__process_pages_contig(inode->i_mapping, locked_page,
__process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
page_ops, NULL);
}
@ -2123,12 +2122,11 @@ out:
return ret;
}
int get_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record **failrec)
struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start)
{
struct rb_node *node;
struct extent_state *state;
int ret = 0;
struct io_failure_record *failrec;
spin_lock(&tree->lock);
/*
@ -2137,18 +2135,19 @@ int get_state_failrec(struct extent_io_tree *tree, u64 start,
*/
node = tree_search(tree, start);
if (!node) {
ret = -ENOENT;
failrec = ERR_PTR(-ENOENT);
goto out;
}
state = rb_entry(node, struct extent_state, rb_node);
if (state->start != start) {
ret = -ENOENT;
failrec = ERR_PTR(-ENOENT);
goto out;
}
*failrec = state->failrec;
failrec = state->failrec;
out:
spin_unlock(&tree->lock);
return ret;
return failrec;
}
/*
@ -2378,8 +2377,8 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
if (!ret)
return 0;
ret = get_state_failrec(failure_tree, start, &failrec);
if (ret)
failrec = get_state_failrec(failure_tree, start);
if (IS_ERR(failrec))
return 0;
BUG_ON(!failrec->this_mirror);
@ -2451,8 +2450,8 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
spin_unlock(&failure_tree->lock);
}
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
struct io_failure_record **failrec_ret)
static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
u64 start, u64 end)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct io_failure_record *failrec;
@ -2463,65 +2462,8 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
int ret;
u64 logical;
ret = get_state_failrec(failure_tree, start, &failrec);
if (ret) {
failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
if (!failrec)
return -ENOMEM;
failrec->start = start;
failrec->len = end - start + 1;
failrec->this_mirror = 0;
failrec->bio_flags = 0;
failrec->in_validation = 0;
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, failrec->len);
if (!em) {
read_unlock(&em_tree->lock);
kfree(failrec);
return -EIO;
}
if (em->start > start || em->start + em->len <= start) {
free_extent_map(em);
em = NULL;
}
read_unlock(&em_tree->lock);
if (!em) {
kfree(failrec);
return -EIO;
}
logical = start - em->start;
logical = em->block_start + logical;
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
logical = em->block_start;
failrec->bio_flags = EXTENT_BIO_COMPRESSED;
extent_set_compress_type(&failrec->bio_flags,
em->compress_type);
}
btrfs_debug(fs_info,
"Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
logical, start, failrec->len);
failrec->logical = logical;
free_extent_map(em);
/* set the bits in the private failure tree */
ret = set_extent_bits(failure_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY);
if (ret >= 0)
ret = set_state_failrec(failure_tree, start, failrec);
/* set the bits in the inode's tree */
if (ret >= 0)
ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
if (ret < 0) {
kfree(failrec);
return ret;
}
} else {
failrec = get_state_failrec(failure_tree, start);
if (!IS_ERR(failrec)) {
btrfs_debug(fs_info,
"Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
failrec->logical, failrec->start, failrec->len,
@ -2531,11 +2473,66 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
* (e.g. with a list for failed_mirror) to make
* clean_io_failure() clean all those errors at once.
*/
return failrec;
}
*failrec_ret = failrec;
failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
if (!failrec)
return ERR_PTR(-ENOMEM);
return 0;
failrec->start = start;
failrec->len = end - start + 1;
failrec->this_mirror = 0;
failrec->bio_flags = 0;
failrec->in_validation = 0;
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, failrec->len);
if (!em) {
read_unlock(&em_tree->lock);
kfree(failrec);
return ERR_PTR(-EIO);
}
if (em->start > start || em->start + em->len <= start) {
free_extent_map(em);
em = NULL;
}
read_unlock(&em_tree->lock);
if (!em) {
kfree(failrec);
return ERR_PTR(-EIO);
}
logical = start - em->start;
logical = em->block_start + logical;
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
logical = em->block_start;
failrec->bio_flags = EXTENT_BIO_COMPRESSED;
extent_set_compress_type(&failrec->bio_flags, em->compress_type);
}
btrfs_debug(fs_info,
"Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
logical, start, failrec->len);
failrec->logical = logical;
free_extent_map(em);
/* Set the bits in the private failure tree */
ret = set_extent_bits(failure_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY);
if (ret >= 0) {
ret = set_state_failrec(failure_tree, start, failrec);
/* Set the bits in the inode's tree */
ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
} else if (ret < 0) {
kfree(failrec);
return ERR_PTR(ret);
}
return failrec;
}
static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
@ -2660,16 +2657,15 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode,
struct bio *repair_bio;
struct btrfs_io_bio *repair_io_bio;
blk_status_t status;
int ret;
btrfs_debug(fs_info,
"repair read error: read error at %llu", start);
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
if (ret)
return errno_to_blk_status(ret);
failrec = btrfs_get_io_failure_record(inode, start, end);
if (IS_ERR(failrec))
return errno_to_blk_status(PTR_ERR(failrec));
need_validation = btrfs_io_needs_validation(inode, failed_bio);
@ -3420,7 +3416,7 @@ static void update_nr_written(struct writeback_control *wbc,
* This returns 0 if all went well (page still locked)
* This returns < 0 if there were errors (page still locked)
*/
static noinline_for_stack int writepage_delalloc(struct inode *inode,
static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
struct page *page, struct writeback_control *wbc,
u64 delalloc_start, unsigned long *nr_written)
{
@ -3433,7 +3429,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
while (delalloc_end < page_end) {
found = find_lock_delalloc_range(inode, page,
found = find_lock_delalloc_range(&inode->vfs_inode, page,
&delalloc_start,
&delalloc_end);
if (!found) {
@ -3450,8 +3446,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
* started, so we don't want to return > 0 unless
* things are going well.
*/
ret = ret < 0 ? ret : -EIO;
goto done;
return ret < 0 ? ret : -EIO;
}
/*
* delalloc_end is already one less than the total length, so
@ -3483,10 +3478,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
return 1;
}
ret = 0;
done:
return ret;
return 0;
}
/*
@ -3497,7 +3489,7 @@ done:
* 0 if all went well (page still locked)
* < 0 if there were errors (page still locked)
*/
static noinline_for_stack int __extent_writepage_io(struct inode *inode,
static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
struct page *page,
struct writeback_control *wbc,
struct extent_page_data *epd,
@ -3505,7 +3497,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
unsigned long nr_written,
int *nr_ret)
{
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct extent_io_tree *tree = &inode->io_tree;
u64 start = page_offset(page);
u64 page_end = start + PAGE_SIZE - 1;
u64 end;
@ -3537,7 +3529,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
update_nr_written(wbc, nr_written + 1);
end = page_end;
blocksize = inode->i_sb->s_blocksize;
blocksize = inode->vfs_inode.i_sb->s_blocksize;
while (cur <= end) {
u64 em_end;
@ -3548,8 +3540,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
page_end, 1);
break;
}
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur,
end - cur + 1);
em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
ret = PTR_ERR_OR_ZERO(em);
@ -3586,7 +3577,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
btrfs_err(BTRFS_I(inode)->root->fs_info,
btrfs_err(inode->root->fs_info,
"page %lu not writeback, cur %llu end %llu",
page->index, cur, end);
}
@ -3659,15 +3650,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
set_page_extent_mapped(page);
if (!epd->extent_locked) {
ret = writepage_delalloc(inode, page, wbc, start, &nr_written);
ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
&nr_written);
if (ret == 1)
return 0;
if (ret)
goto done;
}
ret = __extent_writepage_io(inode, page, wbc, epd,
i_size, nr_written, &nr);
ret = __extent_writepage_io(BTRFS_I(inode), page, wbc, epd, i_size,
nr_written, &nr);
if (ret == 1)
return 0;
@ -4127,7 +4119,7 @@ retry:
if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
ret = flush_write_bio(&epd);
} else {
ret = -EUCLEAN;
ret = -EROFS;
end_write_bio(&epd, ret);
}
return ret;
@ -4489,6 +4481,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
page->mapping->host->i_size > SZ_16M) {
u64 len;
while (start <= end) {
struct btrfs_fs_info *fs_info;
u64 cur_gen;
len = end - start + 1;
write_lock(&map->lock);
em = lookup_extent_mapping(map, start, len);
@ -4502,15 +4497,45 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
free_extent_map(em);
break;
}
if (!test_range_bit(tree, em->start,
extent_map_end(em) - 1,
EXTENT_LOCKED, 0, NULL)) {
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&btrfs_inode->runtime_flags);
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em);
}
if (test_range_bit(tree, em->start,
extent_map_end(em) - 1,
EXTENT_LOCKED, 0, NULL))
goto next;
/*
* If it's not in the list of modified extents, used
* by a fast fsync, we can remove it. If it's being
* logged we can safely remove it since fsync took an
* extra reference on the em.
*/
if (list_empty(&em->list) ||
test_bit(EXTENT_FLAG_LOGGING, &em->flags))
goto remove_em;
/*
* If it's in the list of modified extents, remove it
* only if its generation is older then the current one,
* in which case we don't need it for a fast fsync.
* Otherwise don't remove it, we could be racing with an
* ongoing fast fsync that could miss the new extent.
*/
fs_info = btrfs_inode->root->fs_info;
spin_lock(&fs_info->trans_lock);
cur_gen = fs_info->generation;
spin_unlock(&fs_info->trans_lock);
if (em->generation >= cur_gen)
goto next;
remove_em:
/*
* We only remove extent maps that are not in the list of
* modified extents or that are in the list but with a
* generation lower then the current generation, so there
* is no need to set the full fsync flag on the inode (it
* hurts the fsync performance for workloads with a data
* size that exceeds or is close to the system's memory).
*/
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em);
next:
start = extent_map_end(em);
write_unlock(&map->lock);
@ -4670,7 +4695,7 @@ static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
}
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
u64 start, u64 len)
{
int ret = 0;
u64 off = start;

View File

@ -204,7 +204,7 @@ int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
u64 start, u64 len);
void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
@ -277,7 +277,7 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(const struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned bits_to_clear,
unsigned long page_ops);

View File

@ -522,10 +522,10 @@ fail:
* means this bio can contains potentially discontigous bio vecs
* so the logical offset of each should be calculated separately.
*/
blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
u64 file_start, int contig)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered = NULL;

View File

@ -500,18 +500,18 @@ next:
* this also makes the decision about creating an inline extent vs
* doing real data extents, marking pages dirty and delalloc as required.
*/
int btrfs_dirty_pages(struct inode *inode, struct page **pages,
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
int err = 0;
int i;
u64 num_bytes;
u64 start_pos;
u64 end_of_last_block;
u64 end_pos = pos + write_bytes;
loff_t isize = i_size_read(inode);
loff_t isize = i_size_read(&inode->vfs_inode);
unsigned int extra_bits = 0;
start_pos = pos & ~((u64) fs_info->sectorsize - 1);
@ -524,13 +524,13 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
* The pages may have already been dirty, clear out old accounting so
* we can set things up properly
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, end_of_last_block,
clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, cached);
if (!btrfs_is_free_space_inode(BTRFS_I(inode))) {
if (!btrfs_is_free_space_inode(inode)) {
if (start_pos >= isize &&
!(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) {
!(inode->flags & BTRFS_INODE_PREALLOC)) {
/*
* There can't be any extents following eof in this case
* so just set the delalloc new bit for the range
@ -538,8 +538,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
*/
extra_bits |= EXTENT_DELALLOC_NEW;
} else {
err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode),
start_pos,
err = btrfs_find_new_delalloc_bytes(inode, start_pos,
num_bytes, cached);
if (err)
return err;
@ -564,7 +563,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
* at this time.
*/
if (end_pos > isize)
i_size_write(inode, end_pos);
i_size_write(&inode->vfs_inode, end_pos);
return 0;
}
@ -731,7 +730,7 @@ next:
* is deleted from the tree.
*/
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
struct btrfs_root *root, struct btrfs_inode *inode,
struct btrfs_path *path, u64 start, u64 end,
u64 *drop_end, int drop_cache,
int replace_extent,
@ -744,7 +743,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_ref ref = { 0 };
struct btrfs_key key;
struct btrfs_key new_key;
u64 ino = btrfs_ino(BTRFS_I(inode));
struct inode *vfs_inode = &inode->vfs_inode;
u64 ino = btrfs_ino(inode);
u64 search_start = start;
u64 disk_bytenr = 0;
u64 num_bytes = 0;
@ -762,9 +762,9 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
int leafs_visited = 0;
if (drop_cache)
btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0);
btrfs_drop_extent_cache(inode, start, end - 1, 0);
if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
if (start >= inode->disk_i_size && !replace_extent)
modify_tree = 0;
update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
@ -935,7 +935,7 @@ next_slot:
extent_end - end);
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0)
inode_sub_bytes(inode, end - key.offset);
inode_sub_bytes(vfs_inode, end - key.offset);
break;
}
@ -955,7 +955,7 @@ next_slot:
start - key.offset);
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0)
inode_sub_bytes(inode, extent_end - start);
inode_sub_bytes(vfs_inode, extent_end - start);
if (end == extent_end)
break;
@ -979,7 +979,7 @@ delete_extent_item:
if (update_refs &&
extent_type == BTRFS_FILE_EXTENT_INLINE) {
inode_sub_bytes(inode,
inode_sub_bytes(vfs_inode,
extent_end - key.offset);
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
@ -993,7 +993,7 @@ delete_extent_item:
key.offset - extent_offset);
ret = btrfs_free_extent(trans, &ref);
BUG_ON(ret); /* -ENOMEM */
inode_sub_bytes(inode,
inode_sub_bytes(vfs_inode,
extent_end - key.offset);
}
@ -1082,8 +1082,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
drop_cache, 0, 0, NULL);
ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, start,
end, NULL, drop_cache, 0, 0, NULL);
btrfs_free_path(path);
return ret;
}
@ -1532,8 +1532,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
return ret;
}
static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes, bool nowait)
static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes, bool nowait)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
@ -1541,6 +1541,9 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
u64 num_bytes;
int ret;
if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
return 0;
if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
return -EAGAIN;
@ -1583,6 +1586,42 @@ out_unlock:
return ret;
}
static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes)
{
return check_can_nocow(inode, pos, write_bytes, true);
}
/*
* Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
*
* @pos: File offset
* @write_bytes: The length to write, will be updated to the nocow writeable
* range
*
* This function will flush ordered extents in the range to ensure proper
* nocow checks.
*
* Return:
* >0 and update @write_bytes if we can do nocow write
* 0 if we can't do nocow write
* -EAGAIN if we can't get the needed lock or there are ordered extents
* for * (nowait == true) case
* <0 if other error happened
*
* NOTE: Callers need to release the lock by btrfs_check_nocow_unlock().
*/
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes)
{
return check_can_nocow(inode, pos, write_bytes, false);
}
void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
{
btrfs_drew_write_unlock(&inode->root->snapshot_lock);
}
static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
struct iov_iter *i)
{
@ -1590,7 +1629,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
loff_t pos = iocb->ki_pos;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
struct extent_changeset *data_reserved = NULL;
u64 release_bytes = 0;
@ -1643,13 +1681,12 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
fs_info->sectorsize);
extent_changeset_release(data_reserved);
ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
ret = btrfs_check_data_free_space(BTRFS_I(inode),
&data_reserved, pos,
write_bytes);
if (ret < 0) {
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) &&
check_can_nocow(BTRFS_I(inode), pos,
&write_bytes, false) > 0) {
if (btrfs_check_nocow_lock(BTRFS_I(inode), pos,
&write_bytes) > 0) {
/*
* For nodata cow case, no need to reserve
* data space.
@ -1674,11 +1711,11 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
reserve_bytes);
if (ret) {
if (!only_release_metadata)
btrfs_free_reserved_data_space(inode,
btrfs_free_reserved_data_space(BTRFS_I(inode),
data_reserved, pos,
write_bytes);
else
btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_check_nocow_unlock(BTRFS_I(inode));
break;
}
@ -1748,7 +1785,7 @@ again:
__pos = round_down(pos,
fs_info->sectorsize) +
(dirty_pages << PAGE_SHIFT);
btrfs_delalloc_release_space(inode,
btrfs_delalloc_release_space(BTRFS_I(inode),
data_reserved, __pos,
release_bytes, true);
}
@ -1758,8 +1795,9 @@ again:
fs_info->sectorsize);
if (copied > 0)
ret = btrfs_dirty_pages(inode, pages, dirty_pages,
pos, copied, &cached_state);
ret = btrfs_dirty_pages(BTRFS_I(inode), pages,
dirty_pages, pos, copied,
&cached_state);
/*
* If we have not locked the extent range, because the range's
@ -1782,7 +1820,7 @@ again:
release_bytes = 0;
if (only_release_metadata)
btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_check_nocow_unlock(BTRFS_I(inode));
if (only_release_metadata && copied > 0) {
lockstart = round_down(pos,
@ -1800,8 +1838,6 @@ again:
cond_resched();
balance_dirty_pages_ratelimited(inode->i_mapping);
if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1)
btrfs_btree_balance_dirty(fs_info);
pos += copied;
num_written += copied;
@ -1811,11 +1847,12 @@ again:
if (release_bytes) {
if (only_release_metadata) {
btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_check_nocow_unlock(BTRFS_I(inode));
btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes, true);
} else {
btrfs_delalloc_release_space(inode, data_reserved,
btrfs_delalloc_release_space(BTRFS_I(inode),
data_reserved,
round_down(pos, fs_info->sectorsize),
release_bytes, true);
}
@ -1926,10 +1963,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
* We will allocate space in case nodatacow is not set,
* so bail
*/
if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) ||
check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes,
true) <= 0) {
if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes)
<= 0) {
inode_unlock(inode);
return -EAGAIN;
}
@ -2598,7 +2633,7 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
cur_offset = start;
while (cur_offset < end) {
ret = __btrfs_drop_extents(trans, root, inode, path,
ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path,
cur_offset, end + 1, &drop_end,
1, 0, 0, NULL);
if (ret != -ENOSPC) {
@ -3176,14 +3211,14 @@ reserve_space:
if (ret < 0)
goto out;
space_reserved = true;
ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
alloc_start, bytes_to_reserve);
if (ret)
goto out;
ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
&cached_state);
if (ret)
goto out;
ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
alloc_start, bytes_to_reserve);
if (ret)
goto out;
ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
alloc_end - alloc_start,
i_blocksize(inode),
@ -3199,7 +3234,7 @@ reserve_space:
ret = btrfs_fallocate_update_isize(inode, offset + len, mode);
out:
if (ret && space_reserved)
btrfs_free_reserved_data_space(inode, data_reserved,
btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
alloc_start, bytes_to_reserve);
extent_changeset_free(data_reserved);
@ -3350,8 +3385,9 @@ static long btrfs_fallocate(struct file *file, int mode,
free_extent_map(em);
break;
}
ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
cur_offset, last_byte - cur_offset);
ret = btrfs_qgroup_reserve_data(BTRFS_I(inode),
&data_reserved, cur_offset,
last_byte - cur_offset);
if (ret < 0) {
cur_offset = last_byte;
free_extent_map(em);
@ -3363,8 +3399,9 @@ static long btrfs_fallocate(struct file *file, int mode,
* range, free reserved data space first, otherwise
* it'll result in false ENOSPC error.
*/
btrfs_free_reserved_data_space(inode, data_reserved,
cur_offset, last_byte - cur_offset);
btrfs_free_reserved_data_space(BTRFS_I(inode),
data_reserved, cur_offset,
last_byte - cur_offset);
}
free_extent_map(em);
cur_offset = last_byte;
@ -3381,7 +3418,7 @@ static long btrfs_fallocate(struct file *file, int mode,
range->len, i_blocksize(inode),
offset + len, &alloc_hint);
else
btrfs_free_reserved_data_space(inode,
btrfs_free_reserved_data_space(BTRFS_I(inode),
data_reserved, range->start,
range->len);
list_del(&range->list);
@ -3402,7 +3439,7 @@ out:
inode_unlock(inode);
/* Let go of our reservation. */
if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
btrfs_free_reserved_data_space(inode, data_reserved,
btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
cur_offset, alloc_end - cur_offset);
extent_changeset_free(data_reserved);
return ret;

View File

@ -1334,8 +1334,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
io_ctl_zero_remaining_pages(io_ctl);
/* Everything is written out, now we dirty the pages in the file. */
ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
i_size_read(inode), &cached_state);
ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
io_ctl->num_pages, 0, i_size_read(inode),
&cached_state);
if (ret)
goto out_nospc;
@ -2703,8 +2704,7 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
* pointed to by the cluster, someone else raced in and freed the
* cluster already. In that case, we just return without changing anything
*/
static int
__btrfs_return_cluster_to_free_space(
static void __btrfs_return_cluster_to_free_space(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster)
{
@ -2756,7 +2756,6 @@ __btrfs_return_cluster_to_free_space(
out:
spin_unlock(&cluster->lock);
btrfs_put_block_group(block_group);
return 0;
}
static void __btrfs_remove_free_space_cache_locked(
@ -2907,12 +2906,11 @@ out:
* Otherwise, it'll get a reference on the block group pointed to by the
* cluster and remove the cluster from it.
*/
int btrfs_return_cluster_to_free_space(
void btrfs_return_cluster_to_free_space(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster)
{
struct btrfs_free_space_ctl *ctl;
int ret;
/* first, get a safe pointer to the block group */
spin_lock(&cluster->lock);
@ -2920,28 +2918,27 @@ int btrfs_return_cluster_to_free_space(
block_group = cluster->block_group;
if (!block_group) {
spin_unlock(&cluster->lock);
return 0;
return;
}
} else if (cluster->block_group != block_group) {
/* someone else has already freed it don't redo their work */
spin_unlock(&cluster->lock);
return 0;
return;
}
atomic_inc(&block_group->count);
btrfs_get_block_group(block_group);
spin_unlock(&cluster->lock);
ctl = block_group->free_space_ctl;
/* now return any extents the cluster had on it */
spin_lock(&ctl->tree_lock);
ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
__btrfs_return_cluster_to_free_space(block_group, cluster);
spin_unlock(&ctl->tree_lock);
btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
/* finally drop our ref */
btrfs_put_block_group(block_group);
return ret;
}
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
@ -3358,7 +3355,7 @@ int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
list_del_init(&entry->list);
if (!ret) {
atomic_inc(&block_group->count);
btrfs_get_block_group(block_group);
list_add_tail(&cluster->block_group_list,
&block_group->cluster_list);
cluster->block_group = block_group;

View File

@ -136,7 +136,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
u64 min_start, u64 *max_extent_size);
int btrfs_return_cluster_to_free_space(
void btrfs_return_cluster_to_free_space(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster);
int btrfs_trim_block_group(struct btrfs_block_group *block_group,

View File

@ -495,7 +495,8 @@ again:
/* Just to make sure we have enough space */
prealloc += 8 * PAGE_SIZE;
ret = btrfs_delalloc_reserve_space(inode, &data_reserved, 0, prealloc);
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, 0,
prealloc);
if (ret)
goto out_put;

File diff suppressed because it is too large Load Diff

View File

@ -164,8 +164,11 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
return 0;
}
/* Check if @flags are a supported and valid set of FS_*_FL flags */
static int check_fsflags(unsigned int flags)
/*
* Check if @flags are a supported and valid set of FS_*_FL flags and that
* the old and new flags are not conflicting
*/
static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NOATIME_FL | FS_NODUMP_FL | \
@ -174,9 +177,19 @@ static int check_fsflags(unsigned int flags)
FS_NOCOW_FL))
return -EOPNOTSUPP;
/* COMPR and NOCOMP on new/old are valid */
if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
return -EINVAL;
if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
return -EINVAL;
/* NOCOW and compression options are mutually exclusive */
if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
return -EINVAL;
if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
return -EINVAL;
return 0;
}
@ -190,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
unsigned int fsflags, old_fsflags;
int ret;
const char *comp = NULL;
u32 binode_flags = binode->flags;
u32 binode_flags;
if (!inode_owner_or_capable(inode))
return -EPERM;
@ -201,22 +214,23 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
return -EFAULT;
ret = check_fsflags(fsflags);
if (ret)
return ret;
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(inode);
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
if (ret)
goto out_unlock;
ret = check_fsflags(old_fsflags, fsflags);
if (ret)
goto out_unlock;
binode_flags = binode->flags;
if (fsflags & FS_SYNC_FL)
binode_flags |= BTRFS_INODE_SYNC;
else
@ -566,6 +580,7 @@ static noinline int create_subvol(struct inode *dir,
struct inode *inode;
int ret;
int err;
dev_t anon_dev = 0;
u64 objectid;
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
u64 index = 0;
@ -578,6 +593,10 @@ static noinline int create_subvol(struct inode *dir,
if (ret)
goto fail_free;
ret = get_anon_bdev(&anon_dev);
if (ret < 0)
goto fail_free;
/*
* Don't create subvolume whose level is not zero. Or qgroup will be
* screwed up since it assumes subvolume qgroup's level to be 0.
@ -660,12 +679,15 @@ static noinline int create_subvol(struct inode *dir,
goto fail;
key.offset = (u64)-1;
new_root = btrfs_get_fs_root(fs_info, objectid, true);
new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
if (IS_ERR(new_root)) {
free_anon_bdev(anon_dev);
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
goto fail;
}
/* Freeing will be done in btrfs_put_root() of new_root */
anon_dev = 0;
btrfs_record_root_in_trans(trans, new_root);
@ -735,6 +757,8 @@ fail:
return ret;
fail_free:
if (anon_dev)
free_anon_bdev(anon_dev);
kfree(root_item);
return ret;
}
@ -762,6 +786,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (!pending_snapshot)
return -ENOMEM;
ret = get_anon_bdev(&pending_snapshot->anon_dev);
if (ret < 0)
goto free_pending;
pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
GFP_KERNEL);
pending_snapshot->path = btrfs_alloc_path();
@ -823,10 +850,16 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
d_instantiate(dentry, inode);
ret = 0;
pending_snapshot->anon_dev = 0;
fail:
/* Prevent double freeing of anon_dev */
if (ret && pending_snapshot->snap)
pending_snapshot->snap->anon_dev = 0;
btrfs_put_root(pending_snapshot->snap);
btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
free_pending:
if (pending_snapshot->anon_dev)
free_anon_bdev(pending_snapshot->anon_dev);
kfree(pending_snapshot->root_item);
btrfs_free_path(pending_snapshot->path);
kfree(pending_snapshot);
@ -1243,7 +1276,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT);
if (ret)
@ -1265,7 +1298,7 @@ again:
while (1) {
lock_extent_bits(tree, page_start, page_end,
&cached_state);
ordered = btrfs_lookup_ordered_extent(inode,
ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
page_start);
unlock_extent_cached(tree, page_start, page_end,
&cached_state);
@ -1333,7 +1366,7 @@ again:
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode, data_reserved,
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
start_index << PAGE_SHIFT,
(page_cnt - i_done) << PAGE_SHIFT, true);
}
@ -1361,7 +1394,7 @@ out:
unlock_page(pages[i]);
put_page(pages[i]);
}
btrfs_delalloc_release_space(inode, data_reserved,
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
@ -3198,11 +3231,15 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_fs_info_args *fi_args;
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
u64 flags_in;
int ret = 0;
fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
if (!fi_args)
return -ENOMEM;
fi_args = memdup_user(arg, sizeof(*fi_args));
if (IS_ERR(fi_args))
return PTR_ERR(fi_args);
flags_in = fi_args->flags;
memset(fi_args, 0, sizeof(*fi_args));
rcu_read_lock();
fi_args->num_devices = fs_devices->num_devices;
@ -3218,6 +3255,23 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
fi_args->sectorsize = fs_info->sectorsize;
fi_args->clone_alignment = fs_info->sectorsize;
if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) {
fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO;
}
if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) {
fi_args->generation = fs_info->generation;
fi_args->flags |= BTRFS_FS_INFO_FLAG_GENERATION;
}
if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) {
memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid,
sizeof(fi_args->metadata_uuid));
fi_args->flags |= BTRFS_FS_INFO_FLAG_METADATA_UUID;
}
if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
ret = -EFAULT;

View File

@ -15,6 +15,7 @@
#include "disk-io.h"
#include "compression.h"
#include "delalloc-space.h"
#include "qgroup.h"
static struct kmem_cache *btrfs_ordered_extent_cache;
@ -152,23 +153,39 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
return ret;
}
/* allocate and add a new ordered_extent into the per-inode tree.
/*
* Allocate and add a new ordered_extent into the per-inode tree.
*
* The tree is given a single reference on the ordered extent that was
* inserted.
*/
static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type, int dio,
int compress_type)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_inode_tree *tree;
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry;
int ret;
tree = &BTRFS_I(inode)->ordered_tree;
if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) {
/* For nocow write, we can release the qgroup rsv right now */
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
if (ret < 0)
return ret;
ret = 0;
} else {
/*
* The ordered extent has reserved qgroup space, release now
* and pass the reserved number for qgroup_record to free.
*/
ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
if (ret < 0)
return ret;
}
entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
if (!entry)
return -ENOMEM;
@ -178,9 +195,10 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->num_bytes = num_bytes;
entry->disk_num_bytes = disk_num_bytes;
entry->bytes_left = num_bytes;
entry->inode = igrab(inode);
entry->inode = igrab(&inode->vfs_inode);
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
@ -197,10 +215,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
INIT_LIST_HEAD(&entry->root_extent_list);
INIT_LIST_HEAD(&entry->work_list);
init_completion(&entry->completion);
INIT_LIST_HEAD(&entry->log_list);
INIT_LIST_HEAD(&entry->trans_list);
trace_btrfs_ordered_extent_add(inode, entry);
trace_btrfs_ordered_extent_add(&inode->vfs_inode, entry);
spin_lock_irq(&tree->lock);
node = tree_insert(&tree->tree, file_offset,
@ -228,14 +244,14 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
* that work has been done at higher layers, so this is truly the
* smallest the extent is going to get.
*/
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, 1);
spin_unlock(&inode->lock);
return 0;
}
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
int type)
{
@ -244,7 +260,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
BTRFS_COMPRESS_NONE);
}
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type)
{
@ -253,7 +269,7 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
BTRFS_COMPRESS_NONE);
}
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type,
int compress_type)
@ -291,12 +307,12 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
* file_offset is updated to one byte past the range that is recorded as
* complete. This allows you to walk forward in the file.
*/
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
int btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size, int uptodate)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_inode_tree *tree;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
int ret;
@ -305,7 +321,6 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
u64 dec_start;
u64 to_dec;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irqsave(&tree->lock, flags);
node = tree_search(tree, *file_offset);
if (!node) {
@ -429,8 +444,6 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
trace_btrfs_ordered_extent_put(entry->inode, entry);
if (refcount_dec_and_test(&entry->refs)) {
ASSERT(list_empty(&entry->log_list));
ASSERT(list_empty(&entry->trans_list));
ASSERT(list_empty(&entry->root_extent_list));
ASSERT(RB_EMPTY_NODE(&entry->rb_node));
if (entry->inode)
@ -698,14 +711,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
* find an ordered extent corresponding to file_offset. return NULL if
* nothing is found, otherwise take a reference on the extent and return it
*/
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
u64 file_offset)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
tree = &BTRFS_I(inode)->ordered_tree;
tree = &inode->ordered_tree;
spin_lock_irq(&tree->lock);
node = tree_search(tree, file_offset);
if (!node)
@ -803,7 +816,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int index = 0;
ordered = btrfs_lookup_ordered_extent(inode, offset);
ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), offset);
if (!ordered)
return 0;

View File

@ -92,6 +92,9 @@ struct btrfs_ordered_extent {
/* compression algorithm */
int compress_type;
/* Qgroup reserved space */
int qgroup_rsv;
/* reference count */
refcount_t refs;
@ -101,12 +104,6 @@ struct btrfs_ordered_extent {
/* list of checksums for insertion when the extent io is done */
struct list_head list;
/* If we need to wait on this to be done */
struct list_head log_list;
/* If the transaction needs to wait on this ordered extent */
struct list_head trans_list;
/* used to wait for the BTRFS_ORDERED_COMPLETE bit */
wait_queue_head_t wait;
@ -150,23 +147,23 @@ void btrfs_remove_ordered_extent(struct inode *inode,
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size, int uptodate);
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
int btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size,
int uptodate);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type);
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type,
int compress_type);
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
u64 file_offset);
void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry, int wait);

View File

@ -11,7 +11,6 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>
#include <linux/sizes.h>
#include "ctree.h"
#include "transaction.h"
@ -22,6 +21,7 @@
#include "extent_io.h"
#include "qgroup.h"
#include "block-group.h"
#include "sysfs.h"
/* TODO XXX FIXME
* - subvol delete -> delete when ref goes to 0? delete limits also?
@ -220,10 +220,12 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
return qgroup;
}
static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup)
{
struct btrfs_qgroup_list *list;
btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
list_del(&qgroup->dirty);
while (!list_empty(&qgroup->groups)) {
list = list_first_entry(&qgroup->groups,
@ -252,7 +254,7 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
return -ENOENT;
rb_erase(&qgroup->node, &fs_info->qgroup_tree);
__del_qgroup_rb(qgroup);
__del_qgroup_rb(fs_info, qgroup);
return 0;
}
@ -351,6 +353,9 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
goto out;
}
ret = btrfs_sysfs_add_qgroups(fs_info);
if (ret < 0)
goto out;
/* default this to quota off, in case no status key is found */
fs_info->qgroup_flags = 0;
@ -412,6 +417,10 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
goto out;
}
}
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
if (ret < 0)
goto out;
switch (found_key.type) {
case BTRFS_QGROUP_INFO_KEY: {
struct btrfs_qgroup_info_item *ptr;
@ -500,11 +509,50 @@ out:
ulist_free(fs_info->qgroup_ulist);
fs_info->qgroup_ulist = NULL;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
btrfs_sysfs_del_qgroups(fs_info);
}
return ret < 0 ? ret : 0;
}
/*
* Called in close_ctree() when quota is still enabled. This verifies we don't
* leak some reserved space.
*
* Return false if no reserved space is left.
* Return true if some reserved space is leaked.
*/
bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info)
{
struct rb_node *node;
bool ret = false;
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return ret;
/*
* Since we're unmounting, there is no race and no need to grab qgroup
* lock. And here we don't go post-order to provide a more user
* friendly sorted result.
*/
for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) {
struct btrfs_qgroup *qgroup;
int i;
qgroup = rb_entry(node, struct btrfs_qgroup, node);
for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) {
if (qgroup->rsv.values[i]) {
ret = true;
btrfs_warn(fs_info,
"qgroup %hu/%llu has unreleased space, type %d rsv %llu",
btrfs_qgroup_level(qgroup->qgroupid),
btrfs_qgroup_subvolid(qgroup->qgroupid),
i, qgroup->rsv.values[i]);
}
}
}
return ret;
}
/*
* This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
* first two are in single-threaded paths.And for the third one, we have set
@ -519,7 +567,7 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
while ((n = rb_first(&fs_info->qgroup_tree))) {
qgroup = rb_entry(n, struct btrfs_qgroup, node);
rb_erase(n, &fs_info->qgroup_tree);
__del_qgroup_rb(qgroup);
__del_qgroup_rb(fs_info, qgroup);
}
/*
* We call btrfs_free_qgroup_config() when unmounting
@ -528,6 +576,7 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
*/
ulist_free(fs_info->qgroup_ulist);
fs_info->qgroup_ulist = NULL;
btrfs_sysfs_del_qgroups(fs_info);
}
static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
@ -900,6 +949,9 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
goto out;
}
ret = btrfs_sysfs_add_qgroups(fs_info);
if (ret < 0)
goto out;
/*
* 1 for quota root item
* 1 for BTRFS_QGROUP_STATUS item
@ -987,6 +1039,11 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
}
ret = btrfs_next_item(tree_root, path);
if (ret < 0) {
@ -1011,6 +1068,11 @@ out_add_root:
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
ret = btrfs_commit_transaction(trans);
trans = NULL;
@ -1046,6 +1108,7 @@ out:
fs_info->qgroup_ulist = NULL;
if (trans)
btrfs_end_transaction(trans);
btrfs_sysfs_del_qgroups(fs_info);
}
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
@ -1398,8 +1461,11 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
qgroup = add_qgroup_rb(fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock);
if (IS_ERR(qgroup))
if (IS_ERR(qgroup)) {
ret = PTR_ERR(qgroup);
goto out;
}
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
@ -2818,6 +2884,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
unlock:
spin_unlock(&fs_info->qgroup_lock);
if (!ret)
ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup);
out:
if (!committing)
mutex_unlock(&fs_info->qgroup_ioctl_lock);
@ -2826,20 +2894,8 @@ out:
return ret;
}
/*
* Two limits to commit transaction in advance.
*
* For RATIO, it will be 1/RATIO of the remaining limit as threshold.
* For SIZE, it will be in byte unit as threshold.
*/
#define QGROUP_FREE_RATIO 32
#define QGROUP_FREE_SIZE SZ_32M
static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
const struct btrfs_qgroup *qg, u64 num_bytes)
static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
{
u64 free;
u64 threshold;
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
return false;
@ -2848,32 +2904,6 @@ static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
return false;
/*
* Even if we passed the check, it's better to check if reservation
* for meta_pertrans is pushing us near limit.
* If there is too much pertrans reservation or it's near the limit,
* let's try commit transaction to free some, using transaction_kthread
*/
if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
free = qg->max_excl - qgroup_rsv_total(qg) - qg->excl;
threshold = min_t(u64, qg->max_excl / QGROUP_FREE_RATIO,
QGROUP_FREE_SIZE);
} else {
free = qg->max_rfer - qgroup_rsv_total(qg) - qg->rfer;
threshold = min_t(u64, qg->max_rfer / QGROUP_FREE_RATIO,
QGROUP_FREE_SIZE);
}
/*
* Use transaction_kthread to commit transaction, so we no
* longer need to bother nested transaction nor lock context.
*/
if (free < threshold)
btrfs_commit_transaction_locksafe(fs_info);
}
return true;
}
@ -2921,7 +2951,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
qg = unode_aux_to_qgroup(unode);
if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
if (enforce && !qgroup_check_limits(qg, num_bytes)) {
ret = -EDQUOT;
goto out;
}
@ -3378,28 +3408,132 @@ btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
}
}
#define rbtree_iterate_from_safe(node, next, start) \
for (node = start; node && ({ next = rb_next(node); 1;}); node = next)
static int qgroup_unreserve_range(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start,
u64 len)
{
struct rb_node *node;
struct rb_node *next;
struct ulist_node *entry = NULL;
int ret = 0;
node = reserved->range_changed.root.rb_node;
while (node) {
entry = rb_entry(node, struct ulist_node, rb_node);
if (entry->val < start)
node = node->rb_right;
else if (entry)
node = node->rb_left;
else
break;
}
/* Empty changeset */
if (!entry)
return 0;
if (entry->val > start && rb_prev(&entry->rb_node))
entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node,
rb_node);
rbtree_iterate_from_safe(node, next, &entry->rb_node) {
u64 entry_start;
u64 entry_end;
u64 entry_len;
int clear_ret;
entry = rb_entry(node, struct ulist_node, rb_node);
entry_start = entry->val;
entry_end = entry->aux;
entry_len = entry_end - entry_start + 1;
if (entry_start >= start + len)
break;
if (entry_start + entry_len <= start)
continue;
/*
* Now the entry is in [start, start + len), revert the
* EXTENT_QGROUP_RESERVED bit.
*/
clear_ret = clear_extent_bits(&inode->io_tree, entry_start,
entry_end, EXTENT_QGROUP_RESERVED);
if (!ret && clear_ret < 0)
ret = clear_ret;
ulist_del(&reserved->range_changed, entry->val, entry->aux);
if (likely(reserved->bytes_changed >= entry_len)) {
reserved->bytes_changed -= entry_len;
} else {
WARN_ON(1);
reserved->bytes_changed = 0;
}
}
return ret;
}
/*
* Reserve qgroup space for range [start, start + len).
* Try to free some space for qgroup.
*
* This function will either reserve space from related qgroups or doing
* nothing if the range is already reserved.
* For qgroup, there are only 3 ways to free qgroup space:
* - Flush nodatacow write
* Any nodatacow write will free its reserved data space at run_delalloc_range().
* In theory, we should only flush nodatacow inodes, but it's not yet
* possible, so we need to flush the whole root.
*
* Return 0 for successful reserve
* Return <0 for error (including -EQUOT)
* - Wait for ordered extents
* When ordered extents are finished, their reserved metadata is finally
* converted to per_trans status, which can be freed by later commit
* transaction.
*
* NOTE: this function may sleep for memory allocation.
* if btrfs_qgroup_reserve_data() is called multiple times with
* same @reserved, caller must ensure when error happens it's OK
* to free *ALL* reserved space.
* - Commit transaction
* This would free the meta_per_trans space.
* In theory this shouldn't provide much space, but any more qgroup space
* is needed.
*/
int btrfs_qgroup_reserve_data(struct inode *inode,
static int try_flush_qgroup(struct btrfs_root *root)
{
struct btrfs_trans_handle *trans;
int ret;
/*
* We don't want to run flush again and again, so if there is a running
* one, we won't try to start a new flush, but exit directly.
*/
if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
wait_event(root->qgroup_flush_wait,
!test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
return 0;
}
ret = btrfs_start_delalloc_snapshot(root);
if (ret < 0)
goto out;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
ret = btrfs_commit_transaction(trans);
out:
clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
wake_up(&root->qgroup_flush_wait);
return ret;
}
static int qgroup_reserve_data(struct btrfs_inode *inode,
struct extent_changeset **reserved_ret, u64 start,
u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct ulist_node *unode;
struct ulist_iterator uiter;
struct btrfs_root *root = inode->root;
struct extent_changeset *reserved;
bool new_reserved = false;
u64 orig_reserved;
u64 to_reserve;
int ret;
@ -3412,6 +3546,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
if (WARN_ON(!reserved_ret))
return -EINVAL;
if (!*reserved_ret) {
new_reserved = true;
*reserved_ret = extent_changeset_alloc();
if (!*reserved_ret)
return -ENOMEM;
@ -3419,15 +3554,15 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
reserved = *reserved_ret;
/* Record already reserved space */
orig_reserved = reserved->bytes_changed;
ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
ret = set_record_extent_bits(&inode->io_tree, start,
start + len -1, EXTENT_QGROUP_RESERVED, reserved);
/* Newly reserved space */
to_reserve = reserved->bytes_changed - orig_reserved;
trace_btrfs_qgroup_reserve_data(inode, start, len,
trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len,
to_reserve, QGROUP_RESERVE);
if (ret < 0)
goto cleanup;
goto out;
ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
if (ret < 0)
goto cleanup;
@ -3435,23 +3570,49 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
return ret;
cleanup:
/* cleanup *ALL* already reserved ranges */
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(&reserved->range_changed, &uiter)))
clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
/* Also free data bytes of already reserved one */
btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid,
orig_reserved, BTRFS_QGROUP_RSV_DATA);
extent_changeset_release(reserved);
qgroup_unreserve_range(inode, reserved, start, len);
out:
if (new_reserved) {
extent_changeset_release(reserved);
kfree(reserved);
*reserved_ret = NULL;
}
return ret;
}
/*
* Reserve qgroup space for range [start, start + len).
*
* This function will either reserve space from related qgroups or do nothing
* if the range is already reserved.
*
* Return 0 for successful reservation
* Return <0 for error (including -EQUOT)
*
* NOTE: This function may sleep for memory allocation, dirty page flushing and
* commit transaction. So caller should not hold any dirty page locked.
*/
int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
struct extent_changeset **reserved_ret, u64 start,
u64 len)
{
int ret;
ret = qgroup_reserve_data(inode, reserved_ret, start, len);
if (ret <= 0 && ret != -EDQUOT)
return ret;
ret = try_flush_qgroup(inode->root);
if (ret < 0)
return ret;
return qgroup_reserve_data(inode, reserved_ret, start, len);
}
/* Free ranges specified by @reserved, normally in error path */
static int qgroup_free_reserved_data(struct inode *inode,
static int qgroup_free_reserved_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_root *root = inode->root;
struct ulist_node *unode;
struct ulist_iterator uiter;
struct extent_changeset changeset;
@ -3487,8 +3648,8 @@ static int qgroup_free_reserved_data(struct inode *inode,
* EXTENT_QGROUP_RESERVED, we won't double free.
* So not need to rush.
*/
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
free_start, free_start + free_len - 1,
ret = clear_record_extent_bits(&inode->io_tree, free_start,
free_start + free_len - 1,
EXTENT_QGROUP_RESERVED, &changeset);
if (ret < 0)
goto out;
@ -3502,7 +3663,7 @@ out:
return ret;
}
static int __btrfs_qgroup_release_data(struct inode *inode,
static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len,
int free)
{
@ -3510,8 +3671,7 @@ static int __btrfs_qgroup_release_data(struct inode *inode,
int trace_op = QGROUP_RELEASE;
int ret;
if (!test_bit(BTRFS_FS_QUOTA_ENABLED,
&BTRFS_I(inode)->root->fs_info->flags))
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &inode->root->fs_info->flags))
return 0;
/* In release case, we shouldn't have @reserved */
@ -3519,18 +3679,18 @@ static int __btrfs_qgroup_release_data(struct inode *inode,
if (free && reserved)
return qgroup_free_reserved_data(inode, reserved, start, len);
extent_changeset_init(&changeset);
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1,
EXTENT_QGROUP_RESERVED, &changeset);
if (ret < 0)
goto out;
if (free)
trace_op = QGROUP_FREE;
trace_btrfs_qgroup_release_data(inode, start, len,
trace_btrfs_qgroup_release_data(&inode->vfs_inode, start, len,
changeset.bytes_changed, trace_op);
if (free)
btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
BTRFS_I(inode)->root->root_key.objectid,
btrfs_qgroup_free_refroot(inode->root->fs_info,
inode->root->root_key.objectid,
changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
ret = changeset.bytes_changed;
out:
@ -3550,7 +3710,7 @@ out:
*
* NOTE: This function may sleep for memory allocation.
*/
int btrfs_qgroup_free_data(struct inode *inode,
int btrfs_qgroup_free_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len)
{
return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
@ -3571,7 +3731,7 @@ int btrfs_qgroup_free_data(struct inode *inode,
*
* NOTE: This function may sleep for memory allocation.
*/
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len)
{
return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
}
@ -3616,7 +3776,7 @@ static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
return num_bytes;
}
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@ -3643,6 +3803,21 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
return ret;
}
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce)
{
int ret;
ret = qgroup_reserve_meta(root, num_bytes, type, enforce);
if (ret <= 0 && ret != -EDQUOT)
return ret;
ret = try_flush_qgroup(root);
if (ret < 0)
return ret;
return qgroup_reserve_meta(root, num_bytes, type, enforce);
}
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@ -3742,7 +3917,7 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
* Check qgroup reserved space leaking, normally at destroy inode
* time
*/
void btrfs_qgroup_check_reserved_leak(struct inode *inode)
void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode)
{
struct extent_changeset changeset;
struct ulist_node *unode;
@ -3750,19 +3925,19 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
int ret;
extent_changeset_init(&changeset);
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
ret = clear_record_extent_bits(&inode->io_tree, 0, (u64)-1,
EXTENT_QGROUP_RESERVED, &changeset);
WARN_ON(ret < 0);
if (WARN_ON(changeset.bytes_changed)) {
ULIST_ITER_INIT(&iter);
while ((unode = ulist_next(&changeset.range_changed, &iter))) {
btrfs_warn(BTRFS_I(inode)->root->fs_info,
"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
inode->i_ino, unode->val, unode->aux);
btrfs_warn(inode->root->fs_info,
"leaking qgroup reserved space, ino: %llu, start: %llu, end: %llu",
btrfs_ino(inode), unode->val, unode->aux);
}
btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
BTRFS_I(inode)->root->root_key.objectid,
btrfs_qgroup_free_refroot(inode->root->fs_info,
inode->root->root_key.objectid,
changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
}

View File

@ -8,6 +8,7 @@
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/kobject.h>
#include "ulist.h"
#include "delayed-ref.h"
@ -223,8 +224,18 @@ struct btrfs_qgroup {
*/
u64 old_refcnt;
u64 new_refcnt;
/*
* Sysfs kobjectid
*/
struct kobject kobj;
};
static inline u64 btrfs_qgroup_subvolid(u64 qgroupid)
{
return (qgroupid & ((1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1));
}
/*
* For qgroup event trace points only
*/
@ -344,12 +355,12 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
#endif
/* New io_tree based accurate qgroup reserve API */
int btrfs_qgroup_reserve_data(struct inode *inode,
int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start,
u64 len);
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce);
/* Reserve metadata space for pertrans and prealloc type */
@ -399,7 +410,7 @@ void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
*/
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode);
/* btrfs_qgroup_swapped_blocks related functions */
void btrfs_qgroup_init_swapped_blocks(
@ -415,5 +426,6 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *eb);
void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info);
#endif

View File

@ -1083,7 +1083,6 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
unsigned long bio_max_len)
{
struct bio *last = bio_list->tail;
u64 last_end = 0;
int ret;
struct bio *bio;
struct btrfs_bio_stripe *stripe;
@ -1098,15 +1097,14 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
/* see if we can add this page onto our existing bio */
if (last) {
last_end = (u64)last->bi_iter.bi_sector << 9;
u64 last_end = (u64)last->bi_iter.bi_sector << 9;
last_end += last->bi_iter.bi_size;
/*
* we can't merge these if they are from different
* devices or if they are not contiguous
*/
if (last_end == disk_start && stripe->dev->bdev &&
!last->bi_status &&
if (last_end == disk_start && !last->bi_status &&
last->bi_disk == stripe->dev->bdev->bd_disk &&
last->bi_partno == stripe->dev->bdev->bd_partno) {
ret = bio_add_page(last, page, PAGE_SIZE, 0);
@ -1117,6 +1115,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
/* put a new bio on the list */
bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
btrfs_io_bio(bio)->device = stripe->dev;
bio->bi_iter.bi_size = 0;
bio_set_dev(bio, stripe->dev->bdev);
bio->bi_iter.bi_sector = disk_start >> 9;
@ -1325,11 +1324,7 @@ write_data:
atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
while (1) {
bio = bio_list_pop(&bio_list);
if (!bio)
break;
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
bio->bi_opf = REQ_OP_WRITE;
@ -1354,7 +1349,6 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
struct bio *bio)
{
u64 physical = bio->bi_iter.bi_sector;
u64 stripe_start;
int i;
struct btrfs_bio_stripe *stripe;
@ -1362,9 +1356,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
for (i = 0; i < rbio->bbio->num_stripes; i++) {
stripe = &rbio->bbio->stripes[i];
stripe_start = stripe->physical;
if (physical >= stripe_start &&
physical < stripe_start + rbio->stripe_len &&
if (in_range(physical, stripe->physical, rbio->stripe_len) &&
stripe->dev->bdev &&
bio->bi_disk == stripe->dev->bdev->bd_disk &&
bio->bi_partno == stripe->dev->bdev->bd_partno) {
@ -1382,18 +1374,14 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
struct bio *bio)
{
u64 logical = bio->bi_iter.bi_sector;
u64 stripe_start;
u64 logical = (u64)bio->bi_iter.bi_sector << 9;
int i;
logical <<= 9;
for (i = 0; i < rbio->nr_data; i++) {
stripe_start = rbio->bbio->raid_map[i];
if (logical >= stripe_start &&
logical < stripe_start + rbio->stripe_len) {
u64 stripe_start = rbio->bbio->raid_map[i];
if (in_range(logical, stripe_start, rbio->stripe_len))
return i;
}
}
return -1;
}
@ -1567,11 +1555,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
* not to touch it after that
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
while (1) {
bio = bio_list_pop(&bio_list);
if (!bio)
break;
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_private = rbio;
bio->bi_end_io = raid_rmw_end_io;
bio->bi_opf = REQ_OP_READ;
@ -1878,11 +1862,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
}
/* make sure our ps and qs are in order */
if (faila > failb) {
int tmp = failb;
failb = faila;
faila = tmp;
}
if (faila > failb)
swap(faila, failb);
/* if the q stripe is failed, do a pstripe reconstruction
* from the xors.
@ -2102,7 +2083,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
*/
if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
__raid_recover_end_io(rbio);
goto out;
return 0;
} else {
goto cleanup;
}
@ -2113,11 +2094,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
* not to touch it after that
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
while (1) {
bio = bio_list_pop(&bio_list);
if (!bio)
break;
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_private = rbio;
bio->bi_end_io = raid_recover_end_io;
bio->bi_opf = REQ_OP_READ;
@ -2126,7 +2103,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
submit_bio(bio);
}
out:
return 0;
cleanup:
@ -2482,11 +2459,7 @@ submit_write:
atomic_set(&rbio->stripes_pending, nr_data);
while (1) {
bio = bio_list_pop(&bio_list);
if (!bio)
break;
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
bio->bi_opf = REQ_OP_WRITE;
@ -2664,11 +2637,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
* not to touch it after that
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
while (1) {
bio = bio_list_pop(&bio_list);
if (!bio)
break;
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_private = rbio;
bio->bi_end_io = raid56_parity_scrub_end_io;
bio->bi_opf = REQ_OP_READ;

View File

@ -286,6 +286,8 @@ static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
exist_re = insert_root_entry(&exist->roots, re);
if (exist_re)
kfree(re);
} else {
kfree(re);
}
kfree(be);
return exist;

View File

@ -68,8 +68,8 @@ static int copy_inline_to_page(struct inode *inode,
* reservation here. Also we must not do the reservation while holding
* a transaction open, otherwise we would deadlock.
*/
ret = btrfs_delalloc_reserve_space(inode, &data_reserved, file_offset,
block_size);
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
file_offset, block_size);
if (ret)
goto out;
@ -84,7 +84,8 @@ static int copy_inline_to_page(struct inode *inode,
clear_extent_bit(&BTRFS_I(inode)->io_tree, file_offset, range_end,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, NULL);
ret = btrfs_set_extent_delalloc(inode, file_offset, range_end, 0, NULL);
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), file_offset, range_end,
0, NULL);
if (ret)
goto out_unlock;
@ -133,8 +134,8 @@ out_unlock:
put_page(page);
}
if (ret)
btrfs_delalloc_release_space(inode, data_reserved, file_offset,
block_size, true);
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
file_offset, block_size, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), block_size);
out:
extent_changeset_free(data_reserved);
@ -336,6 +337,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
while (1) {
u64 next_key_min_offset = key.offset + 1;
struct btrfs_file_extent_item *extent;
u64 extent_gen;
int type;
u32 size;
struct btrfs_key new_key;
@ -384,6 +386,7 @@ process_slot:
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
extent_gen = btrfs_file_extent_generation(leaf, extent);
comp = btrfs_file_extent_compression(leaf, extent);
type = btrfs_file_extent_type(leaf, extent);
if (type == BTRFS_FILE_EXTENT_REG ||
@ -488,6 +491,19 @@ process_slot:
btrfs_release_path(path);
/*
* If this is a new extent update the last_reflink_trans of both
* inodes. This is used by fsync to make sure it does not log
* multiple checksum items with overlapping ranges. For older
* extents we don't need to do it since inode logging skips the
* checksums for older extents. Also ignore holes and inline
* extents because they don't have checksums in the csum tree.
*/
if (extent_gen == trans->transid && disko > 0) {
BTRFS_I(src)->last_reflink_trans = trans->transid;
BTRFS_I(inode)->last_reflink_trans = trans->transid;
}
last_dest_end = ALIGN(new_key.offset + datal,
fs_info->sectorsize);
ret = clone_finish_inode_update(trans, inode, last_dest_end,

View File

@ -1686,12 +1686,20 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
btrfs_unlock_up_safe(path, 0);
}
min_reserved = fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
/*
* In merge_reloc_root(), we modify the upper level pointer to swap the
* tree blocks between reloc tree and subvolume tree. Thus for tree
* block COW, we COW at most from level 1 to root level for each tree.
*
* Thus the needed metadata size is at most root_level * nodesize,
* and * 2 since we have two trees to COW.
*/
min_reserved = fs_info->nodesize * btrfs_root_level(root_item) * 2;
memset(&next_key, 0, sizeof(next_key));
while (1) {
ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
BTRFS_RESERVE_FLUSH_ALL);
BTRFS_RESERVE_FLUSH_LIMIT);
if (ret) {
err = ret;
goto out;
@ -2571,58 +2579,50 @@ out_free_blocks:
return err;
}
static noinline_for_stack
int prealloc_file_extent_cluster(struct inode *inode,
struct file_extent_cluster *cluster)
static noinline_for_stack int prealloc_file_extent_cluster(
struct btrfs_inode *inode,
struct file_extent_cluster *cluster)
{
u64 alloc_hint = 0;
u64 start;
u64 end;
u64 offset = BTRFS_I(inode)->index_cnt;
u64 offset = inode->index_cnt;
u64 num_bytes;
int nr = 0;
int nr;
int ret = 0;
u64 prealloc_start = cluster->start - offset;
u64 prealloc_end = cluster->end - offset;
u64 cur_offset;
struct extent_changeset *data_reserved = NULL;
u64 cur_offset = prealloc_start;
BUG_ON(cluster->start != cluster->boundary[0]);
inode_lock(inode);
ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start,
prealloc_end + 1 - prealloc_start);
ret = btrfs_alloc_data_chunk_ondemand(inode,
prealloc_end + 1 - prealloc_start);
if (ret)
goto out;
return ret;
cur_offset = prealloc_start;
while (nr < cluster->nr) {
inode_lock(&inode->vfs_inode);
for (nr = 0; nr < cluster->nr; nr++) {
start = cluster->boundary[nr] - offset;
if (nr + 1 < cluster->nr)
end = cluster->boundary[nr + 1] - 1 - offset;
else
end = cluster->end - offset;
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
lock_extent(&inode->io_tree, start, end);
num_bytes = end + 1 - start;
if (cur_offset < start)
btrfs_free_reserved_data_space(inode, data_reserved,
cur_offset, start - cur_offset);
ret = btrfs_prealloc_file_range(inode, 0, start,
ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
num_bytes, num_bytes,
end + 1, &alloc_hint);
cur_offset = end + 1;
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
unlock_extent(&inode->io_tree, start, end);
if (ret)
break;
nr++;
}
inode_unlock(&inode->vfs_inode);
if (cur_offset < prealloc_end)
btrfs_free_reserved_data_space(inode, data_reserved,
cur_offset, prealloc_end + 1 - cur_offset);
out:
inode_unlock(inode);
extent_changeset_free(data_reserved);
btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
prealloc_end + 1 - cur_offset);
return ret;
}
@ -2664,7 +2664,8 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
*/
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
{
return atomic_read(&fs_info->balance_cancel_req);
return atomic_read(&fs_info->balance_cancel_req) ||
fatal_signal_pending(current);
}
ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
@ -2690,7 +2691,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (!ra)
return -ENOMEM;
ret = prealloc_file_extent_cluster(inode, cluster);
ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
if (ret)
goto out;
@ -2762,8 +2763,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
nr++;
}
ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
NULL);
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start,
page_end, 0, NULL);
if (ret) {
unlock_page(page);
put_page(page);
@ -3872,9 +3873,9 @@ out:
* cloning checksum properly handles the nodatasum extents.
* it also saves CPU time to re-calculate the checksum.
*/
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered;
int ret;
@ -3885,7 +3886,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
ordered = btrfs_lookup_ordered_extent(inode, file_pos);
BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
disk_bytenr = file_pos + inode->index_cnt;
ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
disk_bytenr + len - 1, &list, 0);
if (ret)

View File

@ -1616,13 +1616,9 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
struct scrub_page *spage = sblock->pagev[page_num];
BUG_ON(spage->page == NULL);
if (spage->io_error) {
void *mapped_buffer = kmap_atomic(spage->page);
if (spage->io_error)
clear_page(page_address(spage->page));
clear_page(mapped_buffer);
flush_dcache_page(spage->page);
kunmap_atomic(mapped_buffer);
}
return scrub_add_page_to_wr_bio(sblock->sctx, spage);
}
@ -1790,42 +1786,21 @@ static int scrub_checksum_data(struct scrub_block *sblock)
struct btrfs_fs_info *fs_info = sctx->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
u8 csum[BTRFS_CSUM_SIZE];
u8 *on_disk_csum;
struct page *page;
void *buffer;
u64 len;
int index;
struct scrub_page *spage;
char *kaddr;
BUG_ON(sblock->page_count < 1);
if (!sblock->pagev[0]->have_csum)
spage = sblock->pagev[0];
if (!spage->have_csum)
return 0;
kaddr = page_address(spage->page);
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
on_disk_csum = sblock->pagev[0]->csum;
page = sblock->pagev[0]->page;
buffer = kmap_atomic(page);
len = sctx->fs_info->sectorsize;
index = 0;
for (;;) {
u64 l = min_t(u64, len, PAGE_SIZE);
crypto_shash_update(shash, buffer, l);
kunmap_atomic(buffer);
len -= l;
if (len == 0)
break;
index++;
BUG_ON(index >= sblock->page_count);
BUG_ON(!sblock->pagev[index]->page);
page = sblock->pagev[index]->page;
buffer = kmap_atomic(page);
}
crypto_shash_final(shash, csum);
if (memcmp(csum, on_disk_csum, sctx->csum_size))
if (memcmp(csum, spage->csum, sctx->csum_size))
sblock->checksum_error = 1;
return sblock->checksum_error;
@ -1839,20 +1814,15 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
u8 calculated_csum[BTRFS_CSUM_SIZE];
u8 on_disk_csum[BTRFS_CSUM_SIZE];
struct page *page;
void *mapped_buffer;
u64 mapped_size;
void *p;
u64 len;
int index;
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
const int num_pages = sctx->fs_info->nodesize >> PAGE_SHIFT;
int i;
struct scrub_page *spage;
char *kaddr;
BUG_ON(sblock->page_count < 1);
page = sblock->pagev[0]->page;
mapped_buffer = kmap_atomic(page);
h = (struct btrfs_header *)mapped_buffer;
spage = sblock->pagev[0];
kaddr = page_address(spage->page);
h = (struct btrfs_header *)kaddr;
memcpy(on_disk_csum, h->csum, sctx->csum_size);
/*
@ -1860,40 +1830,29 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
* a) don't have an extent buffer and
* b) the page is already kmapped
*/
if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
if (spage->logical != btrfs_stack_header_bytenr(h))
sblock->header_error = 1;
if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
if (spage->generation != btrfs_stack_header_generation(h)) {
sblock->header_error = 1;
sblock->generation_error = 1;
}
if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
if (!scrub_check_fsid(h->fsid, spage))
sblock->header_error = 1;
if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
BTRFS_UUID_SIZE))
sblock->header_error = 1;
len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
index = 0;
for (;;) {
u64 l = min_t(u64, len, mapped_size);
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
PAGE_SIZE - BTRFS_CSUM_SIZE);
crypto_shash_update(shash, p, l);
kunmap_atomic(mapped_buffer);
len -= l;
if (len == 0)
break;
index++;
BUG_ON(index >= sblock->page_count);
BUG_ON(!sblock->pagev[index]->page);
page = sblock->pagev[index]->page;
mapped_buffer = kmap_atomic(page);
mapped_size = PAGE_SIZE;
p = mapped_buffer;
for (i = 1; i < num_pages; i++) {
kaddr = page_address(sblock->pagev[i]->page);
crypto_shash_update(shash, kaddr, PAGE_SIZE);
}
crypto_shash_final(shash, calculated_csum);
@ -1910,57 +1869,31 @@ static int scrub_checksum_super(struct scrub_block *sblock)
struct btrfs_fs_info *fs_info = sctx->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
u8 calculated_csum[BTRFS_CSUM_SIZE];
u8 on_disk_csum[BTRFS_CSUM_SIZE];
struct page *page;
void *mapped_buffer;
u64 mapped_size;
void *p;
struct scrub_page *spage;
char *kaddr;
int fail_gen = 0;
int fail_cor = 0;
u64 len;
int index;
BUG_ON(sblock->page_count < 1);
spage = sblock->pagev[0];
kaddr = page_address(spage->page);
s = (struct btrfs_super_block *)kaddr;
if (spage->logical != btrfs_super_bytenr(s))
++fail_cor;
if (spage->generation != btrfs_super_generation(s))
++fail_gen;
if (!scrub_check_fsid(s->fsid, spage))
++fail_cor;
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum);
BUG_ON(sblock->page_count < 1);
page = sblock->pagev[0]->page;
mapped_buffer = kmap_atomic(page);
s = (struct btrfs_super_block *)mapped_buffer;
memcpy(on_disk_csum, s->csum, sctx->csum_size);
if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
++fail_cor;
if (sblock->pagev[0]->generation != btrfs_super_generation(s))
++fail_gen;
if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
++fail_cor;
len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
index = 0;
for (;;) {
u64 l = min_t(u64, len, mapped_size);
crypto_shash_update(shash, p, l);
kunmap_atomic(mapped_buffer);
len -= l;
if (len == 0)
break;
index++;
BUG_ON(index >= sblock->page_count);
BUG_ON(!sblock->pagev[index]->page);
page = sblock->pagev[index]->page;
mapped_buffer = kmap_atomic(page);
mapped_size = PAGE_SIZE;
p = mapped_buffer;
}
crypto_shash_final(shash, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
if (memcmp(calculated_csum, s->csum, sctx->csum_size))
++fail_cor;
if (fail_cor + fail_gen) {
@ -1973,10 +1906,10 @@ static int scrub_checksum_super(struct scrub_block *sblock)
++sctx->stat.super_errors;
spin_unlock(&sctx->stat_lock);
if (fail_cor)
btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
btrfs_dev_stat_inc_and_print(spage->dev,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
else
btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
btrfs_dev_stat_inc_and_print(spage->dev,
BTRFS_DEV_STAT_GENERATION_ERRS);
}
@ -3758,7 +3691,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
struct btrfs_fs_info *fs_info = sctx->fs_info;
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EIO;
return -EROFS;
/* Seed devices of a new filesystem has their own generation. */
if (scrub_dev->fs_devices != fs_info->fs_devices)

View File

@ -468,8 +468,8 @@ again:
"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
cache->start, cache->length, cache->used, cache->pinned,
cache->reserved, cache->ro ? "[readonly]" : "");
btrfs_dump_free_space(cache, bytes);
spin_unlock(&cache->lock);
btrfs_dump_free_space(cache, bytes);
}
if (++index < BTRFS_NR_RAID_TYPES)
goto again;

View File

@ -67,6 +67,21 @@ static struct file_system_type btrfs_root_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
/*
* Generally the error codes correspond to their respective errors, but there
* are a few special cases.
*
* EUCLEAN: Any sort of corruption that we encounter. The tree-checker for
* instance will return EUCLEAN if any of the blocks are corrupted in
* a way that is problematic. We want to reserve EUCLEAN for these
* sort of corruptions.
*
* EROFS: If we check BTRFS_FS_STATE_ERROR and fail out with a return error, we
* need to use EROFS for this case. We will have no idea of the
* original failure, that will have been reported at the time we tripped
* over the error. Each subsequent error that doesn't have any context
* of the original error should use EROFS when handling BTRFS_FS_STATE_ERROR.
*/
const char * __attribute_const__ btrfs_decode_error(int errno)
{
char *errstr = "unknown";
@ -326,7 +341,6 @@ enum {
Opt_defrag, Opt_nodefrag,
Opt_discard, Opt_nodiscard,
Opt_discard_mode,
Opt_nologreplay,
Opt_norecovery,
Opt_ratio,
Opt_rescan_uuid_tree,
@ -340,13 +354,15 @@ enum {
Opt_subvolid,
Opt_thread_pool,
Opt_treelog, Opt_notreelog,
Opt_usebackuproot,
Opt_user_subvol_rm_allowed,
/* Rescue options */
Opt_rescue,
Opt_usebackuproot,
Opt_nologreplay,
/* Deprecated options */
Opt_alloc_start,
Opt_recovery,
Opt_subvolrootid,
/* Debugging options */
Opt_check_integrity,
@ -390,7 +406,6 @@ static const match_table_t tokens = {
{Opt_discard, "discard"},
{Opt_discard_mode, "discard=%s"},
{Opt_nodiscard, "nodiscard"},
{Opt_nologreplay, "nologreplay"},
{Opt_norecovery, "norecovery"},
{Opt_ratio, "metadata_ratio=%u"},
{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
@ -408,13 +423,17 @@ static const match_table_t tokens = {
{Opt_thread_pool, "thread_pool=%u"},
{Opt_treelog, "treelog"},
{Opt_notreelog, "notreelog"},
{Opt_usebackuproot, "usebackuproot"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
/* Rescue options */
{Opt_rescue, "rescue=%s"},
/* Deprecated, with alias rescue=nologreplay */
{Opt_nologreplay, "nologreplay"},
/* Deprecated, with alias rescue=usebackuproot */
{Opt_usebackuproot, "usebackuproot"},
/* Deprecated options */
{Opt_alloc_start, "alloc_start=%s"},
{Opt_recovery, "recovery"},
{Opt_subvolrootid, "subvolrootid=%d"},
/* Debugging options */
{Opt_check_integrity, "check_int"},
@ -433,6 +452,55 @@ static const match_table_t tokens = {
{Opt_err, NULL},
};
static const match_table_t rescue_tokens = {
{Opt_usebackuproot, "usebackuproot"},
{Opt_nologreplay, "nologreplay"},
{Opt_err, NULL},
};
static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
{
char *opts;
char *orig;
char *p;
substring_t args[MAX_OPT_ARGS];
int ret = 0;
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
return -ENOMEM;
orig = opts;
while ((p = strsep(&opts, ":")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, rescue_tokens, args);
switch (token){
case Opt_usebackuproot:
btrfs_info(info,
"trying to use backup root at mount time");
btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
break;
case Opt_nologreplay:
btrfs_set_and_info(info, NOLOGREPLAY,
"disabling log replay at mount time");
break;
case Opt_err:
btrfs_info(info, "unrecognized rescue option '%s'", p);
ret = -EINVAL;
goto out;
default:
break;
}
}
out:
kfree(orig);
return ret;
}
/*
* Regular mount options parser. Everything that is needed only when
* reading in a new superblock is parsed here.
@ -479,7 +547,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_subvol:
case Opt_subvol_empty:
case Opt_subvolid:
case Opt_subvolrootid:
case Opt_device:
/*
* These are parsed by btrfs_parse_subvol_options or
@ -663,10 +730,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
goto out;
}
break;
case Opt_alloc_start:
btrfs_info(info,
"option alloc_start is obsolete, ignored");
break;
case Opt_acl:
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
info->sb->s_flags |= SB_POSIXACL;
@ -689,6 +752,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_norecovery:
case Opt_nologreplay:
btrfs_warn(info,
"'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
btrfs_set_and_info(info, NOLOGREPLAY,
"disabling log replay at mount time");
break;
@ -762,6 +827,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
}
break;
case Opt_inode_cache:
btrfs_warn(info,
"the 'inode_cache' option is deprecated and will have no effect from 5.11");
btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
"enabling inode map caching");
break;
@ -791,10 +858,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
"disabling auto defrag");
break;
case Opt_recovery:
btrfs_warn(info,
"'recovery' is deprecated, use 'usebackuproot' instead");
fallthrough;
case Opt_usebackuproot:
btrfs_warn(info,
"'%s' is deprecated, use 'rescue=usebackuproot' instead",
token == Opt_recovery ? "recovery" :
"usebackuproot");
btrfs_info(info,
"trying to use backup root at mount time");
btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
@ -859,6 +927,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
}
info->commit_interval = intarg;
break;
case Opt_rescue:
ret = parse_rescue_options(info, args[0].from);
if (ret < 0)
goto out;
break;
#ifdef CONFIG_BTRFS_DEBUG
case Opt_fragment_all:
btrfs_info(info, "fragmenting all space");
@ -1020,9 +1093,6 @@ static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
*subvol_objectid = subvolid;
break;
case Opt_subvolrootid:
pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
break;
default:
break;
}
@ -1344,7 +1414,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
if (btrfs_test_opt(info, NOTREELOG))
seq_puts(seq, ",notreelog");
if (btrfs_test_opt(info, NOLOGREPLAY))
seq_puts(seq, ",nologreplay");
seq_puts(seq, ",rescue=nologreplay");
if (btrfs_test_opt(info, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit");
if (btrfs_test_opt(info, DISCARD_SYNC))
@ -1712,11 +1782,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
new_pool_size);
}
static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
{
set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
}
static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
unsigned long old_opts, int flags)
{
@ -1750,8 +1815,6 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
!btrfs_test_opt(fs_info, DISCARD_ASYNC))
btrfs_discard_cleanup(fs_info);
clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
}
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@ -1767,7 +1830,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
int ret;
sync_filesystem(sb);
btrfs_remount_prepare(fs_info);
set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
if (data) {
void *new_sec_opts = NULL;
@ -1889,6 +1952,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
out:
wake_up_process(fs_info->transaction_kthread);
btrfs_remount_cleanup(fs_info, old_opts);
clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
return 0;
restore:
@ -1903,6 +1968,8 @@ restore:
old_thread_pool_size, fs_info->thread_pool_size);
fs_info->metadata_ratio = old_metadata_ratio;
btrfs_remount_cleanup(fs_info, old_opts);
clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
return ret;
}
@ -2296,9 +2363,7 @@ static int btrfs_unfreeze(struct super_block *sb)
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
struct btrfs_fs_devices *cur_devices;
struct btrfs_device *dev, *first_dev = NULL;
struct list_head *head;
/*
* Lightweight locking of the devices. We should not need
@ -2308,18 +2373,13 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
* least until the rcu_read_unlock.
*/
rcu_read_lock();
cur_devices = fs_info->fs_devices;
while (cur_devices) {
head = &cur_devices->devices;
list_for_each_entry_rcu(dev, head, dev_list) {
if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue;
if (!dev->name)
continue;
if (!first_dev || dev->devid < first_dev->devid)
first_dev = dev;
}
cur_devices = cur_devices->seed;
list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue;
if (!dev->name)
continue;
if (!first_dev || dev->devid < first_dev->devid)
first_dev = dev;
}
if (first_dev)

View File

@ -19,6 +19,7 @@
#include "volumes.h"
#include "space-info.h"
#include "block-group.h"
#include "qgroup.h"
struct btrfs_feature_attr {
struct kobj_attribute kobj_attr;
@ -936,8 +937,12 @@ void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
{
struct kobject *fsid_kobj = &fs_info->fs_devices->fsid_kobj;
btrfs_reset_fs_info_ptr(fs_info);
sysfs_remove_link(fsid_kobj, "bdi");
if (fs_info->space_info_kobj) {
sysfs_remove_files(fs_info->space_info_kobj, allocation_attrs);
kobject_del(fs_info->space_info_kobj);
@ -957,8 +962,8 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
}
#endif
addrm_unknown_feature_attrs(fs_info, false);
sysfs_remove_group(&fs_info->fs_devices->fsid_kobj, &btrfs_feature_attr_group);
sysfs_remove_files(&fs_info->fs_devices->fsid_kobj, btrfs_attrs);
sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
sysfs_remove_files(fsid_kobj, btrfs_attrs);
btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, NULL);
}
@ -1273,7 +1278,9 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
{
int error = 0;
struct btrfs_device *dev;
unsigned int nofs_flag;
nofs_flag = memalloc_nofs_save();
list_for_each_entry(dev, &fs_devices->devices, dev_list) {
if (one_device && one_device != dev)
@ -1301,6 +1308,7 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
break;
}
}
memalloc_nofs_restore(nofs_flag);
return error;
}
@ -1438,6 +1446,10 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
if (error)
goto failure;
error = sysfs_create_link(fsid_kobj, &fs_info->sb->s_bdi->dev->kobj, "bdi");
if (error)
goto failure;
fs_info->space_info_kobj = kobject_create_and_add("allocation",
fsid_kobj);
if (!fs_info->space_info_kobj) {
@ -1455,6 +1467,153 @@ failure:
return error;
}
static inline struct btrfs_fs_info *qgroup_kobj_to_fs_info(struct kobject *kobj)
{
return to_fs_info(kobj->parent->parent);
}
#define QGROUP_ATTR(_member, _show_name) \
static ssize_t btrfs_qgroup_show_##_member(struct kobject *qgroup_kobj, \
struct kobj_attribute *a, \
char *buf) \
{ \
struct btrfs_fs_info *fs_info = qgroup_kobj_to_fs_info(qgroup_kobj); \
struct btrfs_qgroup *qgroup = container_of(qgroup_kobj, \
struct btrfs_qgroup, kobj); \
return btrfs_show_u64(&qgroup->_member, &fs_info->qgroup_lock, buf); \
} \
BTRFS_ATTR(qgroup, _show_name, btrfs_qgroup_show_##_member)
#define QGROUP_RSV_ATTR(_name, _type) \
static ssize_t btrfs_qgroup_rsv_show_##_name(struct kobject *qgroup_kobj, \
struct kobj_attribute *a, \
char *buf) \
{ \
struct btrfs_fs_info *fs_info = qgroup_kobj_to_fs_info(qgroup_kobj); \
struct btrfs_qgroup *qgroup = container_of(qgroup_kobj, \
struct btrfs_qgroup, kobj); \
return btrfs_show_u64(&qgroup->rsv.values[_type], \
&fs_info->qgroup_lock, buf); \
} \
BTRFS_ATTR(qgroup, rsv_##_name, btrfs_qgroup_rsv_show_##_name)
QGROUP_ATTR(rfer, referenced);
QGROUP_ATTR(excl, exclusive);
QGROUP_ATTR(max_rfer, max_referenced);
QGROUP_ATTR(max_excl, max_exclusive);
QGROUP_ATTR(lim_flags, limit_flags);
QGROUP_RSV_ATTR(data, BTRFS_QGROUP_RSV_DATA);
QGROUP_RSV_ATTR(meta_pertrans, BTRFS_QGROUP_RSV_META_PERTRANS);
QGROUP_RSV_ATTR(meta_prealloc, BTRFS_QGROUP_RSV_META_PREALLOC);
static struct attribute *qgroup_attrs[] = {
BTRFS_ATTR_PTR(qgroup, referenced),
BTRFS_ATTR_PTR(qgroup, exclusive),
BTRFS_ATTR_PTR(qgroup, max_referenced),
BTRFS_ATTR_PTR(qgroup, max_exclusive),
BTRFS_ATTR_PTR(qgroup, limit_flags),
BTRFS_ATTR_PTR(qgroup, rsv_data),
BTRFS_ATTR_PTR(qgroup, rsv_meta_pertrans),
BTRFS_ATTR_PTR(qgroup, rsv_meta_prealloc),
NULL
};
ATTRIBUTE_GROUPS(qgroup);
static void qgroup_release(struct kobject *kobj)
{
struct btrfs_qgroup *qgroup = container_of(kobj, struct btrfs_qgroup, kobj);
memset(&qgroup->kobj, 0, sizeof(*kobj));
}
static struct kobj_type qgroup_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = qgroup_release,
.default_groups = qgroup_groups,
};
int btrfs_sysfs_add_one_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup)
{
struct kobject *qgroups_kobj = fs_info->qgroups_kobj;
int ret;
if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state))
return 0;
if (qgroup->kobj.state_initialized)
return 0;
if (!qgroups_kobj)
return -EINVAL;
ret = kobject_init_and_add(&qgroup->kobj, &qgroup_ktype, qgroups_kobj,
"%hu_%llu", btrfs_qgroup_level(qgroup->qgroupid),
btrfs_qgroup_subvolid(qgroup->qgroupid));
if (ret < 0)
kobject_put(&qgroup->kobj);
return ret;
}
void btrfs_sysfs_del_qgroups(struct btrfs_fs_info *fs_info)
{
struct btrfs_qgroup *qgroup;
struct btrfs_qgroup *next;
if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state))
return;
rbtree_postorder_for_each_entry_safe(qgroup, next,
&fs_info->qgroup_tree, node)
btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
kobject_del(fs_info->qgroups_kobj);
kobject_put(fs_info->qgroups_kobj);
fs_info->qgroups_kobj = NULL;
}
/* Called when qgroups get initialized, thus there is no need for locking */
int btrfs_sysfs_add_qgroups(struct btrfs_fs_info *fs_info)
{
struct kobject *fsid_kobj = &fs_info->fs_devices->fsid_kobj;
struct btrfs_qgroup *qgroup;
struct btrfs_qgroup *next;
int ret = 0;
if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state))
return 0;
ASSERT(fsid_kobj);
if (fs_info->qgroups_kobj)
return 0;
fs_info->qgroups_kobj = kobject_create_and_add("qgroups", fsid_kobj);
if (!fs_info->qgroups_kobj) {
ret = -ENOMEM;
goto out;
}
rbtree_postorder_for_each_entry_safe(qgroup, next,
&fs_info->qgroup_tree, node) {
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
if (ret < 0)
goto out;
}
out:
if (ret < 0)
btrfs_sysfs_del_qgroups(fs_info);
return ret;
}
void btrfs_sysfs_del_one_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup)
{
if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state))
return;
if (qgroup->kobj.state_initialized) {
kobject_del(&qgroup->kobj);
kobject_put(&qgroup->kobj);
}
}
/*
* Change per-fs features in /sys/fs/btrfs/UUID/features to match current

View File

@ -36,4 +36,11 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info);
void btrfs_sysfs_update_devid(struct btrfs_device *device);
int btrfs_sysfs_add_one_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup);
void btrfs_sysfs_del_qgroups(struct btrfs_fs_info *fs_info);
int btrfs_sysfs_add_qgroups(struct btrfs_fs_info *fs_info);
void btrfs_sysfs_del_one_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup);
#endif

View File

@ -60,8 +60,6 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
if (prev_bit == 0 && bit == 1) {
extent_start = offset;
} else if (prev_bit == 1 && bit == 0) {
if (i >= num_extents)
goto invalid;
if (i >= num_extents ||
extent_start != extents[i].start ||
offset - extent_start != extents[i].length)

View File

@ -954,8 +954,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
btrfs_test_inode_set_ops(inode);
/* [BTRFS_MAX_EXTENT_SIZE] */
ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, 0,
NULL);
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), 0,
BTRFS_MAX_EXTENT_SIZE - 1, 0, NULL);
if (ret) {
test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
@ -968,7 +968,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
}
/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), BTRFS_MAX_EXTENT_SIZE,
BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
0, NULL);
if (ret) {
@ -999,7 +999,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
}
/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1)
+ sectorsize - 1,
0, NULL);
@ -1017,7 +1017,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/*
* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize]
*/
ret = btrfs_set_extent_delalloc(inode,
ret = btrfs_set_extent_delalloc(BTRFS_I(inode),
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
0, NULL);
@ -1035,7 +1035,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/*
* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize]
*/
ret = btrfs_set_extent_delalloc(inode,
ret = btrfs_set_extent_delalloc(BTRFS_I(inode),
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL);
if (ret) {
@ -1069,7 +1069,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
* Refill the hole again just for good measure, because I thought it
* might fail and I'd rather satisfy my paranoia at this point.
*/
ret = btrfs_set_extent_delalloc(inode,
ret = btrfs_set_extent_delalloc(BTRFS_I(inode),
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL);
if (ret) {

View File

@ -937,7 +937,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (TRANS_ABORTED(trans) ||
test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
wake_up_process(info->transaction_kthread);
err = -EIO;
if (TRANS_ABORTED(trans))
err = trans->aborted;
else
err = -EROFS;
}
kmem_cache_free(btrfs_trans_handle_cachep, trans);
@ -1630,7 +1633,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
key.offset = (u64)-1;
pending->snap = btrfs_get_fs_root(fs_info, objectid, true);
pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
btrfs_abort_transaction(trans, ret);
@ -2351,7 +2354,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
*/
cur_trans->state = TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
spin_lock(&fs_info->trans_lock);
list_del_init(&cur_trans->list);

View File

@ -151,18 +151,20 @@ struct btrfs_pending_snapshot {
struct btrfs_block_rsv block_rsv;
/* extra metadata reservation for relocation */
int error;
/* Preallocated anonymous block device number */
dev_t anon_dev;
bool readonly;
struct list_head list;
};
static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
struct inode *inode)
struct btrfs_inode *inode)
{
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->last_trans = trans->transaction->transid;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
spin_unlock(&BTRFS_I(inode)->lock);
spin_lock(&inode->lock);
inode->last_trans = trans->transaction->transid;
inode->last_sub_trans = inode->root->log_transid;
inode->last_log_commit = inode->root->last_log_commit;
spin_unlock(&inode->lock);
}
/*
@ -208,20 +210,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
int wait_for_unblock);
/*
* Try to commit transaction asynchronously, so this is safe to call
* even holding a spinlock.
*
* It's done by informing transaction_kthread to commit transaction without
* waiting for commit interval.
*/
static inline void btrfs_commit_transaction_locksafe(
struct btrfs_fs_info *fs_info)
{
set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
wake_up_process(fs_info->transaction_kthread);
}
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
void btrfs_throttle(struct btrfs_fs_info *fs_info);

View File

@ -133,10 +133,9 @@ out:
ret = 0;
}
done:
if (ret != -EAGAIN) {
if (ret != -EAGAIN)
memset(&root->defrag_progress, 0,
sizeof(root->defrag_progress));
root->defrag_trans_start = trans->transid;
}
return ret;
}

View File

@ -3116,29 +3116,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
btrfs_init_log_ctx(&root_log_ctx, NULL);
mutex_lock(&log_root_tree->log_mutex);
atomic_inc(&log_root_tree->log_batch);
atomic_inc(&log_root_tree->log_writers);
index2 = log_root_tree->log_transid % 2;
list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
root_log_ctx.log_transid = log_root_tree->log_transid;
mutex_unlock(&log_root_tree->log_mutex);
mutex_lock(&log_root_tree->log_mutex);
/*
* Now we are safe to update the log_root_tree because we're under the
* log_mutex, and we're a current writer so we're holding the commit
* open until we drop the log_mutex.
*/
ret = update_log_root(trans, log, &new_root_item);
if (atomic_dec_and_test(&log_root_tree->log_writers)) {
/* atomic_dec_and_test implies a barrier */
cond_wake_up_nomb(&log_root_tree->log_writer_wait);
}
if (ret) {
if (!list_empty(&root_log_ctx.list))
list_del_init(&root_log_ctx.list);
@ -3184,8 +3172,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
root_log_ctx.log_transid - 1);
}
wait_for_writer(log_root_tree);
/*
* now that we've moved on to the tree of log tree roots,
* check the full commit flag again
@ -3906,6 +3892,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
}
static int log_csums(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_root *log_root,
struct btrfs_ordered_sum *sums)
{
@ -3913,6 +3900,14 @@ static int log_csums(struct btrfs_trans_handle *trans,
struct extent_state *cached_state = NULL;
int ret;
/*
* If this inode was not used for reflink operations in the current
* transaction with new extents, then do the fast path, no need to
* worry about logging checksum items with overlapping ranges.
*/
if (inode->last_reflink_trans < trans->transid)
return btrfs_csum_file_blocks(trans, log_root, sums);
/*
* Serialize logging for checksums. This is to avoid racing with the
* same checksum being logged by another task that is logging another
@ -4064,7 +4059,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
ret = log_csums(trans, log, sums);
ret = log_csums(trans, inode, log, sums);
list_del(&sums->list);
kfree(sums);
}
@ -4123,7 +4118,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
ret = log_csums(trans, log_root, sums);
ret = log_csums(trans, inode, log_root, sums);
list_del(&sums->list);
kfree(sums);
}
@ -4151,7 +4146,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
if (ret)
return ret;
ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
em->start + em->len, NULL, 0, 1,
sizeof(*fi), &extent_inserted);
if (ret)
@ -5123,14 +5118,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
const loff_t end,
struct btrfs_log_ctx *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct btrfs_path *dst_path;
struct btrfs_key min_key;
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
int err = 0;
int ret;
int ret = 0;
bool fast_search = false;
u64 ino = btrfs_ino(inode);
struct extent_map_tree *em_tree = &inode->extent_tree;
@ -5166,15 +5160,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
max_key.offset = (u64)-1;
/*
* Only run delayed items if we are a dir or a new file.
* Otherwise commit the delayed inode only, which is needed in
* order for the log replay code to mark inodes for link count
* fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
* Only run delayed items if we are a directory. We want to make sure
* all directory indexes hit the fs/subvolume tree so we can find them
* and figure out which index ranges have to be logged.
*
* Otherwise commit the delayed inode only if the full sync flag is set,
* as we want to make sure an up to date version is in the subvolume
* tree so copy_inode_items_to_log() / copy_items() can find it and copy
* it to the log tree. For a non full sync, we always log the inode item
* based on the in-memory struct btrfs_inode which is always up to date.
*/
if (S_ISDIR(inode->vfs_inode.i_mode) ||
inode->generation > fs_info->last_trans_committed)
if (S_ISDIR(inode->vfs_inode.i_mode))
ret = btrfs_commit_inode_delayed_items(trans, inode);
else
else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
ret = btrfs_commit_inode_delayed_inode(inode);
if (ret) {

View File

@ -245,7 +245,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
*
* global::fs_devs - add, remove, updates to the global list
*
* does not protect: manipulation of the fs_devices::devices list!
* does not protect: manipulation of the fs_devices::devices list in general
* but in mount context it could be used to exclude list modifications by eg.
* scan ioctl
*
* btrfs_device::name - renames (write side), read is RCU
*
@ -258,6 +260,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* may be used to exclude some operations from running concurrently without any
* modifications to the list (see write_all_supers)
*
* Is not required at mount and close times, because our device list is
* protected by the uuid_mutex at that point.
*
* balance_mutex
* -------------
* protects balance structures (status, state) and context accessed from
@ -602,6 +607,11 @@ static int btrfs_free_stale_devices(const char *path,
return ret;
}
/*
* This is only used on mount, and we are protected from competing things
* messing with our fs_devices by the uuid_mutex, thus we do not need the
* fs_devices->device_list_mutex here.
*/
static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *device, fmode_t flags,
void *holder)
@ -1229,8 +1239,14 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
int ret;
lockdep_assert_held(&uuid_mutex);
/*
* The device_list_mutex cannot be taken here in case opening the
* underlying device takes further locks like bd_mutex.
*
* We also don't need the lock here as this is called during mount and
* exclusion is provided by uuid_mutex
*/
mutex_lock(&fs_devices->device_list_mutex);
if (fs_devices->opened) {
fs_devices->opened++;
ret = 0;
@ -1238,7 +1254,6 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
list_sort(NULL, &fs_devices->devices, devid_cmp);
ret = open_fs_devices(fs_devices, flags, holder);
}
mutex_unlock(&fs_devices->device_list_mutex);
return ret;
}
@ -3231,7 +3246,7 @@ static int del_balance_item(struct btrfs_fs_info *fs_info)
if (!path)
return -ENOMEM;
trans = btrfs_start_transaction(root, 0);
trans = btrfs_start_transaction_fallback_global_rsv(root, 0);
if (IS_ERR(trans)) {
btrfs_free_path(path);
return PTR_ERR(trans);
@ -4135,7 +4150,22 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
mutex_lock(&fs_info->balance_mutex);
if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
btrfs_info(fs_info, "balance: paused");
else if (ret == -ECANCELED && atomic_read(&fs_info->balance_cancel_req))
/*
* Balance can be canceled by:
*
* - Regular cancel request
* Then ret == -ECANCELED and balance_cancel_req > 0
*
* - Fatal signal to "btrfs" process
* Either the signal caught by wait_reserve_ticket() and callers
* got -EINTR, or caught by btrfs_should_cancel_balance() and
* got -ECANCELED.
* Either way, in this case balance_cancel_req = 0, and
* ret == -EINTR or ret == -ECANCELED.
*
* So here we only check the return value to catch canceled balance.
*/
else if (ret == -ECANCELED || ret == -EINTR)
btrfs_info(fs_info, "balance: canceled");
else
btrfs_info(fs_info, "balance: ended with status: %d", ret);
@ -5522,6 +5552,9 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
atomic_set(&bbio->error, 0);
refcount_set(&bbio->refs, 1);
bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes);
bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes);
return bbio;
}
@ -6144,8 +6177,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
ret = -ENOMEM;
goto out;
}
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
for (i = 0; i < num_stripes; i++) {
bbio->stripes[i].physical = map->stripes[stripe_index].physical +
stripe_offset + stripe_nr * map->stripe_len;
bbio->stripes[i].dev = map->stripes[stripe_index].dev;
stripe_index++;
}
/* build raid_map */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
@ -6153,11 +6191,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
u64 tmp;
unsigned rot;
bbio->raid_map = (u64 *)((void *)bbio->stripes +
sizeof(struct btrfs_bio_stripe) *
num_alloc_stripes +
sizeof(int) * tgtdev_indexes);
/* Work out the disk rotation on this stripe-set */
div_u64_rem(stripe_nr, num_stripes, &rot);
@ -6171,25 +6204,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
if (map->type & BTRFS_BLOCK_GROUP_RAID6)
bbio->raid_map[(i+rot+1) % num_stripes] =
RAID6_Q_STRIPE;
}
for (i = 0; i < num_stripes; i++) {
bbio->stripes[i].physical =
map->stripes[stripe_index].physical +
stripe_offset +
stripe_nr * map->stripe_len;
bbio->stripes[i].dev =
map->stripes[stripe_index].dev;
stripe_index++;
sort_parity_stripes(bbio, num_stripes);
}
if (need_full_stripe(op))
max_errors = btrfs_chunk_max_errors(map);
if (bbio->raid_map)
sort_parity_stripes(bbio, num_stripes);
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
need_full_stripe(op)) {
handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
@ -6261,23 +6282,18 @@ static void btrfs_end_bio(struct bio *bio)
atomic_inc(&bbio->error);
if (bio->bi_status == BLK_STS_IOERR ||
bio->bi_status == BLK_STS_TARGET) {
unsigned int stripe_index =
btrfs_io_bio(bio)->stripe_index;
struct btrfs_device *dev;
struct btrfs_device *dev = btrfs_io_bio(bio)->device;
BUG_ON(stripe_index >= bbio->num_stripes);
dev = bbio->stripes[stripe_index].dev;
if (dev->bdev) {
if (bio_op(bio) == REQ_OP_WRITE)
btrfs_dev_stat_inc_and_print(dev,
ASSERT(dev->bdev);
if (bio_op(bio) == REQ_OP_WRITE)
btrfs_dev_stat_inc_and_print(dev,
BTRFS_DEV_STAT_WRITE_ERRS);
else if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(dev,
else if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(dev,
BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc_and_print(dev,
if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc_and_print(dev,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
}
}
@ -6313,13 +6329,12 @@ static void btrfs_end_bio(struct bio *bio)
}
static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
u64 physical, int dev_nr)
u64 physical, struct btrfs_device *dev)
{
struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
struct btrfs_fs_info *fs_info = bbio->fs_info;
bio->bi_private = bbio;
btrfs_io_bio(bio)->stripe_index = dev_nr;
btrfs_io_bio(bio)->device = dev;
bio->bi_end_io = btrfs_end_bio;
bio->bi_iter.bi_sector = physical >> 9;
btrfs_debug_in_rcu(fs_info,
@ -6420,8 +6435,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
else
bio = first_bio;
submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical,
dev_nr);
submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev);
}
btrfs_bio_counter_dec(fs_info);
return BLK_STS_OK;
@ -7029,6 +7043,19 @@ out:
return ret;
}
static void readahead_tree_node_children(struct extent_buffer *node)
{
int i;
const int nr_items = btrfs_header_nritems(node);
for (i = 0; i < nr_items; i++) {
u64 start;
start = btrfs_node_blockptr(node, i);
readahead_tree_block(node->fs_info, start);
}
}
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root = fs_info->chunk_root;
@ -7039,6 +7066,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
int ret;
int slot;
u64 total_dev = 0;
u64 last_ra_node = 0;
path = btrfs_alloc_path();
if (!path)
@ -7049,7 +7077,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
* otherwise we don't need it.
*/
mutex_lock(&uuid_mutex);
mutex_lock(&fs_info->chunk_mutex);
/*
* It is possible for mount and umount to race in such a way that
@ -7072,6 +7099,8 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
if (ret < 0)
goto error;
while (1) {
struct extent_buffer *node;
leaf = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
@ -7082,6 +7111,17 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
goto error;
break;
}
/*
* The nodes on level 1 are not locked but we don't need to do
* that during mount time as nothing else can access the tree
*/
node = path->nodes[1];
if (node) {
if (last_ra_node != node->start) {
readahead_tree_node_children(node);
last_ra_node = node->start;
}
}
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.type == BTRFS_DEV_ITEM_KEY) {
struct btrfs_dev_item *dev_item;
@ -7094,7 +7134,9 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
struct btrfs_chunk *chunk;
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
mutex_lock(&fs_info->chunk_mutex);
ret = read_one_chunk(&found_key, leaf, chunk);
mutex_unlock(&fs_info->chunk_mutex);
if (ret)
goto error;
}
@ -7124,7 +7166,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
}
ret = 0;
error:
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&uuid_mutex);
btrfs_free_path(path);

View File

@ -288,7 +288,7 @@ struct btrfs_fs_devices {
*/
struct btrfs_io_bio {
unsigned int mirror_num;
unsigned int stripe_index;
struct btrfs_device *device;
u64 logical;
u8 *csum;
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];

View File

@ -31,13 +31,6 @@ struct extent_io_tree;
struct prelim_ref;
struct btrfs_space_info;
TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR);
TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS);
TRACE_DEFINE_ENUM(FLUSH_DELALLOC);
TRACE_DEFINE_ENUM(FLUSH_DELALLOC_WAIT);
TRACE_DEFINE_ENUM(ALLOC_CHUNK);
TRACE_DEFINE_ENUM(COMMIT_TRANS);
#define show_ref_type(type) \
__print_symbolic(type, \
{ BTRFS_TREE_BLOCK_REF_KEY, "TREE_BLOCK_REF" }, \
@ -67,30 +60,72 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
(obj >= BTRFS_ROOT_TREE_OBJECTID && \
obj <= BTRFS_QUOTA_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
#define show_fi_type(type) \
__print_symbolic(type, \
{ BTRFS_FILE_EXTENT_INLINE, "INLINE" }, \
{ BTRFS_FILE_EXTENT_REG, "REG" }, \
{ BTRFS_FILE_EXTENT_PREALLOC, "PREALLOC"})
#define FLUSH_ACTIONS \
EM( BTRFS_RESERVE_NO_FLUSH, "BTRFS_RESERVE_NO_FLUSH") \
EM( BTRFS_RESERVE_FLUSH_LIMIT, "BTRFS_RESERVE_FLUSH_LIMIT") \
EM( BTRFS_RESERVE_FLUSH_ALL, "BTRFS_RESERVE_FLUSH_ALL") \
EMe(BTRFS_RESERVE_FLUSH_ALL_STEAL, "BTRFS_RESERVE_FLUSH_ALL_STEAL")
#define show_qgroup_rsv_type(type) \
__print_symbolic(type, \
{ BTRFS_QGROUP_RSV_DATA, "DATA" }, \
{ BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS" }, \
{ BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC" })
#define FI_TYPES \
EM( BTRFS_FILE_EXTENT_INLINE, "INLINE") \
EM( BTRFS_FILE_EXTENT_REG, "REG") \
EMe(BTRFS_FILE_EXTENT_PREALLOC, "PREALLOC")
#define QGROUP_RSV_TYPES \
EM( BTRFS_QGROUP_RSV_DATA, "DATA") \
EM( BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS") \
EMe(BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC")
#define IO_TREE_OWNER \
EM( IO_TREE_FS_PINNED_EXTENTS, "PINNED_EXTENTS") \
EM( IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS") \
EM( IO_TREE_INODE_IO, "INODE_IO") \
EM( IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE") \
EM( IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS") \
EM( IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES") \
EM( IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES") \
EM( IO_TREE_INODE_FILE_EXTENT, "INODE_FILE_EXTENT") \
EM( IO_TREE_LOG_CSUM_RANGE, "LOG_CSUM_RANGE") \
EMe(IO_TREE_SELFTEST, "SELFTEST")
#define FLUSH_STATES \
EM( FLUSH_DELAYED_ITEMS_NR, "FLUSH_DELAYED_ITEMS_NR") \
EM( FLUSH_DELAYED_ITEMS, "FLUSH_DELAYED_ITEMS") \
EM( FLUSH_DELALLOC, "FLUSH_DELALLOC") \
EM( FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT") \
EM( FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR") \
EM( FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS") \
EM( ALLOC_CHUNK, "ALLOC_CHUNK") \
EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \
EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \
EMe(COMMIT_TRANS, "COMMIT_TRANS")
/*
* First define the enums in the above macros to be exported to userspace via
* TRACE_DEFINE_ENUM().
*/
#undef EM
#undef EMe
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
FLUSH_ACTIONS
FI_TYPES
QGROUP_RSV_TYPES
IO_TREE_OWNER
FLUSH_STATES
/*
* Now redefine the EM and EMe macros to map the enums to the strings that will
* be printed in the output
*/
#undef EM
#undef EMe
#define EM(a, b) {a, b},
#define EMe(a, b) {a, b}
#define show_extent_io_tree_owner(owner) \
__print_symbolic(owner, \
{ IO_TREE_FS_PINNED_EXTENTS, "PINNED_EXTENTS" }, \
{ IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS" }, \
{ IO_TREE_INODE_IO, "INODE_IO" }, \
{ IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE" }, \
{ IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \
{ IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES" }, \
{ IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES" }, \
{ IO_TREE_INODE_FILE_EXTENT, "INODE_FILE_EXTENT" }, \
{ IO_TREE_LOG_CSUM_RANGE, "LOG_CSUM_RANGE" }, \
{ IO_TREE_SELFTEST, "SELFTEST" })
#define BTRFS_GROUP_FLAGS \
{ BTRFS_BLOCK_GROUP_DATA, "DATA"}, \
@ -380,7 +415,7 @@ DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
__entry->disk_isize, __entry->extent_start,
__entry->extent_end, __entry->num_bytes, __entry->ram_bytes,
__entry->disk_bytenr, __entry->disk_num_bytes,
__entry->extent_offset, show_fi_type(__entry->extent_type),
__entry->extent_offset, __print_symbolic(__entry->extent_type, FI_TYPES),
__entry->compression)
);
@ -421,7 +456,7 @@ DECLARE_EVENT_CLASS(
"extent_type=%s compression=%u",
show_root_type(__entry->root_obj), __entry->ino, __entry->isize,
__entry->disk_isize, __entry->extent_start,
__entry->extent_end, show_fi_type(__entry->extent_type),
__entry->extent_end, __print_symbolic(__entry->extent_type, FI_TYPES),
__entry->compression)
);
@ -1042,12 +1077,6 @@ TRACE_EVENT(btrfs_space_reservation,
__entry->bytes)
);
#define show_flush_action(action) \
__print_symbolic(action, \
{ BTRFS_RESERVE_NO_FLUSH, "BTRFS_RESERVE_NO_FLUSH"}, \
{ BTRFS_RESERVE_FLUSH_LIMIT, "BTRFS_RESERVE_FLUSH_LIMIT"}, \
{ BTRFS_RESERVE_FLUSH_ALL, "BTRFS_RESERVE_FLUSH_ALL"})
TRACE_EVENT(btrfs_trigger_flush,
TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 bytes,
@ -1071,25 +1100,13 @@ TRACE_EVENT(btrfs_trigger_flush,
TP_printk_btrfs("%s: flush=%d(%s) flags=%llu(%s) bytes=%llu",
__get_str(reason), __entry->flush,
show_flush_action(__entry->flush),
__print_symbolic(__entry->flush, FLUSH_ACTIONS),
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
__entry->bytes)
);
#define show_flush_state(state) \
__print_symbolic(state, \
{ FLUSH_DELAYED_ITEMS_NR, "FLUSH_DELAYED_ITEMS_NR"}, \
{ FLUSH_DELAYED_ITEMS, "FLUSH_DELAYED_ITEMS"}, \
{ FLUSH_DELALLOC, "FLUSH_DELALLOC"}, \
{ FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT"}, \
{ FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR"}, \
{ FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS"}, \
{ ALLOC_CHUNK, "ALLOC_CHUNK"}, \
{ ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE"}, \
{ RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS"}, \
{ COMMIT_TRANS, "COMMIT_TRANS"})
TRACE_EVENT(btrfs_flush_space,
@ -1114,7 +1131,7 @@ TRACE_EVENT(btrfs_flush_space,
TP_printk_btrfs("state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d",
__entry->state,
show_flush_state(__entry->state),
__print_symbolic(__entry->state, FLUSH_STATES),
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
@ -1690,7 +1707,7 @@ TRACE_EVENT(qgroup_update_reserve,
),
TP_printk_btrfs("qgid=%llu type=%s cur_reserved=%llu diff=%lld",
__entry->qgid, show_qgroup_rsv_type(__entry->type),
__entry->qgid, __print_symbolic(__entry->type, QGROUP_RSV_TYPES),
__entry->cur_reserved, __entry->diff)
);
@ -1714,7 +1731,7 @@ TRACE_EVENT(qgroup_meta_reserve,
TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
show_root_type(__entry->refroot),
show_qgroup_rsv_type(__entry->type), __entry->diff)
__print_symbolic(__entry->type, QGROUP_RSV_TYPES), __entry->diff)
);
TRACE_EVENT(qgroup_meta_convert,
@ -1735,8 +1752,8 @@ TRACE_EVENT(qgroup_meta_convert,
TP_printk_btrfs("refroot=%llu(%s) type=%s->%s diff=%lld",
show_root_type(__entry->refroot),
show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PREALLOC),
show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PERTRANS),
__print_symbolic(BTRFS_QGROUP_RSV_META_PREALLOC, QGROUP_RSV_TYPES),
__print_symbolic(BTRFS_QGROUP_RSV_META_PERTRANS, QGROUP_RSV_TYPES),
__entry->diff)
);
@ -1762,7 +1779,7 @@ TRACE_EVENT(qgroup_meta_free_all_pertrans,
TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
show_root_type(__entry->refroot),
show_qgroup_rsv_type(__entry->type), __entry->diff)
__print_symbolic(__entry->type, QGROUP_RSV_TYPES), __entry->diff)
);
DECLARE_EVENT_CLASS(btrfs__prelim_ref,
@ -1920,7 +1937,7 @@ TRACE_EVENT(btrfs_set_extent_bit,
TP_printk_btrfs(
"io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s",
show_extent_io_tree_owner(__entry->owner), __entry->ino,
__print_symbolic(__entry->owner, IO_TREE_OWNER), __entry->ino,
__entry->rootid, __entry->start, __entry->len,
__print_flags(__entry->set_bits, "|", EXTENT_FLAGS))
);
@ -1959,7 +1976,7 @@ TRACE_EVENT(btrfs_clear_extent_bit,
TP_printk_btrfs(
"io_tree=%s ino=%llu root=%llu start=%llu len=%llu clear_bits=%s",
show_extent_io_tree_owner(__entry->owner), __entry->ino,
__print_symbolic(__entry->owner, IO_TREE_OWNER), __entry->ino,
__entry->rootid, __entry->start, __entry->len,
__print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
);
@ -2000,7 +2017,7 @@ TRACE_EVENT(btrfs_convert_extent_bit,
TP_printk_btrfs(
"io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s clear_bits=%s",
show_extent_io_tree_owner(__entry->owner), __entry->ino,
__print_symbolic(__entry->owner, IO_TREE_OWNER), __entry->ino,
__entry->rootid, __entry->start, __entry->len,
__print_flags(__entry->set_bits , "|", EXTENT_FLAGS),
__print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))

View File

@ -243,6 +243,18 @@ struct btrfs_ioctl_dev_info_args {
__u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */
};
/*
* Retrieve information about the filesystem
*/
/* Request information about checksum type and size */
#define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0)
/* Request information about filesystem generation */
#define BTRFS_FS_INFO_FLAG_GENERATION (1 << 1)
/* Request information about filesystem metadata UUID */
#define BTRFS_FS_INFO_FLAG_METADATA_UUID (1 << 2)
struct btrfs_ioctl_fs_info_args {
__u64 max_id; /* out */
__u64 num_devices; /* out */
@ -250,8 +262,13 @@ struct btrfs_ioctl_fs_info_args {
__u32 nodesize; /* out */
__u32 sectorsize; /* out */
__u32 clone_alignment; /* out */
__u32 reserved32;
__u64 reserved[122]; /* pad to 1k */
/* See BTRFS_FS_INFO_FLAG_* */
__u16 csum_type; /* out */
__u16 csum_size; /* out */
__u64 flags; /* in/out */
__u64 generation; /* out */
__u8 metadata_uuid[BTRFS_FSID_SIZE]; /* out */
__u8 reserved[944]; /* pad to 1k */
};
/*

View File

@ -913,9 +913,9 @@ struct btrfs_free_space_info {
#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
#define BTRFS_QGROUP_LEVEL_SHIFT 48
static inline __u64 btrfs_qgroup_level(__u64 qgroupid)
static inline __u16 btrfs_qgroup_level(__u64 qgroupid)
{
return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
return (__u16)(qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT);
}
/*