for-6.12-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmbjB00ACgkQxWXV+ddt
 WDsZ/xAAmKAPYvn9P8UZcsG+J0Uxs+OyDJ6SDxkZWw/m3gH1SzRH1+baWNIFvsTO
 O5GcEsjXkjfC+vBOU0Hoje1YsvLXboZLM85NvgcA8FZPAnUkLkUjVD1jYTGN7YPM
 gfkVFUJWdYs6uYTAX8bCIxgAqmABLJ6Jlk9HFpL82WqkUQLd/0vCnUpmRXPdG+2D
 RiL9b8HG9v9vahMLyVhUUdeoubD88/lNqIhbiVCoqqC+6IuFYJlpj2nZEO1N/k5F
 iX2VBMayRXbX0JziryU6fuE0VIr0yEttRUDdjgxL4SwCcrXBQzFaIgV43ygWbW+Y
 QI6nuD+hhOwDq3Dn0zz27LV5d0A5YNAGV/nO042FC28GL+qZeensX9SVKxR3ig99
 tLkHhyQ8q8AdanVk/LskbBjm2LaARJIKDVmgpllVR9o+ur4P23koVsPTz/VTgBKt
 DcMCHNpRrryz3/S4dCcNvN7NCxZkKiPOgVx0Q6hPj/URG9LB72Gu+wrQ2wZAdxkV
 yIbY2EOEiks0fnuRVrG0I0WRJjyWZUSsTpjnC8VXSK+6hdfi0M2Vr+ErFMbU95Ys
 FyGEHfGGP5WBEEE/uxPOMRc9u6KOCfHoAjWrMPyKeJDqWS2gQ4ovOw+JjzpmaqXJ
 qghLDjqjkPs8c1JlnAFp9QEeiYJRX1C3qoiBU5Axt/7J36sYrEM=
 =kDNG
 -----END PGP SIGNATURE-----

Merge tag 'for-6.12-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "This brings mostly refactoring, cleanups, minor performance
  optimizations and usual fixes. The folio API conversions are most
  noticeable.

  There's one less visible change that could have a high impact. The
  extent lock scope for reads is reduced and no longer held for the
  entire operation. In the buffered read case it's left to the page or
  inode lock; some direct io read synchronization is still needed.

  Deadlocks induced by page faults during direct io used to be prevented
  by a 4K limitation on the requests, e.g. for io_uring. In the future
  this will allow smoother integration with iomap, where the extent read
  lock was a major obstacle (sketch below).

  User visible changes:

   - the FSTRIM ioctl updates the processed range even after an error or
     interruption

   - the cleaner thread is woken up in the SYNC ioctl instead of the
     transaction thread, which can take some delay before waking up the
     cleaner; this can speed up cleaning of deleted subvolumes

   - print an error message when opening a device fails, e.g. when it's
     unexpectedly read-only

  Core changes:

   - improved extent map handling in various ways (locking, iteration, ...)

   - new assertions and locking annotations

   - raid-stripe-tree locking fixes

   - use xarray for tracking dirty qgroup extents, switched from the
     rb-tree (sketch below)

   - turn the subpage test into a compile-time condition if possible
     (e.g. on x86_64 with 4K pages); this allows skipping a lot of ifs
     and removing dead code

   - more preparatory work for compression in subpage mode

  Cleanups and refactoring:

   - folio API conversions, covering many simple cases where a page is
     passed and can be switched to a folio (sketch below)

   - more subpage code refactoring, update page state bitmap processing

   - introduce auto free for the btrfs_path structure and use it for the
     simple cases (sketch below)"

* tag 'for-6.12-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (110 commits)
  btrfs: only unlock the to-be-submitted ranges inside a folio
  btrfs: merge btrfs_folio_unlock_writer() into btrfs_folio_end_writer_lock()
  btrfs: BTRFS_PATH_AUTO_FREE in orphan.c
  btrfs: use btrfs_path auto free in zoned.c
  btrfs: DEFINE_FREE for struct btrfs_path
  btrfs: remove btrfs_folio_end_all_writers()
  btrfs: constify more pointer parameters
  btrfs: rework BTRFS_I as macro to preserve parameter const
  btrfs: add and use helper to verify the calling task has locked the inode
  btrfs: always update fstrim_range on failure in FITRIM ioctl
  btrfs: convert copy_inline_to_page() to use folio
  btrfs: convert btrfs_decompress() to take a folio
  btrfs: convert zstd_decompress() to take a folio
  btrfs: convert lzo_decompress() to take a folio
  btrfs: convert zlib_decompress() to take a folio
  btrfs: convert try_release_extent_mapping() to take a folio
  btrfs: convert try_release_extent_state() to take a folio
  btrfs: convert submit_eb_page() to take a folio
  btrfs: convert submit_eb_subpage() to take a folio
  btrfs: convert read_key_bytes() to take a folio
  ...
Linus Torvalds 2024-09-16 13:10:46 +02:00
commit 7a40974fd0
65 changed files with 1583 additions and 1597 deletions


@ -219,8 +219,8 @@ static void free_pref(struct prelim_ref *ref)
* A -1 return indicates ref1 is a 'lower' block than ref2, while 1
* indicates a 'higher' block.
*/
static int prelim_ref_compare(struct prelim_ref *ref1,
struct prelim_ref *ref2)
static int prelim_ref_compare(const struct prelim_ref *ref1,
const struct prelim_ref *ref2)
{
if (ref1->level < ref2->level)
return -1;
@ -251,7 +251,7 @@ static int prelim_ref_compare(struct prelim_ref *ref1,
}
static void update_share_count(struct share_check *sc, int oldcount,
int newcount, struct prelim_ref *newref)
int newcount, const struct prelim_ref *newref)
{
if ((!sc) || (oldcount == 0 && newcount < 1))
return;


@ -53,7 +53,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
/*
* Allocate a btrfs_bio structure. The btrfs_bio is the main I/O container for
* btrfs, and is used for all I/O submitted through btrfs_submit_bio.
* btrfs, and is used for all I/O submitted through btrfs_submit_bbio().
*
* Just like the underlying bio_alloc_bioset it will not fail as it is backed by
* a mempool.
@ -120,12 +120,6 @@ static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
}
}
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
__btrfs_bio_end_io(bbio);
}
static void btrfs_orig_write_end_io(struct bio *bio);
static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
@ -147,8 +141,9 @@ static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
}
}
static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
struct btrfs_bio *orig_bbio = bbio->private;
@ -179,7 +174,7 @@ static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
static void btrfs_repair_done(struct btrfs_failed_bio *fbio)
{
if (atomic_dec_and_test(&fbio->repair_count)) {
btrfs_orig_bbio_end_io(fbio->bbio);
btrfs_bio_end_io(fbio->bbio, fbio->bbio->bio.bi_status);
mempool_free(fbio, &btrfs_failed_bio_pool);
}
}
@ -211,7 +206,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
goto done;
}
btrfs_submit_bio(repair_bbio, mirror);
btrfs_submit_bbio(repair_bbio, mirror);
return;
}
@ -280,7 +275,7 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
btrfs_submit_bio(repair_bbio, mirror);
btrfs_submit_bbio(repair_bbio, mirror);
return fbio;
}
@ -326,7 +321,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
if (fbio)
btrfs_repair_done(fbio);
else
btrfs_orig_bbio_end_io(bbio);
btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
@ -360,7 +355,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
if (is_data_bbio(bbio))
btrfs_check_read_bio(bbio, bbio->bio.bi_private);
else
btrfs_orig_bbio_end_io(bbio);
btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
static void btrfs_simple_end_io(struct bio *bio)
@ -380,7 +375,7 @@ static void btrfs_simple_end_io(struct bio *bio)
} else {
if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
btrfs_record_physical_zoned(bbio);
btrfs_orig_bbio_end_io(bbio);
btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
}
@ -394,7 +389,7 @@ static void btrfs_raid56_end_io(struct bio *bio)
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
btrfs_check_read_bio(bbio, NULL);
else
btrfs_orig_bbio_end_io(bbio);
btrfs_bio_end_io(bbio, bbio->bio.bi_status);
btrfs_put_bioc(bioc);
}
@ -424,7 +419,7 @@ static void btrfs_orig_write_end_io(struct bio *bio)
if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
btrfs_orig_bbio_end_io(bbio);
btrfs_bio_end_io(bbio, bbio->bio.bi_status);
btrfs_put_bioc(bioc);
}
@ -502,8 +497,8 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}
static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
struct btrfs_io_stripe *smap, int mirror_num)
static void btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
struct btrfs_io_stripe *smap, int mirror_num)
{
if (!bioc) {
/* Single mirror read/write fast path. */
@ -593,7 +588,7 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
/* If an error occurred we just want to clean up the bio and move on. */
if (bio->bi_status) {
btrfs_orig_bbio_end_io(async->bbio);
btrfs_bio_end_io(async->bbio, async->bbio->bio.bi_status);
return;
}
@ -603,7 +598,7 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
* context. This changes nothing when cgroups aren't in use.
*/
bio->bi_opf |= REQ_BTRFS_CGROUP_PUNT;
__btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
}
static bool should_async_write(struct btrfs_bio *bbio)
@ -678,7 +673,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
blk_status_t ret;
int error;
smap.is_scrub = !bbio->inode;
if (!bbio->inode || btrfs_is_data_reloc_root(inode->root))
smap.rst_search_commit_root = true;
else
smap.rst_search_commit_root = false;
btrfs_bio_counter_inc_blocked(fs_info);
error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
@ -749,7 +747,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
}
}
__btrfs_submit_bio(bio, bioc, &smap, mirror_num);
btrfs_submit_bio(bio, bioc, &smap, mirror_num);
done:
return map_length == length;
@ -765,16 +763,14 @@ fail:
ASSERT(bbio->bio.bi_pool == &btrfs_clone_bioset);
ASSERT(remaining);
remaining->bio.bi_status = ret;
btrfs_orig_bbio_end_io(remaining);
btrfs_bio_end_io(remaining, ret);
}
bbio->bio.bi_status = ret;
btrfs_orig_bbio_end_io(bbio);
btrfs_bio_end_io(bbio, ret);
/* Do not submit another chunk */
return true;
}
void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)
void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
{
/* If bbio->inode is not populated, its file_offset must be 0. */
ASSERT(bbio->inode || bbio->file_offset == 0);
@ -786,7 +782,7 @@ void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)
/*
* Submit a repair write.
*
* This bypasses btrfs_submit_bio deliberately, as that writes all copies in a
* This bypasses btrfs_submit_bbio() deliberately, as that writes all copies in a
* RAID setup. Here we only want to write the one bad copy, so we do the
* mapping ourselves and submit the bio directly.
*
@ -875,7 +871,7 @@ void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_
ASSERT(smap.dev == fs_info->dev_replace.srcdev);
smap.dev = fs_info->dev_replace.tgtdev;
}
__btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num);
btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num);
return;
fail:


@ -29,7 +29,7 @@ typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
/*
* Highlevel btrfs I/O structure. It is allocated by btrfs_bio_alloc and
* passed to btrfs_submit_bio for mapping to the physical devices.
* passed to btrfs_submit_bbio() for mapping to the physical devices.
*/
struct btrfs_bio {
/*
@ -42,7 +42,7 @@ struct btrfs_bio {
union {
/*
* For data reads: checksumming and original I/O information.
* (for internal use in the btrfs_submit_bio machinery only)
* (for internal use in the btrfs_submit_bbio() machinery only)
*/
struct {
u8 *csum;
@ -104,7 +104,7 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
/* Submit using blkcg_punt_bio_submit. */
#define REQ_BTRFS_CGROUP_PUNT REQ_FS_PRIVATE
void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num);
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct folio *folio,


@ -23,7 +23,7 @@
#include "extent-tree.h"
#ifdef CONFIG_BTRFS_DEBUG
int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group)
int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
@ -40,9 +40,9 @@ int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group)
*
* Should be called with balance_lock held
*/
static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
static u64 get_restripe_target(const struct btrfs_fs_info *fs_info, u64 flags)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
const struct btrfs_balance_control *bctl = fs_info->balance_ctl;
u64 target = 0;
if (!bctl)
@ -1415,9 +1415,9 @@ out:
}
static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
struct btrfs_block_group *bg)
const struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_transaction *prev_trans = NULL;
const u64 start = bg->start;
const u64 end = start + bg->length - 1;
@ -1756,14 +1756,14 @@ static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
return bg1->used > bg2->used;
}
static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
static inline bool btrfs_should_reclaim(const struct btrfs_fs_info *fs_info)
{
if (btrfs_is_zoned(fs_info))
return btrfs_zoned_should_reclaim(fs_info);
return true;
}
static bool should_reclaim_block_group(struct btrfs_block_group *bg, u64 bytes_freed)
static bool should_reclaim_block_group(const struct btrfs_block_group *bg, u64 bytes_freed)
{
const int thresh_pct = btrfs_calc_reclaim_threshold(bg->space_info);
u64 thresh_bytes = mult_perc(bg->length, thresh_pct);
@ -2006,8 +2006,8 @@ void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg)
spin_unlock(&fs_info->unused_bgs_lock);
}
static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
struct btrfs_path *path)
static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key *key,
const struct btrfs_path *path)
{
struct btrfs_chunk_map *map;
struct btrfs_block_group_item bg;
@ -2055,7 +2055,7 @@ out_free_map:
static int find_first_block_group(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_key *key)
const struct btrfs_key *key)
{
struct btrfs_root *root = btrfs_block_group_root(fs_info);
int ret;
@ -2640,8 +2640,8 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
}
static int insert_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 chunk_offset,
u64 start, u64 num_bytes)
const struct btrfs_device *device, u64 chunk_offset,
u64 start, u64 num_bytes)
{
struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_root *root = fs_info->dev_root;
@ -2817,7 +2817,7 @@ next:
* For extent tree v2 we use the block_group_item->chunk_offset to point at our
* global root id. For v1 it's always set to BTRFS_FIRST_CHUNK_TREE_OBJECTID.
*/
static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
static u64 calculate_global_root_id(const struct btrfs_fs_info *fs_info, u64 offset)
{
u64 div = SZ_1G;
u64 index;
@ -3842,8 +3842,8 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
}
}
static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, int force)
static int should_alloc_chunk(const struct btrfs_fs_info *fs_info,
const struct btrfs_space_info *sinfo, int force)
{
u64 bytes_used = btrfs_space_info_used(sinfo, false);
u64 thresh;
@ -4218,7 +4218,7 @@ out:
return ret;
}
static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
static u64 get_profile_num_devs(const struct btrfs_fs_info *fs_info, u64 type)
{
u64 num_dev;
@ -4622,7 +4622,7 @@ int btrfs_use_block_group_size_class(struct btrfs_block_group *bg,
return 0;
}
bool btrfs_block_group_should_use_size_class(struct btrfs_block_group *bg)
bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg)
{
if (btrfs_is_zoned(bg->fs_info))
return false;


@ -266,7 +266,7 @@ struct btrfs_block_group {
u64 reclaim_mark;
};
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
static inline u64 btrfs_block_group_end(const struct btrfs_block_group *block_group)
{
return (block_group->start + block_group->length);
}
@ -278,8 +278,7 @@ static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg)
return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0);
}
static inline bool btrfs_is_block_group_data_only(
struct btrfs_block_group *block_group)
static inline bool btrfs_is_block_group_data_only(const struct btrfs_block_group *block_group)
{
/*
* In mixed mode the fragmentation is expected to be high, lowering the
@ -290,7 +289,7 @@ static inline bool btrfs_is_block_group_data_only(
}
#ifdef CONFIG_BTRFS_DEBUG
int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group);
int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group);
#endif
struct btrfs_block_group *btrfs_lookup_first_block_group(
@ -370,7 +369,7 @@ static inline u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}
static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
static inline int btrfs_block_group_done(const struct btrfs_block_group *cache)
{
smp_mb();
return cache->cached == BTRFS_CACHE_FINISHED ||
@ -387,6 +386,6 @@ enum btrfs_block_group_size_class btrfs_calc_block_group_size_class(u64 size);
int btrfs_use_block_group_size_class(struct btrfs_block_group *bg,
enum btrfs_block_group_size_class size_class,
bool force_wrong_size_class);
bool btrfs_block_group_should_use_size_class(struct btrfs_block_group *bg);
bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg);
#endif /* BTRFS_BLOCK_GROUP_H */


@ -553,7 +553,7 @@ try_reserve:
return ERR_PTR(ret);
}
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
int btrfs_check_trunc_cache_free_space(const struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv)
{
u64 needed_bytes;


@ -89,7 +89,7 @@ void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info);
struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u32 blocksize);
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
int btrfs_check_trunc_cache_free_space(const struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,


@ -350,10 +350,12 @@ static inline void btrfs_set_first_dir_index_to_log(struct btrfs_inode *inode,
WRITE_ONCE(inode->first_dir_index_to_log, index);
}
static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
{
return container_of(inode, struct btrfs_inode, vfs_inode);
}
/* Type checked and const-preserving VFS inode -> btrfs inode. */
#define BTRFS_I(_inode) \
_Generic(_inode, \
struct inode *: container_of(_inode, struct btrfs_inode, vfs_inode), \
const struct inode *: (const struct btrfs_inode *)container_of( \
_inode, const struct btrfs_inode, vfs_inode))
static inline unsigned long btrfs_inode_hash(u64 objectid,
const struct btrfs_root *root)
@ -505,6 +507,14 @@ static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode)
return true;
}
static inline void btrfs_assert_inode_locked(struct btrfs_inode *inode)
{
/* Immediately trigger a crash if the inode is not locked. */
ASSERT(inode_is_locked(&inode->vfs_inode));
/* Trigger a splat in dmesg if this task is not holding the lock. */
lockdep_assert_held(&inode->vfs_inode.i_rwsem);
}
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
@ -578,7 +588,7 @@ struct inode *btrfs_iget_path(u64 ino, struct btrfs_root *root,
struct btrfs_path *path);
struct inode *btrfs_iget(u64 ino, struct btrfs_root *root);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct page *page, u64 start, u64 len);
struct folio *folio, u64 start, u64 len);
int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode);
int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
@ -596,9 +606,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
struct btrfs_trans_handle *trans, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_folio,
u64 start, u64 end, struct writeback_control *wbc);
int btrfs_writepage_cow_fixup(struct page *page);
int btrfs_writepage_cow_fixup(struct folio *folio);
int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
int compress_type);
int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,


@ -138,15 +138,15 @@ static int compression_decompress_bio(struct list_head *ws,
}
static int compression_decompress(int type, struct list_head *ws,
const u8 *data_in, struct page *dest_page,
const u8 *data_in, struct folio *dest_folio,
unsigned long dest_pgoff, size_t srclen, size_t destlen)
{
switch (type) {
case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page,
case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_folio,
dest_pgoff, srclen, destlen);
case BTRFS_COMPRESS_LZO: return lzo_decompress(ws, data_in, dest_page,
case BTRFS_COMPRESS_LZO: return lzo_decompress(ws, data_in, dest_folio,
dest_pgoff, srclen, destlen);
case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page,
case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_folio,
dest_pgoff, srclen, destlen);
case BTRFS_COMPRESS_NONE:
default:
@ -395,7 +395,7 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
cb->bbio.ordered = ordered;
btrfs_add_compressed_bio_folios(cb);
btrfs_submit_bio(&cb->bbio, 0);
btrfs_submit_bbio(&cb->bbio, 0);
}
/*
@ -420,7 +420,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size;
u64 isize = i_size_read(inode);
int ret;
struct page *page;
struct folio *folio;
struct extent_map *em;
struct address_space *mapping = inode->i_mapping;
struct extent_map_tree *em_tree;
@ -453,9 +453,13 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (pg_index > end_index)
break;
page = xa_load(&mapping->i_pages, pg_index);
if (page && !xa_is_value(page)) {
sectors_missed += (PAGE_SIZE - offset_in_page(cur)) >>
folio = __filemap_get_folio(mapping, pg_index, 0, 0);
if (!IS_ERR(folio)) {
u64 folio_sz = folio_size(folio);
u64 offset = offset_in_folio(folio, cur);
folio_put(folio);
sectors_missed += (folio_sz - offset) >>
fs_info->sectorsize_bits;
/* Beyond threshold, no need to continue */
@ -466,35 +470,35 @@ static noinline int add_ra_bio_pages(struct inode *inode,
* Jump to next page start as we already have page for
* current offset.
*/
cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
cur += (folio_sz - offset);
continue;
}
page = __page_cache_alloc(mapping_gfp_constraint(mapping,
~__GFP_FS));
if (!page)
folio = filemap_alloc_folio(mapping_gfp_constraint(mapping,
~__GFP_FS), 0);
if (!folio)
break;
if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
put_page(page);
if (filemap_add_folio(mapping, folio, pg_index, GFP_NOFS)) {
/* There is already a page, skip to page end */
cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
cur += folio_size(folio);
folio_put(folio);
continue;
}
if (!*memstall && PageWorkingset(page)) {
if (!*memstall && folio_test_workingset(folio)) {
psi_memstall_enter(pflags);
*memstall = 1;
}
ret = set_page_extent_mapped(page);
ret = set_folio_extent_mapped(folio);
if (ret < 0) {
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
break;
}
page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1;
page_end = (pg_index << PAGE_SHIFT) + folio_size(folio) - 1;
lock_extent(tree, cur, page_end, NULL);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
@ -511,28 +515,28 @@ static noinline int add_ra_bio_pages(struct inode *inode,
orig_bio->bi_iter.bi_sector) {
free_extent_map(em);
unlock_extent(tree, cur, page_end, NULL);
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
break;
}
add_size = min(em->start + em->len, page_end + 1) - cur;
free_extent_map(em);
unlock_extent(tree, cur, page_end, NULL);
if (page->index == end_index) {
size_t zero_offset = offset_in_page(isize);
if (folio->index == end_index) {
size_t zero_offset = offset_in_folio(folio, isize);
if (zero_offset) {
int zeros;
zeros = PAGE_SIZE - zero_offset;
memzero_page(page, zero_offset, zeros);
zeros = folio_size(folio) - zero_offset;
folio_zero_range(folio, zero_offset, zeros);
}
}
ret = bio_add_page(orig_bio, page, add_size, offset_in_page(cur));
if (ret != add_size) {
unlock_extent(tree, cur, page_end, NULL);
unlock_page(page);
put_page(page);
if (!bio_add_folio(orig_bio, folio, add_size,
offset_in_folio(folio, cur))) {
folio_unlock(folio);
folio_put(folio);
break;
}
/*
@ -541,9 +545,9 @@ static noinline int add_ra_bio_pages(struct inode *inode,
* subpage::readers and to unlock the page.
*/
if (fs_info->sectorsize < PAGE_SIZE)
btrfs_subpage_start_reader(fs_info, page_folio(page),
cur, add_size);
put_page(page);
btrfs_subpage_start_reader(fs_info, folio, cur,
add_size);
folio_put(folio);
cur += add_size;
}
return 0;
@ -626,7 +630,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
if (memstall)
psi_memstall_leave(&pflags);
btrfs_submit_bio(&cb->bbio, 0);
btrfs_submit_bbio(&cb->bbio, 0);
return;
out_free_compressed_pages:
@ -1057,10 +1061,10 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
* single page, and we want to read a single page out of it.
* start_byte tells us the offset into the compressed data we're interested in
*/
int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
unsigned long dest_pgoff, size_t srclen, size_t destlen)
{
struct btrfs_fs_info *fs_info = page_to_fs_info(dest_page);
struct btrfs_fs_info *fs_info = folio_to_fs_info(dest_folio);
struct list_head *workspace;
const u32 sectorsize = fs_info->sectorsize;
int ret;
@ -1073,7 +1077,7 @@ int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize);
workspace = get_workspace(type, 0);
ret = compression_decompress(type, workspace, data_in, dest_page,
ret = compression_decompress(type, workspace, data_in, dest_folio,
dest_pgoff, srclen, destlen);
put_workspace(type, workspace);


@ -82,13 +82,21 @@ static inline unsigned int btrfs_compress_level(unsigned int type_level)
return ((type_level & 0xF0) >> 4);
}
/* @range_end must be exclusive. */
static inline u32 btrfs_calc_input_length(u64 range_end, u64 cur)
{
u64 page_end = round_down(cur, PAGE_SIZE) + PAGE_SIZE;
return min(range_end, page_end) - cur;
}
int __init btrfs_init_compress(void);
void __cold btrfs_exit_compress(void);
int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
unsigned long start_byte, size_t srclen, size_t destlen);
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
struct compressed_bio *cb, u32 decompressed);
@ -154,7 +162,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
unsigned long *total_in, unsigned long *total_out);
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zlib_decompress(struct list_head *ws, const u8 *data_in,
struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
struct list_head *zlib_alloc_workspace(unsigned int level);
void zlib_free_workspace(struct list_head *ws);
@ -165,7 +173,7 @@ int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
unsigned long *total_in, unsigned long *total_out);
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int lzo_decompress(struct list_head *ws, const u8 *data_in,
struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
struct list_head *lzo_alloc_workspace(unsigned int level);
void lzo_free_workspace(struct list_head *ws);
@ -175,7 +183,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
unsigned long *total_in, unsigned long *total_out);
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zstd_decompress(struct list_head *ws, const u8 *data_in,
struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
void zstd_init_workspace_manager(void);
void zstd_cleanup_workspace_manager(void);


@ -2564,8 +2564,8 @@ int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
*
*/
static void fixup_low_keys(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_disk_key *key, int level)
const struct btrfs_path *path,
const struct btrfs_disk_key *key, int level)
{
int i;
struct extent_buffer *t;
@ -2594,7 +2594,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans,
* that the new key won't break the order
*/
void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
const struct btrfs_path *path,
const struct btrfs_key *new_key)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@ -2660,8 +2660,8 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
* is correct, we only need to bother the last key of @left and the first
* key of @right.
*/
static bool check_sibling_keys(struct extent_buffer *left,
struct extent_buffer *right)
static bool check_sibling_keys(const struct extent_buffer *left,
const struct extent_buffer *right)
{
struct btrfs_key left_last;
struct btrfs_key right_first;
@ -2928,8 +2928,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
* blocknr is the block the key points to.
*/
static int insert_ptr(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_disk_key *key, u64 bytenr,
const struct btrfs_path *path,
const struct btrfs_disk_key *key, u64 bytenr,
int slot, int level)
{
struct extent_buffer *lower;
@ -4019,7 +4019,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
* the front.
*/
void btrfs_truncate_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u32 new_size, int from_end)
const struct btrfs_path *path, u32 new_size, int from_end)
{
int slot;
struct extent_buffer *leaf;
@ -4111,7 +4111,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
* make the item pointed to by the path bigger, data_size is the added size.
*/
void btrfs_extend_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u32 data_size)
const struct btrfs_path *path, u32 data_size)
{
int slot;
struct extent_buffer *leaf;


@ -6,6 +6,7 @@
#ifndef BTRFS_CTREE_H
#define BTRFS_CTREE_H
#include "linux/cleanup.h"
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
@ -84,6 +85,9 @@ struct btrfs_path {
unsigned int nowait:1;
};
#define BTRFS_PATH_AUTO_FREE(path_name) \
struct btrfs_path *path_name __free(btrfs_free_path) = NULL
/*
* The state of btrfs root
*/
@ -538,7 +542,7 @@ int btrfs_previous_item(struct btrfs_root *root,
int btrfs_previous_extent_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid);
void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
const struct btrfs_path *path,
const struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
@ -572,9 +576,9 @@ bool btrfs_block_can_be_shared(struct btrfs_trans_handle *trans,
int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot);
void btrfs_extend_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u32 data_size);
const struct btrfs_path *path, u32 data_size);
void btrfs_truncate_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u32 new_size, int from_end);
const struct btrfs_path *path, u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@ -598,6 +602,7 @@ int btrfs_search_slot_for_read(struct btrfs_root *root,
void btrfs_release_path(struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
DEFINE_FREE(btrfs_free_path, struct btrfs_path *, btrfs_free_path(_T))
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int slot, int nr);


@ -45,8 +45,8 @@ struct inode_defrag {
u32 extent_thresh;
};
static int __compare_inode_defrag(struct inode_defrag *defrag1,
struct inode_defrag *defrag2)
static int compare_inode_defrag(const struct inode_defrag *defrag1,
const struct inode_defrag *defrag2)
{
if (defrag1->root > defrag2->root)
return 1;
@ -61,16 +61,14 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,
}
/*
* Pop a record for an inode into the defrag tree. The lock must be held
* Insert a record for an inode into the defrag tree. The lock must be held
* already.
*
* If you're inserting a record for an older transid than an existing record,
* the transid already in the tree is lowered.
*
* If an existing record is found the defrag item you pass in is freed.
*/
static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
struct inode_defrag *defrag)
static int btrfs_insert_inode_defrag(struct btrfs_inode *inode,
struct inode_defrag *defrag)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct inode_defrag *entry;
@ -83,7 +81,7 @@ static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
parent = *p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
ret = __compare_inode_defrag(defrag, entry);
ret = compare_inode_defrag(defrag, entry);
if (ret < 0)
p = &parent->rb_left;
else if (ret > 0)
@ -107,7 +105,7 @@ static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
return 0;
}
static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
static inline int need_auto_defrag(struct btrfs_fs_info *fs_info)
{
if (!btrfs_test_opt(fs_info, AUTO_DEFRAG))
return 0;
@ -119,34 +117,28 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
}
/*
* Insert a defrag record for this inode if auto defrag is enabled.
* Insert a defrag record for this inode if auto defrag is enabled. No errors
* returned as they're not considered fatal.
*/
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u32 extent_thresh)
void btrfs_add_inode_defrag(struct btrfs_inode *inode, u32 extent_thresh)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct inode_defrag *defrag;
u64 transid;
int ret;
if (!__need_auto_defrag(fs_info))
return 0;
if (!need_auto_defrag(fs_info))
return;
if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags))
return 0;
if (trans)
transid = trans->transid;
else
transid = btrfs_get_root_last_trans(root);
return;
defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
if (!defrag)
return -ENOMEM;
return;
defrag->ino = btrfs_ino(inode);
defrag->transid = transid;
defrag->transid = btrfs_get_root_last_trans(root);
defrag->root = btrfs_root_id(root);
defrag->extent_thresh = extent_thresh;
@ -157,14 +149,13 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
* and then re-read this inode, this new inode doesn't have
* IN_DEFRAG flag. At the case, we may find the existed defrag.
*/
ret = __btrfs_add_inode_defrag(inode, defrag);
ret = btrfs_insert_inode_defrag(inode, defrag);
if (ret)
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
} else {
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
}
spin_unlock(&fs_info->defrag_inodes_lock);
return 0;
}
/*
@ -189,7 +180,7 @@ static struct inode_defrag *btrfs_pick_defrag_inode(
parent = p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
ret = __compare_inode_defrag(&tmp, entry);
ret = compare_inode_defrag(&tmp, entry);
if (ret < 0)
p = parent->rb_left;
else if (ret > 0)
@ -198,7 +189,7 @@ static struct inode_defrag *btrfs_pick_defrag_inode(
goto out;
}
if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
if (parent && compare_inode_defrag(&tmp, entry) > 0) {
parent = rb_next(parent);
if (parent)
entry = rb_entry(parent, struct inode_defrag, rb_node);
@ -214,27 +205,22 @@ out:
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
{
struct inode_defrag *defrag;
struct rb_node *node;
struct inode_defrag *defrag, *next;
spin_lock(&fs_info->defrag_inodes_lock);
node = rb_first(&fs_info->defrag_inodes);
while (node) {
rb_erase(node, &fs_info->defrag_inodes);
defrag = rb_entry(node, struct inode_defrag, rb_node);
rbtree_postorder_for_each_entry_safe(defrag, next,
&fs_info->defrag_inodes, rb_node)
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
cond_resched_lock(&fs_info->defrag_inodes_lock);
node = rb_first(&fs_info->defrag_inodes);
}
spin_unlock(&fs_info->defrag_inodes_lock);
}
#define BTRFS_DEFRAG_BATCH 1024
static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
struct inode_defrag *defrag)
static int btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
struct inode_defrag *defrag,
struct file_ra_state *ra)
{
struct btrfs_root *inode_root;
struct inode *inode;
@ -245,7 +231,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
again:
if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state))
goto cleanup;
if (!__need_auto_defrag(fs_info))
if (!need_auto_defrag(fs_info))
goto cleanup;
/* Get the inode */
@ -273,9 +259,10 @@ again:
range.len = (u64)-1;
range.start = cur;
range.extent_thresh = defrag->extent_thresh;
file_ra_state_init(ra, inode->i_mapping);
sb_start_write(fs_info->sb);
ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
ret = btrfs_defrag_file(inode, ra, &range, defrag->transid,
BTRFS_DEFRAG_BATCH);
sb_end_write(fs_info->sb);
iput(inode);
@ -302,11 +289,13 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
atomic_inc(&fs_info->defrag_running);
while (1) {
struct file_ra_state ra = { 0 };
/* Pause the auto defragger. */
if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state))
break;
if (!__need_auto_defrag(fs_info))
if (!need_auto_defrag(fs_info))
break;
/* find an inode to defrag */
@ -324,7 +313,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
first_ino = defrag->ino + 1;
root_objectid = defrag->root;
__btrfs_run_defrag_inode(fs_info, defrag);
btrfs_run_defrag_inode(fs_info, defrag, &ra);
}
atomic_dec(&fs_info->defrag_running);
@ -1317,8 +1306,7 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
if (entry->start + range_len <= *last_scanned_ret)
continue;
if (ra)
page_cache_sync_readahead(inode->vfs_inode.i_mapping,
page_cache_sync_readahead(inode->vfs_inode.i_mapping,
ra, NULL, entry->start >> PAGE_SHIFT,
((entry->start + range_len - 1) >> PAGE_SHIFT) -
(entry->start >> PAGE_SHIFT) + 1);
@ -1350,7 +1338,7 @@ out:
* Entry point to file defragmentation.
*
* @inode: inode to be defragged
* @ra: readahead state (can be NUL)
* @ra: readahead state
* @range: defrag options including range and flags
* @newer_than: minimum transid to defrag
* @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode
@ -1372,12 +1360,13 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
u64 cur;
u64 last_byte;
bool do_compress = (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS);
bool ra_allocated = false;
int compress_type = BTRFS_COMPRESS_ZLIB;
int ret = 0;
u32 extent_thresh = range->extent_thresh;
pgoff_t start_index;
ASSERT(ra);
if (isize == 0)
return 0;
@ -1406,18 +1395,6 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
cur = round_down(range->start, fs_info->sectorsize);
last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
/*
* If we were not given a ra, allocate a readahead context. As
* readahead is just an optimization, defrag will work without it so
* we don't error out.
*/
if (!ra) {
ra_allocated = true;
ra = kzalloc(sizeof(*ra), GFP_KERNEL);
if (ra)
file_ra_state_init(ra, inode->i_mapping);
}
/*
* Make writeback start from the beginning of the range, so that the
* defrag range can be written sequentially.
@ -1472,8 +1449,6 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
cond_resched();
}
if (ra_allocated)
kfree(ra);
/*
* Update range.start for autodefrag, this will indicate where to start
* in next run.


@ -18,8 +18,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
u64 newer_than, unsigned long max_to_defrag);
int __init btrfs_auto_defrag_init(void);
void __cold btrfs_auto_defrag_exit(void);
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u32 extent_thresh);
void btrfs_add_inode_defrag(struct btrfs_inode *inode, u32 extent_thresh);
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_defrag_root(struct btrfs_root *root);


@ -855,11 +855,17 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
/* Record qgroup extent info if provided */
if (qrecord) {
if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
delayed_refs, qrecord))
int ret;
ret = btrfs_qgroup_trace_extent_nolock(trans->fs_info,
delayed_refs, qrecord);
if (ret) {
/* Clean up if insertion fails or item exists. */
xa_release(&delayed_refs->dirty_extents, qrecord->bytenr);
kfree(qrecord);
else
} else {
qrecord_inserted = true;
}
}
trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
@ -1005,18 +1011,16 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
return -ENOMEM;
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref) {
kmem_cache_free(btrfs_delayed_ref_node_cachep, node);
return -ENOMEM;
}
if (!head_ref)
goto free_node;
if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {
kmem_cache_free(btrfs_delayed_ref_node_cachep, node);
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
return -ENOMEM;
}
if (!record)
goto free_head_ref;
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents,
generic_ref->bytenr, GFP_NOFS))
goto free_record;
}
init_delayed_ref_common(fs_info, node, generic_ref);
@ -1052,6 +1056,14 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
if (qrecord_inserted)
return btrfs_qgroup_trace_extent_post(trans, record);
return 0;
free_record:
kfree(record);
free_head_ref:
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
free_node:
kmem_cache_free(btrfs_delayed_ref_node_cachep, node);
return -ENOMEM;
}
/*


@ -202,8 +202,8 @@ struct btrfs_delayed_ref_root {
/* head ref rbtree */
struct rb_root_cached href_root;
/* dirty extent records */
struct rb_root dirty_extent_root;
/* Track dirty extent records. */
struct xarray dirty_extents;
/* this spin lock protects the rbtree and the entries inside */
spinlock_t lock;


@ -824,22 +824,45 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
struct btrfs_device *srcdev,
struct btrfs_device *tgtdev)
{
u64 start = 0;
int i;
struct rb_node *node;
/*
* The chunk mutex must be held so that no new chunks can be created
* while we are updating existing chunks. This guarantees we don't miss
* any new chunk that gets created for a range that falls before the
* range of the last chunk we processed.
*/
lockdep_assert_held(&fs_info->chunk_mutex);
write_lock(&fs_info->mapping_tree_lock);
do {
node = rb_first_cached(&fs_info->mapping_tree);
while (node) {
struct rb_node *next = rb_next(node);
struct btrfs_chunk_map *map;
u64 next_start;
map = btrfs_find_chunk_map_nolock(fs_info, start, U64_MAX);
if (!map)
break;
for (i = 0; i < map->num_stripes; i++)
map = rb_entry(node, struct btrfs_chunk_map, rb_node);
next_start = map->start + map->chunk_len;
for (int i = 0; i < map->num_stripes; i++)
if (srcdev == map->stripes[i].dev)
map->stripes[i].dev = tgtdev;
start = map->start + map->chunk_len;
btrfs_free_chunk_map(map);
} while (start);
if (cond_resched_rwlock_write(&fs_info->mapping_tree_lock)) {
map = btrfs_find_chunk_map_nolock(fs_info, next_start, U64_MAX);
if (!map)
break;
node = &map->rb_node;
/*
* Drop the lookup reference since we are holding the
* lock in write mode and no one can remove the chunk
* map from the tree and drop its tree reference.
*/
btrfs_free_chunk_map(map);
} else {
node = next;
}
}
write_unlock(&fs_info->mapping_tree_lock);
}


@ -40,11 +40,21 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
struct btrfs_ordered_extent *ordered;
int ret = 0;
/* Direct lock must be taken before the extent lock. */
if (nowait) {
if (!try_lock_dio_extent(io_tree, lockstart, lockend, cached_state))
return -EAGAIN;
} else {
lock_dio_extent(io_tree, lockstart, lockend, cached_state);
}
while (1) {
if (nowait) {
if (!try_lock_extent(io_tree, lockstart, lockend,
cached_state))
return -EAGAIN;
cached_state)) {
ret = -EAGAIN;
break;
}
} else {
lock_extent(io_tree, lockstart, lockend, cached_state);
}
@ -120,6 +130,8 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
cond_resched();
}
if (ret)
unlock_dio_extent(io_tree, lockstart, lockend, cached_state);
return ret;
}
@ -353,7 +365,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
int ret = 0;
u64 len = length;
const u64 data_alloc_len = length;
bool unlock_extents = false;
u32 unlock_bits = EXTENT_LOCKED;
/*
* We could potentially fault if we have a buffer > PAGE_SIZE, and if
@ -514,7 +526,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
start, &len, flags);
if (ret < 0)
goto unlock_err;
unlock_extents = true;
/* Recalc len in case the new em is smaller than requested */
len = min(len, em->len - (start - em->start));
if (dio_data->data_space_reserved) {
@ -535,22 +546,8 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
release_offset,
release_len);
}
} else {
/*
* We need to unlock only the end area that we aren't using.
* The rest is going to be unlocked by the endio routine.
*/
lockstart = start + len;
if (lockstart < lockend)
unlock_extents = true;
}
if (unlock_extents)
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
else
free_extent_state(cached_state);
/*
* Translate extent map information to iomap.
* We trim the extents (and move the addr) even though iomap code does
@ -569,11 +566,33 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
iomap->length = len;
free_extent_map(em);
/*
* Reads will hold the EXTENT_DIO_LOCKED bit until the io is completed,
* writes only hold it for this part. We hold the extent lock until
* we're completely done with the extent map to make sure it remains
* valid.
*/
if (write)
unlock_bits |= EXTENT_DIO_LOCKED;
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
unlock_bits, &cached_state);
/* We didn't use everything, unlock the dio extent for the remainder. */
if (!write && (start + len) < lockend)
unlock_dio_extent(&BTRFS_I(inode)->io_tree, start + len,
lockend, NULL);
return 0;
unlock_err:
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
/*
* Don't use EXTENT_LOCK_BITS here in case we extend it later and forget
* to update this, be explicit that we expect EXTENT_LOCKED and
* EXTENT_DIO_LOCKED to be set here, and so that's what we're clearing.
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
EXTENT_LOCKED | EXTENT_DIO_LOCKED, &cached_state);
err:
if (dio_data->data_space_reserved) {
btrfs_free_reserved_data_space(BTRFS_I(inode),
@ -596,8 +615,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
if (!write && (iomap->type == IOMAP_HOLE)) {
/* If reading from a hole, unlock and return */
unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1,
NULL);
unlock_dio_extent(&BTRFS_I(inode)->io_tree, pos,
pos + length - 1, NULL);
return 0;
}
@ -608,8 +627,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
btrfs_finish_ordered_extent(dio_data->ordered, NULL,
pos, length, false);
else
unlock_extent(&BTRFS_I(inode)->io_tree, pos,
pos + length - 1, NULL);
unlock_dio_extent(&BTRFS_I(inode)->io_tree, pos,
pos + length - 1, NULL);
ret = -ENOTBLK;
}
if (write) {
@ -641,8 +660,8 @@ static void btrfs_dio_end_io(struct btrfs_bio *bbio)
dip->file_offset, dip->bytes,
!bio->bi_status);
} else {
unlock_extent(&inode->io_tree, dip->file_offset,
dip->file_offset + dip->bytes - 1, NULL);
unlock_dio_extent(&inode->io_tree, dip->file_offset,
dip->file_offset + dip->bytes - 1, NULL);
}
bbio->bio.bi_private = bbio->private;
@ -726,7 +745,7 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
}
}
btrfs_submit_bio(bbio, 0);
btrfs_submit_bbio(bbio, 0);
}
static const struct iomap_ops btrfs_dio_iomap_ops = {


@ -68,7 +68,7 @@ static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
};
static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group)
const struct btrfs_block_group *block_group)
{
return &discard_ctl->discard_list[block_group->discard_index];
}
@ -80,7 +80,7 @@ static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
*
* Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
*/
static bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
static bool btrfs_run_discard_work(const struct btrfs_discard_ctl *discard_ctl)
{
struct btrfs_fs_info *fs_info = container_of(discard_ctl,
struct btrfs_fs_info,


@ -525,7 +525,7 @@ static bool btree_release_folio(struct folio *folio, gfp_t gfp_flags)
if (folio_test_writeback(folio) || folio_test_dirty(folio))
return false;
return try_release_extent_buffer(&folio->page);
return try_release_extent_buffer(folio);
}
static void btree_invalidate_folio(struct folio *folio, size_t offset,
@ -1285,7 +1285,6 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_extent_buffer_leak_debug_check(fs_info);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
kfree(fs_info->subpage_info);
kvfree(fs_info);
}
@ -3322,6 +3321,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->nodesize = nodesize;
fs_info->sectorsize = sectorsize;
fs_info->sectorsize_bits = ilog2(sectorsize);
fs_info->sectors_per_page = (PAGE_SIZE >> fs_info->sectorsize_bits);
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
fs_info->stripesize = stripesize;
@ -3346,20 +3346,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
fs_info->max_inline = min_t(u64, fs_info->max_inline, fs_info->sectorsize);
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
if (sectorsize < PAGE_SIZE)
btrfs_warn(fs_info,
"read-write for sector size %u with page size %lu is experimental",
sectorsize, PAGE_SIZE);
subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL);
if (!subpage_info) {
ret = -ENOMEM;
goto fail_alloc;
}
btrfs_init_subpage_info(subpage_info, sectorsize);
fs_info->subpage_info = subpage_info;
}
ret = btrfs_init_workqueues(fs_info);
if (ret)


@ -126,7 +126,7 @@ void extent_io_tree_init(struct btrfs_fs_info *fs_info,
* Empty an io tree, removing and freeing every extent state record from the
* tree. This should be called once we are sure no other task can access the
* tree anymore, so no tree updates happen after we empty the tree and there
* aren't any waiters on any extent state record (EXTENT_LOCKED bit is never
* aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never
* set on any extent state when calling this function).
*/
void extent_io_tree_release(struct extent_io_tree *tree)
@ -141,7 +141,7 @@ void extent_io_tree_release(struct extent_io_tree *tree)
rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) {
/* Clear node to keep free_extent_state() happy. */
RB_CLEAR_NODE(&state->rb_node);
ASSERT(!(state->state & EXTENT_LOCKED));
ASSERT(!(state->state & EXTENT_LOCK_BITS));
/*
* No need for a memory barrier here, as we are holding the tree
* lock and we only change the waitqueue while holding that lock
@ -399,7 +399,7 @@ static void merge_next_state(struct extent_io_tree *tree, struct extent_state *s
*/
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
{
if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
if (state->state & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY))
return;
merge_prev_state(tree, state);
@ -445,7 +445,7 @@ static struct extent_state *insert_state(struct extent_io_tree *tree,
struct rb_node *parent = NULL;
const u64 start = state->start - 1;
const u64 end = state->end + 1;
const bool try_merge = !(bits & (EXTENT_LOCKED | EXTENT_BOUNDARY));
const bool try_merge = !(bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY));
set_state_bits(tree, state, bits, changeset);
@ -616,9 +616,6 @@ static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
* inserting elements in the tree, so the gfp mask is used to indicate which
* allocations or sleeping are allowed.
*
* Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the given
* range from the tree regardless of state (ie for truncate).
*
* The range [start, end] is inclusive.
*
* This takes the tree lock, and returns 0 on success and < 0 on error.
@ -647,8 +644,8 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (bits & EXTENT_DELALLOC)
bits |= EXTENT_NORESERVE;
wake = (bits & EXTENT_LOCKED) ? 1 : 0;
if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
wake = ((bits & EXTENT_LOCK_BITS) ? 1 : 0);
if (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY))
clear = 1;
again:
if (!prealloc) {
@ -861,8 +858,7 @@ static void cache_state_if_flags(struct extent_state *state,
static void cache_state(struct extent_state *state,
struct extent_state **cached_ptr)
{
return cache_state_if_flags(state, cached_ptr,
EXTENT_LOCKED | EXTENT_BOUNDARY);
return cache_state_if_flags(state, cached_ptr, EXTENT_LOCK_BITS | EXTENT_BOUNDARY);
}
/*
@ -1063,7 +1059,7 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int ret = 0;
u64 last_start;
u64 last_end;
u32 exclusive_bits = (bits & EXTENT_LOCKED);
u32 exclusive_bits = (bits & EXTENT_LOCK_BITS);
gfp_t mask;
set_gfp_mask_from_bits(&bits, &mask);
@ -1812,12 +1808,11 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset)
{
/*
* We don't support EXTENT_LOCKED yet, as current changeset will
* record any bits changed, so for EXTENT_LOCKED case, it will
* either fail with -EEXIST or changeset will record the whole
* range.
* We don't support EXTENT_LOCK_BITS yet, as current changeset will
* record any bits changed, so for EXTENT_LOCK_BITS case, it will either
* fail with -EEXIST or changeset will record the whole range.
*/
ASSERT(!(bits & EXTENT_LOCKED));
ASSERT(!(bits & EXTENT_LOCK_BITS));
return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}
@ -1826,26 +1821,25 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset)
{
/*
* Don't support EXTENT_LOCKED case, same reason as
* Don't support EXTENT_LOCK_BITS case, same reason as
* set_record_extent_bits().
*/
ASSERT(!(bits & EXTENT_LOCKED));
ASSERT(!(bits & EXTENT_LOCK_BITS));
return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
}
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached)
bool __try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
struct extent_state **cached)
{
int err;
u64 failed_start;
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
err = __set_extent_bit(tree, start, end, bits, &failed_start,
NULL, cached, NULL);
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
EXTENT_LOCKED, cached);
clear_extent_bit(tree, start, failed_start - 1, bits, cached);
return 0;
}
return 1;
@ -1855,23 +1849,22 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
* Either insert or lock the state struct between start and end. Use the mask
* to tell us if waiting is desired.
*/
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state)
int __lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
struct extent_state **cached_state)
{
struct extent_state *failed_state = NULL;
int err;
u64 failed_start;
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
err = __set_extent_bit(tree, start, end, bits, &failed_start,
&failed_state, cached_state, NULL);
while (err == -EEXIST) {
if (failed_start != start)
clear_extent_bit(tree, start, failed_start - 1,
EXTENT_LOCKED, cached_state);
bits, cached_state);
wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
&failed_state);
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
wait_extent_bit(tree, failed_start, end, bits, &failed_state);
err = __set_extent_bit(tree, start, end, bits,
&failed_start, &failed_state,
cached_state, NULL);
}


@ -19,6 +19,7 @@ enum {
ENUM_BIT(EXTENT_DIRTY),
ENUM_BIT(EXTENT_UPTODATE),
ENUM_BIT(EXTENT_LOCKED),
ENUM_BIT(EXTENT_DIO_LOCKED),
ENUM_BIT(EXTENT_NEW),
ENUM_BIT(EXTENT_DELALLOC),
ENUM_BIT(EXTENT_DEFRAG),
@ -67,6 +68,8 @@ enum {
EXTENT_ADD_INODE_BYTES | \
EXTENT_CLEAR_ALL_BITS)
#define EXTENT_LOCK_BITS (EXTENT_LOCKED | EXTENT_DIO_LOCKED)
/*
* Redefined bits above which are used only in the device allocation tree,
* shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV
@ -134,12 +137,22 @@ const struct btrfs_fs_info *extent_io_tree_to_fs_info(const struct extent_io_tre
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
struct extent_io_tree *tree, unsigned int owner);
void extent_io_tree_release(struct extent_io_tree *tree);
int __lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
struct extent_state **cached);
bool __try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
struct extent_state **cached);
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached);
static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached)
{
return __lock_extent(tree, start, end, EXTENT_LOCKED, cached);
}
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached);
static inline bool try_lock_extent(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached)
{
return __try_lock_extent(tree, start, end, EXTENT_LOCKED, cached);
}
int __init extent_state_init_cachep(void);
void __cold extent_state_free_cachep(void);
@ -212,5 +225,22 @@ int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
u64 *end, u64 max_bytes,
struct extent_state **cached_state);
static inline int lock_dio_extent(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached)
{
return __lock_extent(tree, start, end, EXTENT_DIO_LOCKED, cached);
}
static inline bool try_lock_dio_extent(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached)
{
return __try_lock_extent(tree, start, end, EXTENT_DIO_LOCKED, cached);
}
static inline int unlock_dio_extent(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, EXTENT_DIO_LOCKED, cached, NULL);
}
#endif /* BTRFS_EXTENT_IO_TREE_H */
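The declarations above split the extent lock into two bits: lock_extent()/try_lock_extent() keep using EXTENT_LOCKED, while the new lock_dio_extent()/try_lock_dio_extent()/unlock_dio_extent() wrappers take EXTENT_DIO_LOCKED, and EXTENT_LOCK_BITS covers both wherever the tree code previously special-cased EXTENT_LOCKED (merging, caching, wakeups, the exclusive set path). A minimal usage sketch of the DIO wrappers follows; it assumes the btrfs headers from this series, and the function name and control flow are illustrative only, not code from the diff.

/*
 * Illustrative sketch only: intended use of the DIO lock wrappers declared
 * above. The function name, the nowait handling and the placeholder comment
 * are assumptions, not part of this series.
 */
static int example_dio_read_range(struct btrfs_inode *inode, u64 start, u64 end,
				  bool nowait)
{
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct extent_state *cached = NULL;

	if (nowait) {
		/* Non-blocking attempt, e.g. for IOCB_NOWAIT style callers. */
		if (!try_lock_dio_extent(io_tree, start, end, &cached))
			return -EAGAIN;
	} else {
		lock_dio_extent(io_tree, start, end, &cached);
	}

	/* ... submit and wait for the read bios here ... */

	/* Clears only EXTENT_DIO_LOCKED on the range. */
	return unlock_dio_extent(io_tree, start, end, &cached);
}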


@ -6551,13 +6551,13 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
continue;
ret = btrfs_trim_free_extents(device, &group_trimmed);
trimmed += group_trimmed;
if (ret) {
dev_failed++;
dev_ret = ret;
break;
}
trimmed += group_trimmed;
}
mutex_unlock(&fs_devices->device_list_mutex);

File diff suppressed because it is too large.


@ -236,11 +236,11 @@ static inline void extent_changeset_free(struct extent_changeset *changeset)
kfree(changeset);
}
bool try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
bool try_release_extent_mapping(struct folio *folio, gfp_t mask);
int try_release_extent_buffer(struct folio *folio);
int btrfs_read_folio(struct file *file, struct folio *folio);
void extent_write_locked_range(struct inode *inode, const struct page *locked_page,
void extent_write_locked_range(struct inode *inode, const struct folio *locked_folio,
u64 start, u64 end, struct writeback_control *wbc,
bool pages_dirty);
int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc);
@ -249,7 +249,7 @@ int btree_write_cache_pages(struct address_space *mapping,
void btrfs_readahead(struct readahead_control *rac);
int set_folio_extent_mapped(struct folio *folio);
int set_page_extent_mapped(struct page *page);
void clear_page_extent_mapped(struct page *page);
void clear_folio_extent_mapped(struct folio *folio);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, u64 owner_root, int level);
@ -354,7 +354,7 @@ void set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
const struct page *locked_page,
const struct folio *locked_folio,
struct extent_state **cached,
u32 bits_to_clear, unsigned long page_ops);
int extent_invalidate_folio(struct extent_io_tree *tree,
@ -368,7 +368,7 @@ int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
struct folio *locked_folio, u64 *start,
u64 *end);
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,


@ -192,10 +192,13 @@ static inline u64 extent_map_block_len(const struct extent_map *em)
static inline u64 extent_map_block_end(const struct extent_map *em)
{
if (extent_map_block_start(em) + extent_map_block_len(em) <
extent_map_block_start(em))
const u64 block_start = extent_map_block_start(em);
const u64 block_end = block_start + extent_map_block_len(em);
if (block_end < block_start)
return (u64)-1;
return extent_map_block_start(em) + extent_map_block_len(em);
return block_end;
}
static bool can_merge_extent_map(const struct extent_map *em)


@ -151,7 +151,7 @@ static inline u32 max_ordered_sum_bytes(const struct btrfs_fs_info *fs_info)
* Calculate the total size needed to allocate for an ordered sum structure
* spanning @bytes in the file.
*/
static int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info, unsigned long bytes)
static int btrfs_ordered_sum_size(const struct btrfs_fs_info *fs_info, unsigned long bytes)
{
return sizeof(struct btrfs_ordered_sum) + bytes_to_csum_size(fs_info, bytes);
}
@ -1272,7 +1272,7 @@ out:
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
const struct btrfs_path *path,
struct btrfs_file_extent_item *fi,
const struct btrfs_file_extent_item *fi,
struct extent_map *em)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;


@ -74,7 +74,7 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
unsigned long *csum_bitmap);
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
const struct btrfs_path *path,
struct btrfs_file_extent_item *fi,
const struct btrfs_file_extent_item *fi,
struct extent_map *em);
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len);


@ -1617,7 +1617,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (current->journal_info == BTRFS_TRANS_DIO_WRITE_STUB) {
skip_ilock = true;
current->journal_info = NULL;
lockdep_assert_held(&inode->vfs_inode.i_rwsem);
btrfs_assert_inode_locked(inode);
}
trace_btrfs_sync_file(file, datasync);
@ -1920,8 +1920,8 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
reserved_space = PAGE_SIZE;
sb_start_pagefault(inode->i_sb);
page_start = page_offset(page);
page_end = page_start + PAGE_SIZE - 1;
page_start = folio_pos(folio);
page_end = page_start + folio_size(folio) - 1;
end = page_end;
/*
@ -1949,18 +1949,18 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
again:
down_read(&BTRFS_I(inode)->i_mmap_lock);
lock_page(page);
folio_lock(folio);
size = i_size_read(inode);
if ((page->mapping != inode->i_mapping) ||
if ((folio->mapping != inode->i_mapping) ||
(page_start >= size)) {
/* Page got truncated out from underneath us. */
goto out_unlock;
}
wait_on_page_writeback(page);
folio_wait_writeback(folio);
lock_extent(io_tree, page_start, page_end, &cached_state);
ret2 = set_page_extent_mapped(page);
ret2 = set_folio_extent_mapped(folio);
if (ret2 < 0) {
ret = vmf_error(ret2);
unlock_extent(io_tree, page_start, page_end, &cached_state);
@ -1974,14 +1974,14 @@ again:
ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, PAGE_SIZE);
if (ordered) {
unlock_extent(io_tree, page_start, page_end, &cached_state);
unlock_page(page);
folio_unlock(folio);
up_read(&BTRFS_I(inode)->i_mmap_lock);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
}
if (page->index == ((size - 1) >> PAGE_SHIFT)) {
if (folio->index == ((size - 1) >> PAGE_SHIFT)) {
reserved_space = round_up(size - page_start, fs_info->sectorsize);
if (reserved_space < PAGE_SIZE) {
end = page_start + reserved_space - 1;
@ -2011,13 +2011,13 @@ again:
}
/* Page is wholly or partially inside EOF. */
if (page_start + PAGE_SIZE > size)
zero_start = offset_in_page(size);
if (page_start + folio_size(folio) > size)
zero_start = offset_in_folio(folio, size);
else
zero_start = PAGE_SIZE;
if (zero_start != PAGE_SIZE)
memzero_page(page, zero_start, PAGE_SIZE - zero_start);
folio_zero_range(folio, zero_start, folio_size(folio) - zero_start);
btrfs_folio_clear_checked(fs_info, folio, page_start, PAGE_SIZE);
btrfs_folio_set_dirty(fs_info, folio, page_start, end + 1 - page_start);
@ -2034,7 +2034,7 @@ again:
return VM_FAULT_LOCKED;
out_unlock:
unlock_page(page);
folio_unlock(folio);
up_read(&BTRFS_I(inode)->i_mmap_lock);
out:
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);


@ -703,8 +703,8 @@ struct btrfs_fs_info {
* running.
*/
refcount_t scrub_workers_refcnt;
u32 sectors_per_page;
struct workqueue_struct *scrub_workers;
struct btrfs_subpage_info *subpage_info;
struct btrfs_discard_ctl discard_ctl;


@ -14,7 +14,7 @@
#include "extent-tree.h"
#include "file-item.h"
struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
struct btrfs_inode_ref *btrfs_find_name_in_backref(const struct extent_buffer *leaf,
int slot,
const struct fscrypt_str *name)
{
@ -42,7 +42,7 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
}
struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
struct extent_buffer *leaf, int slot, u64 ref_objectid,
const struct extent_buffer *leaf, int slot, u64 ref_objectid,
const struct fscrypt_str *name)
{
struct btrfs_inode_extref *extref;
@ -423,9 +423,9 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
return ret;
}
static inline void btrfs_trace_truncate(struct btrfs_inode *inode,
struct extent_buffer *leaf,
struct btrfs_file_extent_item *fi,
static inline void btrfs_trace_truncate(const struct btrfs_inode *inode,
const struct extent_buffer *leaf,
const struct btrfs_file_extent_item *fi,
u64 offset, int extent_type, int slot)
{
if (!inode)


@ -109,11 +109,11 @@ struct btrfs_inode_extref *btrfs_lookup_inode_extref(
u64 inode_objectid, u64 ref_objectid, int ins_len,
int cow);
struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
struct btrfs_inode_ref *btrfs_find_name_in_backref(const struct extent_buffer *leaf,
int slot,
const struct fscrypt_str *name);
struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
struct extent_buffer *leaf, int slot, u64 ref_objectid,
const struct extent_buffer *leaf, int slot, u64 ref_objectid,
const struct fscrypt_str *name);
#endif


@ -116,7 +116,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback);
static noinline int run_delalloc_cow(struct btrfs_inode *inode,
struct page *locked_page, u64 start,
struct folio *locked_folio, u64 start,
u64 end, struct writeback_control *wbc,
bool pages_dirty);
@ -393,17 +393,17 @@ void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags)
* extent (btrfs_finish_ordered_io()).
*/
static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
struct page *locked_page,
struct folio *locked_folio,
u64 offset, u64 bytes)
{
unsigned long index = offset >> PAGE_SHIFT;
unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
u64 page_start = 0, page_end = 0;
struct page *page;
struct folio *folio;
if (locked_page) {
page_start = page_offset(locked_page);
page_end = page_start + PAGE_SIZE - 1;
if (locked_folio) {
page_start = folio_pos(locked_folio);
page_end = page_start + folio_size(locked_folio) - 1;
}
while (index <= end_index) {
@ -417,13 +417,13 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
* btrfs_mark_ordered_io_finished() would skip the accounting
* for the page range, and the ordered extent will never finish.
*/
if (locked_page && index == (page_start >> PAGE_SHIFT)) {
if (locked_folio && index == (page_start >> PAGE_SHIFT)) {
index++;
continue;
}
page = find_get_page(inode->vfs_inode.i_mapping, index);
folio = __filemap_get_folio(inode->vfs_inode.i_mapping, index, 0, 0);
index++;
if (!page)
if (IS_ERR(folio))
continue;
/*
@ -431,14 +431,14 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
* range, then btrfs_mark_ordered_io_finished() will handle
* the ordered extent accounting for the range.
*/
btrfs_folio_clamp_clear_ordered(inode->root->fs_info,
page_folio(page), offset, bytes);
put_page(page);
btrfs_folio_clamp_clear_ordered(inode->root->fs_info, folio,
offset, bytes);
folio_put(folio);
}
if (locked_page) {
if (locked_folio) {
/* The locked page covers the full range, nothing needs to be done */
if (bytes + offset <= page_start + PAGE_SIZE)
if (bytes + offset <= page_start + folio_size(locked_folio))
return;
/*
* In case this page belongs to the delalloc range being
@ -447,8 +447,9 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
* run_delalloc_range
*/
if (page_start >= offset && page_end <= (offset + bytes - 1)) {
bytes = offset + bytes - page_offset(locked_page) - PAGE_SIZE;
offset = page_offset(locked_page) + PAGE_SIZE;
bytes = offset + bytes - folio_pos(locked_folio) -
folio_size(locked_folio);
offset = folio_pos(locked_folio) + folio_size(locked_folio);
}
}
@ -494,7 +495,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
struct page *page = NULL;
const u32 sectorsize = trans->fs_info->sectorsize;
char *kaddr;
unsigned long ptr;
@ -554,12 +554,16 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_compression(leaf, ei,
compress_type);
} else {
page = find_get_page(inode->vfs_inode.i_mapping, 0);
struct folio *folio;
folio = __filemap_get_folio(inode->vfs_inode.i_mapping,
0, 0, 0);
ASSERT(!IS_ERR(folio));
btrfs_set_file_extent_compression(leaf, ei, 0);
kaddr = kmap_local_page(page);
kaddr = kmap_local_folio(folio, 0);
write_extent_buffer(leaf, kaddr, ptr, size);
kunmap_local(kaddr);
put_page(page);
folio_put(folio);
}
btrfs_mark_buffer_dirty(trans, leaf);
btrfs_release_path(path);
@ -715,7 +719,7 @@ out:
}
static noinline int cow_file_range_inline(struct btrfs_inode *inode,
struct page *locked_page,
struct folio *locked_folio,
u64 offset, u64 end,
size_t compressed_size,
int compress_type,
@ -740,13 +744,26 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode,
return ret;
}
/*
* In the successful case (ret == 0 here), cow_file_range will return 1.
*
* Quite a bit further up the callstack in extent_writepage(), ret == 1
* is treated as a short-circuited success and does not unlock the folio,
* so we must do it here.
*
* In the failure case, the locked_folio does get unlocked by
* btrfs_folio_end_all_writers, which asserts that it is still locked
* at that point, so we must *not* unlock it here.
*
* The other two callsites in compress_file_range do not have a
* locked_folio, so they are not relevant to this logic.
*/
if (ret == 0)
locked_page = NULL;
locked_folio = NULL;
extent_clear_unlock_delalloc(inode, offset, end, locked_page, &cached,
clear_flags,
PAGE_UNLOCK | PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
extent_clear_unlock_delalloc(inode, offset, end, locked_folio, &cached,
clear_flags, PAGE_UNLOCK |
PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
return ret;
}
@ -762,7 +779,7 @@ struct async_extent {
struct async_chunk {
struct btrfs_inode *inode;
struct page *locked_page;
struct folio *locked_folio;
u64 start;
u64 end;
blk_opf_t write_flags;
@ -868,25 +885,25 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
/* If this is a small write inside eof, kick off a defrag */
if (num_bytes < small_write &&
(start > 0 || end + 1 < inode->disk_i_size))
btrfs_add_inode_defrag(NULL, inode, small_write);
btrfs_add_inode_defrag(inode, small_write);
}
static int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
{
unsigned long end_index = end >> PAGE_SHIFT;
struct page *page;
struct folio *folio;
int ret = 0;
for (unsigned long index = start >> PAGE_SHIFT;
index <= end_index; index++) {
page = find_get_page(inode->i_mapping, index);
if (unlikely(!page)) {
folio = __filemap_get_folio(inode->i_mapping, index, 0, 0);
if (IS_ERR(folio)) {
if (!ret)
ret = -ENOENT;
ret = PTR_ERR(folio);
continue;
}
clear_page_dirty_for_io(page);
put_page(page);
folio_clear_dirty_for_io(folio);
folio_put(folio);
}
return ret;
}
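This conversion changes more than the types: with no FGP flags, __filemap_get_folio() returns an ERR_PTR (typically -ENOENT) when nothing is cached, whereas find_get_page() returned NULL, which is why the check switches from !page to IS_ERR(folio) and the error code now comes from PTR_ERR(). A sketch of the converted lookup loop (assuming the usual kernel pagemap headers), with a made-up function name and order-0 data folios as in the hunk above:

/*
 * Sketch of the find_get_page() -> __filemap_get_folio() idiom used by the
 * conversion above. The function name is illustrative.
 */
static int example_clear_dirty_range(struct address_space *mapping, u64 start, u64 end)
{
	const unsigned long end_index = end >> PAGE_SHIFT;
	unsigned long index = start >> PAGE_SHIFT;
	int ret = 0;

	for (; index <= end_index; index++) {
		struct folio *folio = __filemap_get_folio(mapping, index, 0, 0);

		/* Nothing cached at this index: remember the error, keep going. */
		if (IS_ERR(folio)) {
			ret = PTR_ERR(folio);
			continue;
		}
		folio_clear_dirty_for_io(folio);
		folio_put(folio);
	}
	return ret;
}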
@ -1122,7 +1139,7 @@ static void free_async_extent_pages(struct async_extent *async_extent)
static void submit_uncompressed_range(struct btrfs_inode *inode,
struct async_extent *async_extent,
struct page *locked_page)
struct folio *locked_folio)
{
u64 start = async_extent->start;
u64 end = async_extent->start + async_extent->ram_size - 1;
@ -1135,20 +1152,22 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
};
wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
ret = run_delalloc_cow(inode, locked_page, start, end, &wbc, false);
ret = run_delalloc_cow(inode, locked_folio, start, end,
&wbc, false);
wbc_detach_inode(&wbc);
if (ret < 0) {
btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
if (locked_page) {
const u64 page_start = page_offset(locked_page);
btrfs_cleanup_ordered_extents(inode, locked_folio,
start, end - start + 1);
if (locked_folio) {
const u64 page_start = folio_pos(locked_folio);
set_page_writeback(locked_page);
end_page_writeback(locked_page);
btrfs_mark_ordered_io_finished(inode, locked_page,
folio_start_writeback(locked_folio);
folio_end_writeback(locked_folio);
btrfs_mark_ordered_io_finished(inode, locked_folio,
page_start, PAGE_SIZE,
!ret);
mapping_set_error(locked_page->mapping, ret);
unlock_page(locked_page);
mapping_set_error(locked_folio->mapping, ret);
folio_unlock(locked_folio);
}
}
}
@ -1164,7 +1183,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
struct btrfs_ordered_extent *ordered;
struct btrfs_file_extent file_extent;
struct btrfs_key ins;
struct page *locked_page = NULL;
struct folio *locked_folio = NULL;
struct extent_state *cached = NULL;
struct extent_map *em;
int ret = 0;
@ -1175,19 +1194,20 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
kthread_associate_blkcg(async_chunk->blkcg_css);
/*
* If async_chunk->locked_page is in the async_extent range, we need to
* If async_chunk->locked_folio is in the async_extent range, we need to
* handle it.
*/
if (async_chunk->locked_page) {
u64 locked_page_start = page_offset(async_chunk->locked_page);
u64 locked_page_end = locked_page_start + PAGE_SIZE - 1;
if (async_chunk->locked_folio) {
u64 locked_folio_start = folio_pos(async_chunk->locked_folio);
u64 locked_folio_end = locked_folio_start +
folio_size(async_chunk->locked_folio) - 1;
if (!(start >= locked_page_end || end <= locked_page_start))
locked_page = async_chunk->locked_page;
if (!(start >= locked_folio_end || end <= locked_folio_start))
locked_folio = async_chunk->locked_folio;
}
if (async_extent->compress_type == BTRFS_COMPRESS_NONE) {
submit_uncompressed_range(inode, async_extent, locked_page);
submit_uncompressed_range(inode, async_extent, locked_folio);
goto done;
}
@ -1202,7 +1222,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
* non-contiguous space for the uncompressed size instead. So
* fall back to uncompressed.
*/
submit_uncompressed_range(inode, async_extent, locked_page);
submit_uncompressed_range(inode, async_extent, locked_folio);
goto done;
}
@ -1306,21 +1326,21 @@ u64 btrfs_get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
* allocate extents on disk for the range, and create ordered data structs
* in ram to track those extents.
*
* locked_page is the page that writepage had locked already. We use
* locked_folio is the folio that writepage had locked already. We use
* it to make sure we don't do extra locks or unlocks.
*
* When this function fails, it unlocks all pages except @locked_page.
* When this function fails, it unlocks all pages except @locked_folio.
*
* When this function successfully creates an inline extent, it returns 1 and
* unlocks all pages including locked_page and starts I/O on them.
* (In reality inline extents are limited to a single page, so locked_page is
* unlocks all pages including locked_folio and starts I/O on them.
* (In reality inline extents are limited to a single page, so locked_folio is
* the only page handled anyway).
*
* When this function succeeds and creates a normal extent, the page locking
* status depends on the passed in flags:
*
* - If @keep_locked is set, all pages are kept locked.
* - Else all pages except for @locked_page are unlocked.
* - Else all pages except for @locked_folio are unlocked.
*
* When a failure happens in the second or later iteration of the
* while-loop, the ordered extents created in previous iterations are kept
@ -1329,8 +1349,8 @@ u64 btrfs_get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
* example.
*/
static noinline int cow_file_range(struct btrfs_inode *inode,
struct page *locked_page, u64 start, u64 end,
u64 *done_offset,
struct folio *locked_folio, u64 start,
u64 end, u64 *done_offset,
bool keep_locked, bool no_inline)
{
struct btrfs_root *root = inode->root;
@ -1363,7 +1383,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
if (!no_inline) {
/* lets try to make an inline extent */
ret = cow_file_range_inline(inode, locked_page, start, end, 0,
ret = cow_file_range_inline(inode, locked_folio, start, end, 0,
BTRFS_COMPRESS_NONE, NULL, false);
if (ret <= 0) {
/*
@ -1500,7 +1520,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
page_ops |= PAGE_SET_ORDERED;
extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
locked_page, &cached,
locked_folio, &cached,
EXTENT_LOCKED | EXTENT_DELALLOC,
page_ops);
if (num_bytes < cur_alloc_size)
@ -1553,13 +1573,13 @@ out_unlock:
* function.
*
* However, in case of @keep_locked, we still need to unlock the pages
* (except @locked_page) to ensure all the pages are unlocked.
* (except @locked_folio) to ensure all the pages are unlocked.
*/
if (keep_locked && orig_start < start) {
if (!locked_page)
if (!locked_folio)
mapping_set_error(inode->vfs_inode.i_mapping, ret);
extent_clear_unlock_delalloc(inode, orig_start, start - 1,
locked_page, NULL, 0, page_ops);
locked_folio, NULL, 0, page_ops);
}
/*
@ -1582,8 +1602,7 @@ out_unlock:
if (extent_reserved) {
extent_clear_unlock_delalloc(inode, start,
start + cur_alloc_size - 1,
locked_page, &cached,
clear_bits,
locked_folio, &cached, clear_bits,
page_ops);
btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
start += cur_alloc_size;
@ -1597,7 +1616,7 @@ out_unlock:
*/
if (start < end) {
clear_bits |= EXTENT_CLEAR_DATA_RESV;
extent_clear_unlock_delalloc(inode, start, end, locked_page,
extent_clear_unlock_delalloc(inode, start, end, locked_folio,
&cached, clear_bits, page_ops);
btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
}
@ -1651,7 +1670,7 @@ static noinline void submit_compressed_extents(struct btrfs_work *work, bool do_
}
static bool run_delalloc_compressed(struct btrfs_inode *inode,
struct page *locked_page, u64 start,
struct folio *locked_folio, u64 start,
u64 end, struct writeback_control *wbc)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
@ -1691,15 +1710,16 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode,
INIT_LIST_HEAD(&async_chunk[i].extents);
/*
* The locked_page comes all the way from writepage and its
* the original page we were actually given. As we spread
* The locked_folio comes all the way from writepage and it's
* the original folio we were actually given. As we spread
* this large delalloc region across multiple async_chunk
* structs, only the first struct needs a pointer to locked_page
* structs, only the first struct needs a pointer to
* locked_folio.
*
* This way we don't need racey decisions about who is supposed
* to unlock it.
*/
if (locked_page) {
if (locked_folio) {
/*
* Depending on the compressibility, the pages might or
* might not go through async. We want all of them to
@ -1709,12 +1729,12 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode,
* need full accuracy. Just account the whole thing
* against the first page.
*/
wbc_account_cgroup_owner(wbc, locked_page,
wbc_account_cgroup_owner(wbc, &locked_folio->page,
cur_end - start);
async_chunk[i].locked_page = locked_page;
locked_page = NULL;
async_chunk[i].locked_folio = locked_folio;
locked_folio = NULL;
} else {
async_chunk[i].locked_page = NULL;
async_chunk[i].locked_folio = NULL;
}
if (blkcg_css != blkcg_root_css) {
@ -1743,7 +1763,7 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode,
* covered by the range.
*/
static noinline int run_delalloc_cow(struct btrfs_inode *inode,
struct page *locked_page, u64 start,
struct folio *locked_folio, u64 start,
u64 end, struct writeback_control *wbc,
bool pages_dirty)
{
@ -1751,20 +1771,21 @@ static noinline int run_delalloc_cow(struct btrfs_inode *inode,
int ret;
while (start <= end) {
ret = cow_file_range(inode, locked_page, start, end, &done_offset,
true, false);
ret = cow_file_range(inode, locked_folio, start, end,
&done_offset, true, false);
if (ret)
return ret;
extent_write_locked_range(&inode->vfs_inode, locked_page, start,
done_offset, wbc, pages_dirty);
extent_write_locked_range(&inode->vfs_inode, locked_folio,
start, done_offset, wbc, pages_dirty);
start = done_offset + 1;
}
return 1;
}
static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
const u64 start, const u64 end)
static int fallback_to_cow(struct btrfs_inode *inode,
struct folio *locked_folio, const u64 start,
const u64 end)
{
const bool is_space_ino = btrfs_is_free_space_inode(inode);
const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root);
@ -1833,7 +1854,8 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
* is written out and unlocked directly and a normal NOCOW extent
* doesn't work.
*/
ret = cow_file_range(inode, locked_page, start, end, NULL, false, true);
ret = cow_file_range(inode, locked_folio, start, end, NULL, false,
true);
ASSERT(ret != 1);
return ret;
}
@ -1987,7 +2009,7 @@ static int can_nocow_file_extent(struct btrfs_path *path,
* blocks on disk
*/
static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
struct page *locked_page,
struct folio *locked_folio,
const u64 start, const u64 end)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
@ -2150,8 +2172,8 @@ must_cow:
* NOCOW, following one which needs to be COW'ed
*/
if (cow_start != (u64)-1) {
ret = fallback_to_cow(inode, locked_page,
cow_start, found_key.offset - 1);
ret = fallback_to_cow(inode, locked_folio, cow_start,
found_key.offset - 1);
cow_start = (u64)-1;
if (ret) {
btrfs_dec_nocow_writers(nocow_bg);
@ -2206,7 +2228,7 @@ must_cow:
btrfs_put_ordered_extent(ordered);
extent_clear_unlock_delalloc(inode, cur_offset, nocow_end,
locked_page, &cached_state,
locked_folio, &cached_state,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_CLEAR_DATA_RESV,
PAGE_UNLOCK | PAGE_SET_ORDERED);
@ -2228,7 +2250,7 @@ must_cow:
if (cow_start != (u64)-1) {
cur_offset = end;
ret = fallback_to_cow(inode, locked_page, cow_start, end);
ret = fallback_to_cow(inode, locked_folio, cow_start, end);
cow_start = (u64)-1;
if (ret)
goto error;
@ -2255,7 +2277,7 @@ error:
lock_extent(&inode->io_tree, cur_offset, end, &cached);
extent_clear_unlock_delalloc(inode, cur_offset, end,
locked_page, &cached,
locked_folio, &cached,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
@ -2282,39 +2304,39 @@ static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
* Function to process delayed allocation (create CoW) for ranges which are
* being touched for the first time.
*/
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_folio,
u64 start, u64 end, struct writeback_control *wbc)
{
const bool zoned = btrfs_is_zoned(inode->root->fs_info);
int ret;
/*
* The range must cover part of the @locked_page, or a return of 1
* The range must cover part of the @locked_folio, or a return of 1
* can confuse the caller.
*/
ASSERT(!(end <= page_offset(locked_page) ||
start >= page_offset(locked_page) + PAGE_SIZE));
ASSERT(!(end <= folio_pos(locked_folio) ||
start >= folio_pos(locked_folio) + folio_size(locked_folio)));
if (should_nocow(inode, start, end)) {
ret = run_delalloc_nocow(inode, locked_page, start, end);
ret = run_delalloc_nocow(inode, locked_folio, start, end);
goto out;
}
if (btrfs_inode_can_compress(inode) &&
inode_need_compress(inode, start, end) &&
run_delalloc_compressed(inode, locked_page, start, end, wbc))
run_delalloc_compressed(inode, locked_folio, start, end, wbc))
return 1;
if (zoned)
ret = run_delalloc_cow(inode, locked_page, start, end, wbc,
ret = run_delalloc_cow(inode, locked_folio, start, end, wbc,
true);
else
ret = cow_file_range(inode, locked_page, start, end, NULL,
ret = cow_file_range(inode, locked_folio, start, end, NULL,
false, false);
out:
if (ret < 0)
btrfs_cleanup_ordered_extents(inode, locked_page, start,
btrfs_cleanup_ordered_extents(inode, locked_folio, start,
end - start + 1);
return ret;
}
@ -2690,7 +2712,7 @@ int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
/* see btrfs_writepage_start_hook for details on why this is required */
struct btrfs_writepage_fixup {
struct page *page;
struct folio *folio;
struct btrfs_inode *inode;
struct btrfs_work work;
};
@ -2702,50 +2724,51 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
struct page *page = fixup->page;
struct folio *folio = fixup->folio;
struct btrfs_inode *inode = fixup->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 page_start = page_offset(page);
u64 page_end = page_offset(page) + PAGE_SIZE - 1;
u64 page_start = folio_pos(folio);
u64 page_end = folio_pos(folio) + folio_size(folio) - 1;
int ret = 0;
bool free_delalloc_space = true;
/*
* This is similar to page_mkwrite, we need to reserve the space before
* we take the page lock.
* we take the folio lock.
*/
ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
PAGE_SIZE);
folio_size(folio));
again:
lock_page(page);
folio_lock(folio);
/*
* Before we queued this fixup, we took a reference on the page.
* page->mapping may go NULL, but it shouldn't be moved to a different
* Before we queued this fixup, we took a reference on the folio.
* folio->mapping may go NULL, but it shouldn't be moved to a different
* address space.
*/
if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
if (!folio->mapping || !folio_test_dirty(folio) ||
!folio_test_checked(folio)) {
/*
* Unfortunately this is a little tricky, either
*
* 1) We got here and our page had already been dealt with and
* 1) We got here and our folio had already been dealt with and
* we reserved our space, thus ret == 0, so we need to just
* drop our space reservation and bail. This can happen the
* first time we come into the fixup worker, or could happen
* while waiting for the ordered extent.
* 2) Our page was already dealt with, but we happened to get an
* 2) Our folio was already dealt with, but we happened to get an
* ENOSPC above from the btrfs_delalloc_reserve_space. In
* this case we obviously don't have anything to release, but
* because the page was already dealt with we don't want to
* mark the page with an error, so make sure we're resetting
* because the folio was already dealt with we don't want to
* mark the folio with an error, so make sure we're resetting
* ret to 0. This is why we have this check _before_ the ret
* check, because we do not want to have a surprise ENOSPC
* when the page was already properly dealt with.
* when the folio was already properly dealt with.
*/
if (!ret) {
btrfs_delalloc_release_extents(inode, PAGE_SIZE);
btrfs_delalloc_release_extents(inode, folio_size(folio));
btrfs_delalloc_release_space(inode, data_reserved,
page_start, PAGE_SIZE,
page_start, folio_size(folio),
true);
}
ret = 0;
@ -2753,7 +2776,7 @@ again:
}
/*
* We can't mess with the page state unless it is locked, so now that
* We can't mess with the folio state unless it is locked, so now that
* it is locked bail if we failed to make our space reservation.
*/
if (ret)
@ -2762,14 +2785,14 @@ again:
lock_extent(&inode->io_tree, page_start, page_end, &cached_state);
/* already ordered? We're done */
if (PageOrdered(page))
if (folio_test_ordered(folio))
goto out_reserved;
ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
if (ordered) {
unlock_extent(&inode->io_tree, page_start, page_end,
&cached_state);
unlock_page(page);
folio_unlock(folio);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
goto again;
@ -2787,7 +2810,7 @@ again:
*
* The page was dirty when we started, nothing should have cleaned it.
*/
BUG_ON(!PageDirty(page));
BUG_ON(!folio_test_dirty(folio));
free_delalloc_space = false;
out_reserved:
btrfs_delalloc_release_extents(inode, PAGE_SIZE);
@ -2801,14 +2824,14 @@ out_page:
* We hit ENOSPC or other errors. Update the mapping and page
* to reflect the errors and clean the page.
*/
mapping_set_error(page->mapping, ret);
btrfs_mark_ordered_io_finished(inode, page, page_start,
PAGE_SIZE, !ret);
clear_page_dirty_for_io(page);
mapping_set_error(folio->mapping, ret);
btrfs_mark_ordered_io_finished(inode, folio, page_start,
folio_size(folio), !ret);
folio_clear_dirty_for_io(folio);
}
btrfs_folio_clear_checked(fs_info, page_folio(page), page_start, PAGE_SIZE);
unlock_page(page);
put_page(page);
btrfs_folio_clear_checked(fs_info, folio, page_start, PAGE_SIZE);
folio_unlock(folio);
folio_put(folio);
kfree(fixup);
extent_changeset_free(data_reserved);
/*
@ -2821,33 +2844,34 @@ out_page:
/*
* There are a few paths in the higher layers of the kernel that directly
* set the page dirty bit without asking the filesystem if it is a
* set the folio dirty bit without asking the filesystem if it is a
* good idea. This causes problems because we want to make sure COW
* properly happens and the data=ordered rules are followed.
*
* In our case any range that doesn't have the ORDERED bit set
* hasn't been properly setup for IO. We kick off an async process
* to fix it up. The async helper will wait for ordered extents, set
* the delalloc bit and make it safe to write the page.
* the delalloc bit and make it safe to write the folio.
*/
int btrfs_writepage_cow_fixup(struct page *page)
int btrfs_writepage_cow_fixup(struct folio *folio)
{
struct inode *inode = page->mapping->host;
struct inode *inode = folio->mapping->host;
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_writepage_fixup *fixup;
/* This page has ordered extent covering it already */
if (PageOrdered(page))
/* This folio has ordered extent covering it already */
if (folio_test_ordered(folio))
return 0;
/*
* PageChecked is set below when we create a fixup worker for this page,
* don't try to create another one if we're already PageChecked()
* folio_checked is set below when we create a fixup worker for this
* folio, don't try to create another one if we're already
* folio_test_checked.
*
* The extent_io writepage code will redirty the page if we send back
* The extent_io writepage code will redirty the folio if we send back
* EAGAIN.
*/
if (PageChecked(page))
if (folio_test_checked(folio))
return -EAGAIN;
fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
@ -2857,14 +2881,14 @@ int btrfs_writepage_cow_fixup(struct page *page)
/*
* We are already holding a reference to this inode from
* write_cache_pages. We need to hold it because the space reservation
* takes place outside of the page lock, and we can't trust
* page->mapping outside of the page lock.
* takes place outside of the folio lock, and we can't trust
* page->mapping outside of the folio lock.
*/
ihold(inode);
btrfs_folio_set_checked(fs_info, page_folio(page), page_offset(page), PAGE_SIZE);
get_page(page);
btrfs_folio_set_checked(fs_info, folio, folio_pos(folio), folio_size(folio));
folio_get(folio);
btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL);
fixup->page = page;
fixup->folio = folio;
fixup->inode = BTRFS_I(inode);
btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
@ -6700,7 +6724,7 @@ static int btrfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
}
static noinline int uncompress_inline(struct btrfs_path *path,
struct page *page,
struct folio *folio,
struct btrfs_file_extent_item *item)
{
int ret;
@ -6722,7 +6746,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
read_extent_buffer(leaf, tmp, ptr, inline_size);
max_size = min_t(unsigned long, PAGE_SIZE, max_size);
ret = btrfs_decompress(compress_type, tmp, page, 0, inline_size, max_size);
ret = btrfs_decompress(compress_type, tmp, folio, 0, inline_size,
max_size);
/*
* decompression code contains a memset to fill in any space between the end
@ -6733,36 +6758,36 @@ static noinline int uncompress_inline(struct btrfs_path *path,
*/
if (max_size < PAGE_SIZE)
memzero_page(page, max_size, PAGE_SIZE - max_size);
folio_zero_range(folio, max_size, PAGE_SIZE - max_size);
kfree(tmp);
return ret;
}
static int read_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path,
struct page *page)
struct folio *folio)
{
struct btrfs_file_extent_item *fi;
void *kaddr;
size_t copy_size;
if (!page || PageUptodate(page))
if (!folio || folio_test_uptodate(folio))
return 0;
ASSERT(page_offset(page) == 0);
ASSERT(folio_pos(folio) == 0);
fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_file_extent_item);
if (btrfs_file_extent_compression(path->nodes[0], fi) != BTRFS_COMPRESS_NONE)
return uncompress_inline(path, page, fi);
return uncompress_inline(path, folio, fi);
copy_size = min_t(u64, PAGE_SIZE,
btrfs_file_extent_ram_bytes(path->nodes[0], fi));
kaddr = kmap_local_page(page);
kaddr = kmap_local_folio(folio, 0);
read_extent_buffer(path->nodes[0], kaddr,
btrfs_file_extent_inline_start(fi), copy_size);
kunmap_local(kaddr);
if (copy_size < PAGE_SIZE)
memzero_page(page, copy_size, PAGE_SIZE - copy_size);
folio_zero_range(folio, copy_size, PAGE_SIZE - copy_size);
return 0;
}
@ -6784,7 +6809,7 @@ static int read_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path
* Return: ERR_PTR on error, non-NULL extent_map on success.
*/
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct page *page, u64 start, u64 len)
struct folio *folio, u64 start, u64 len)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret = 0;
@ -6807,7 +6832,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
if (em) {
if (em->start > start || em->start + em->len <= start)
free_extent_map(em);
else if (em->disk_bytenr == EXTENT_MAP_INLINE && page)
else if (em->disk_bytenr == EXTENT_MAP_INLINE && folio)
free_extent_map(em);
else
goto out;
@ -6937,7 +6962,7 @@ next:
ASSERT(em->disk_bytenr == EXTENT_MAP_INLINE);
ASSERT(em->len == fs_info->sectorsize);
ret = read_inline_extent(inode, path, page);
ret = read_inline_extent(inode, path, folio);
if (ret < 0)
goto out;
goto insert;
@ -7179,13 +7204,12 @@ struct extent_map *btrfs_create_io_em(struct btrfs_inode *inode, u64 start,
* for subpage spinlock. So this function is to spin and wait for subpage
* spinlock.
*/
static void wait_subpage_spinlock(struct page *page)
static void wait_subpage_spinlock(struct folio *folio)
{
struct btrfs_fs_info *fs_info = page_to_fs_info(page);
struct folio *folio = page_folio(page);
struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
struct btrfs_subpage *subpage;
if (!btrfs_is_subpage(fs_info, page->mapping))
if (!btrfs_is_subpage(fs_info, folio->mapping))
return;
ASSERT(folio_test_private(folio) && folio_get_private(folio));
@ -7214,9 +7238,9 @@ static int btrfs_launder_folio(struct folio *folio)
static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags)
{
if (try_release_extent_mapping(&folio->page, gfp_flags)) {
wait_subpage_spinlock(&folio->page);
clear_page_extent_mapped(&folio->page);
if (try_release_extent_mapping(folio, gfp_flags)) {
wait_subpage_spinlock(folio);
clear_folio_extent_mapped(folio);
return true;
}
return false;
@ -7276,7 +7300,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
* do double ordered extent accounting on the same folio.
*/
folio_wait_writeback(folio);
wait_subpage_spinlock(&folio->page);
wait_subpage_spinlock(folio);
/*
* For subpage case, we have call sites like
@ -7414,7 +7438,7 @@ next:
btrfs_folio_clear_checked(fs_info, folio, folio_pos(folio), folio_size(folio));
if (!inode_evicting)
__btrfs_release_folio(folio, GFP_NOFS);
clear_page_extent_mapped(&folio->page);
clear_folio_extent_mapped(folio);
}
static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
@ -8951,19 +8975,19 @@ void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long index = start >> PAGE_SHIFT;
unsigned long end_index = end >> PAGE_SHIFT;
struct page *page;
struct folio *folio;
u32 len;
ASSERT(end + 1 - start <= U32_MAX);
len = end + 1 - start;
while (index <= end_index) {
page = find_get_page(inode->vfs_inode.i_mapping, index);
ASSERT(page); /* Pages should be in the extent_io_tree */
folio = __filemap_get_folio(inode->vfs_inode.i_mapping, index, 0, 0);
ASSERT(!IS_ERR(folio)); /* folios should be in the extent_io_tree */
/* This is for data, which doesn't yet support larger folio. */
ASSERT(folio_order(page_folio(page)) == 0);
btrfs_folio_set_writeback(fs_info, page_folio(page), start, len);
put_page(page);
ASSERT(folio_order(folio) == 0);
btrfs_folio_set_writeback(fs_info, folio, start, len);
folio_put(folio);
index++;
}
}
@ -9128,7 +9152,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) {
atomic_inc(&priv.pending);
btrfs_submit_bio(bbio, 0);
btrfs_submit_bbio(bbio, 0);
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
btrfs_encoded_read_endio, &priv);
@ -9143,7 +9167,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
} while (disk_io_size);
atomic_inc(&priv.pending);
btrfs_submit_bio(bbio, 0);
btrfs_submit_bbio(bbio, 0);
if (atomic_dec_return(&priv.pending))
io_wait_event(priv.wait, !atomic_read(&priv.pending));


@ -543,13 +543,11 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
range.minlen = max(range.minlen, minlen);
ret = btrfs_trim_fs(fs_info, &range);
if (ret < 0)
return ret;
if (copy_to_user(arg, &range, sizeof(range)))
return -EFAULT;
return 0;
return ret;
}
int __pure btrfs_is_empty_uuid(const u8 *uuid)
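With the early return gone, the (possibly partial) fstrim_range is copied back to user space even when btrfs_trim_fs() fails, so callers can see how much was processed before the error. A user-space sketch of what that means for callers; the program is illustrative and not part of this series, relying only on the standard FITRIM ioctl and struct fstrim_range from <linux/fs.h>:

/* Illustrative user-space caller, not part of the kernel diff. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	struct fstrim_range range = { .start = 0, .len = (__u64)-1, .minlen = 0 };
	int fd = open(argc > 1 ? argv[1] : "/mnt", O_RDONLY);

	if (fd < 0)
		return 1;
	if (ioctl(fd, FITRIM, &range) < 0)
		fprintf(stderr, "FITRIM failed: %s\n", strerror(errno));
	/* After this change, range.len is updated even when the ioctl fails. */
	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	return 0;
}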
@ -4765,11 +4763,10 @@ long btrfs_ioctl(struct file *file, unsigned int
return ret;
ret = btrfs_sync_fs(inode->i_sb, 1);
/*
* The transaction thread may want to do more work,
* namely it pokes the cleaner kthread that will start
* processing uncleaned subvols.
* There may be work for the cleaner kthread to do (subvolume
* deletion, delayed iputs, defrag inodes, etc), so wake it up.
*/
wake_up_process(fs_info->transaction_kthread);
wake_up_process(fs_info->cleaner_kthread);
return ret;
}
case BTRFS_IOC_START_SYNC:


@ -438,11 +438,11 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
}
int lzo_decompress(struct list_head *ws, const u8 *data_in,
struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
struct btrfs_fs_info *fs_info = page_to_fs_info(dest_page);
struct btrfs_fs_info *fs_info = folio_to_fs_info(dest_folio);
const u32 sectorsize = fs_info->sectorsize;
size_t in_len;
size_t out_len;
@ -467,22 +467,22 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
out_len = sectorsize;
ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
if (unlikely(ret != LZO_E_OK)) {
struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
struct btrfs_inode *inode = folio_to_inode(dest_folio);
btrfs_err(fs_info,
"lzo decompression failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
page_offset(dest_page));
folio_pos(dest_folio));
ret = -EIO;
goto out;
}
ASSERT(out_len <= sectorsize);
memcpy_to_page(dest_page, dest_pgoff, workspace->buf, out_len);
memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, out_len);
/* Early end, considered as an error. */
if (unlikely(out_len < destlen)) {
ret = -EIO;
memzero_page(dest_page, dest_pgoff + out_len, destlen - out_len);
folio_zero_range(dest_folio, dest_pgoff + out_len, destlen - out_len);
}
out:
return ret;


@ -332,7 +332,7 @@ static void finish_ordered_fn(struct btrfs_work *work)
}
static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
struct page *page, u64 file_offset,
struct folio *folio, u64 file_offset,
u64 len, bool uptodate)
{
struct btrfs_inode *inode = ordered->inode;
@ -340,10 +340,10 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
lockdep_assert_held(&inode->ordered_tree_lock);
if (page) {
ASSERT(page->mapping);
ASSERT(page_offset(page) <= file_offset);
ASSERT(file_offset + len <= page_offset(page) + PAGE_SIZE);
if (folio) {
ASSERT(folio->mapping);
ASSERT(folio_pos(folio) <= file_offset);
ASSERT(file_offset + len <= folio_pos(folio) + folio_size(folio));
/*
* Ordered (Private2) bit indicates whether we still have
@ -351,10 +351,9 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
*
* If there's no such bit, we need to skip to next range.
*/
if (!btrfs_folio_test_ordered(fs_info, page_folio(page),
file_offset, len))
if (!btrfs_folio_test_ordered(fs_info, folio, file_offset, len))
return false;
btrfs_folio_clear_ordered(fs_info, page_folio(page), file_offset, len);
btrfs_folio_clear_ordered(fs_info, folio, file_offset, len);
}
/* Now we're fine to update the accounting. */
@ -398,7 +397,7 @@ static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
}
void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
struct page *page, u64 file_offset, u64 len,
struct folio *folio, u64 file_offset, u64 len,
bool uptodate)
{
struct btrfs_inode *inode = ordered->inode;
@ -408,7 +407,8 @@ void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate);
spin_lock_irqsave(&inode->ordered_tree_lock, flags);
ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate);
ret = can_finish_ordered_extent(ordered, folio, file_offset, len,
uptodate);
spin_unlock_irqrestore(&inode->ordered_tree_lock, flags);
/*
@ -449,8 +449,8 @@ void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
/*
* Mark all ordered extents io inside the specified range finished.
*
* @page: The involved page for the operation.
* For uncompressed buffered IO, the page status also needs to be
* @folio: The involved folio for the operation.
* For uncompressed buffered IO, the folio status also needs to be
* updated to indicate whether the pending ordered io is finished.
* Can be NULL for direct IO and compressed write.
* For these cases, callers are ensured they won't execute the
@ -460,7 +460,7 @@ void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
* extent(s) covering it.
*/
void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
struct page *page, u64 file_offset,
struct folio *folio, u64 file_offset,
u64 num_bytes, bool uptodate)
{
struct rb_node *node;
@ -524,7 +524,7 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
ASSERT(end + 1 - cur < U32_MAX);
len = end + 1 - cur;
if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) {
if (can_finish_ordered_extent(entry, folio, cur, len, uptodate)) {
spin_unlock_irqrestore(&inode->ordered_tree_lock, flags);
btrfs_queue_ordered_fn(entry);
spin_lock_irqsave(&inode->ordered_tree_lock, flags);
@ -1015,7 +1015,7 @@ void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
{
struct rb_node *n;
ASSERT(inode_is_locked(&inode->vfs_inode));
btrfs_assert_inode_locked(inode);
spin_lock_irq(&inode->ordered_tree_lock);
for (n = rb_first(&inode->ordered_tree); n; n = rb_next(n)) {


@ -163,11 +163,11 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
struct btrfs_ordered_extent *entry);
void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
struct page *page, u64 file_offset, u64 len,
struct folio *folio, u64 file_offset, u64 len,
bool uptodate);
void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
struct page *page, u64 file_offset,
u64 num_bytes, bool uptodate);
struct folio *folio, u64 file_offset,
u64 num_bytes, bool uptodate);
bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size);


@ -9,9 +9,8 @@
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
int ret = 0;
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
@ -21,16 +20,13 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
btrfs_free_path(path);
return ret;
return btrfs_insert_empty_item(trans, root, path, &key, 0);
}
int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
int ret = 0;
@ -44,15 +40,9 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
goto out;
if (ret) { /* JDM: Really? */
ret = -ENOENT;
goto out;
}
return ret;
if (ret)
return -ENOENT;
ret = btrfs_del_item(trans, root, path);
out:
btrfs_free_path(path);
return ret;
return btrfs_del_item(trans, root, path);
}
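These hunks rely on BTRFS_PATH_AUTO_FREE(), the scope-based auto-free helper for struct btrfs_path added elsewhere in this series; its definition is not shown in this excerpt. The sketch below illustrates the underlying <linux/cleanup.h> pattern, and the macro expansion shown is an assumption for illustration rather than a quote of the btrfs header:

/*
 * Assumed expansion of BTRFS_PATH_AUTO_FREE, shown only to illustrate the
 * cleanup.h pattern; see the actual header change in this series for the
 * real definition.
 */
#include <linux/cleanup.h>

DEFINE_FREE(btrfs_free_path, struct btrfs_path *, if (_T) btrfs_free_path(_T))

#define BTRFS_PATH_AUTO_FREE(path_name) \
	struct btrfs_path *path_name __free(btrfs_free_path) = NULL

/* Example caller: every return path frees the path, no "out:" label needed. */
static int example_lookup(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, const struct btrfs_key *key)
{
	BTRFS_PATH_AUTO_FREE(path);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	return btrfs_search_slot(trans, root, key, path, 0, 0);
}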


@ -1998,16 +1998,14 @@ out:
*
* Return 0 for success insert
* Return >0 for existing record, caller can free @record safely.
* Error is not possible
* Return <0 for insertion failure, caller can free @record safely.
*/
int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_qgroup_extent_record *record)
{
struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
struct rb_node *parent_node = NULL;
struct btrfs_qgroup_extent_record *entry;
u64 bytenr = record->bytenr;
struct btrfs_qgroup_extent_record *existing, *ret;
unsigned long bytenr = record->bytenr;
if (!btrfs_qgroup_full_accounting(fs_info))
return 1;
@ -2015,26 +2013,24 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
lockdep_assert_held(&delayed_refs->lock);
trace_btrfs_qgroup_trace_extent(fs_info, record);
while (*p) {
parent_node = *p;
entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
node);
if (bytenr < entry->bytenr) {
p = &(*p)->rb_left;
} else if (bytenr > entry->bytenr) {
p = &(*p)->rb_right;
} else {
if (record->data_rsv && !entry->data_rsv) {
entry->data_rsv = record->data_rsv;
entry->data_rsv_refroot =
record->data_rsv_refroot;
}
return 1;
xa_lock(&delayed_refs->dirty_extents);
existing = xa_load(&delayed_refs->dirty_extents, bytenr);
if (existing) {
if (record->data_rsv && !existing->data_rsv) {
existing->data_rsv = record->data_rsv;
existing->data_rsv_refroot = record->data_rsv_refroot;
}
xa_unlock(&delayed_refs->dirty_extents);
return 1;
}
ret = __xa_store(&delayed_refs->dirty_extents, record->bytenr, record, GFP_ATOMIC);
xa_unlock(&delayed_refs->dirty_extents);
if (xa_is_err(ret)) {
qgroup_mark_inconsistent(fs_info);
return xa_err(ret);
}
rb_link_node(&record->node, parent_node, p);
rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
return 0;
}
@ -2141,6 +2137,11 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
if (!record)
return -ENOMEM;
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, bytenr, GFP_NOFS)) {
kfree(record);
return -ENOMEM;
}
delayed_refs = &trans->transaction->delayed_refs;
record->bytenr = bytenr;
record->num_bytes = num_bytes;
@ -2149,7 +2150,9 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
spin_lock(&delayed_refs->lock);
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
spin_unlock(&delayed_refs->lock);
if (ret > 0) {
if (ret) {
/* Clean up if insertion fails or item exists. */
xa_release(&delayed_refs->dirty_extents, record->bytenr);
kfree(record);
return 0;
}
@ -3018,7 +3021,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
struct btrfs_qgroup_extent_record *record;
struct btrfs_delayed_ref_root *delayed_refs;
struct ulist *new_roots = NULL;
struct rb_node *node;
unsigned long index;
u64 num_dirty_extents = 0;
u64 qgroup_to_skip;
int ret = 0;
@ -3028,10 +3031,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
delayed_refs = &trans->transaction->delayed_refs;
qgroup_to_skip = delayed_refs->qgroup_to_skip;
while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
record = rb_entry(node, struct btrfs_qgroup_extent_record,
node);
xa_for_each(&delayed_refs->dirty_extents, index, record) {
num_dirty_extents++;
trace_btrfs_qgroup_account_extents(fs_info, record);
@ -3097,7 +3097,7 @@ cleanup:
ulist_free(record->old_roots);
ulist_free(new_roots);
new_roots = NULL;
rb_erase(node, &delayed_refs->dirty_extent_root);
xa_erase(&delayed_refs->dirty_extents, index);
kfree(record);
}
@ -4874,15 +4874,13 @@ out:
void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
{
struct btrfs_qgroup_extent_record *entry;
struct btrfs_qgroup_extent_record *next;
struct rb_root *root;
unsigned long index;
root = &trans->delayed_refs.dirty_extent_root;
rbtree_postorder_for_each_entry_safe(entry, next, root, node) {
xa_for_each(&trans->delayed_refs.dirty_extents, index, entry) {
ulist_free(entry->old_roots);
kfree(entry);
}
*root = RB_ROOT;
xa_destroy(&trans->delayed_refs.dirty_extents);
}
void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes)
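The qgroup hunks above replace the rb-tree of dirty extent records with an xarray indexed by bytenr: btrfs_qgroup_trace_extent() reserves the slot with xa_reserve() under GFP_NOFS while sleeping is still allowed, and btrfs_qgroup_trace_extent_nolock() fills it with __xa_store(GFP_ATOMIC) under xa_lock(), releasing the reservation again when the record turns out to be a duplicate or the insert fails. A condensed, self-contained sketch of that reserve-then-store pattern; the names and the record type are simplified stand-ins, not btrfs code:

/*
 * Condensed sketch of the reserve-then-store xarray pattern used by the
 * qgroup conversion. Names are illustrative.
 */
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/xarray.h>

struct example_record {
	u64 bytenr;
};

static DEFINE_XARRAY(dirty_extents);

static int example_trace_extent(unsigned long bytenr)
{
	struct example_record *rec;
	void *old;
	int ret;

	rec = kzalloc(sizeof(*rec), GFP_KERNEL);
	if (!rec)
		return -ENOMEM;
	rec->bytenr = bytenr;

	/* Reserve the slot while sleeping allocations are still allowed. */
	ret = xa_reserve(&dirty_extents, bytenr, GFP_KERNEL);
	if (ret) {
		kfree(rec);
		return ret;
	}

	xa_lock(&dirty_extents);
	if (xa_load(&dirty_extents, bytenr)) {
		/* Duplicate record: drop both the reservation and the new entry. */
		xa_unlock(&dirty_extents);
		xa_release(&dirty_extents, bytenr);
		kfree(rec);
		return 1;
	}
	/* The reserved slot makes the GFP_ATOMIC store safe under the lock. */
	old = __xa_store(&dirty_extents, bytenr, rec, GFP_ATOMIC);
	xa_unlock(&dirty_extents);

	return xa_err(old);
}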


@ -125,7 +125,6 @@ struct btrfs_inode;
* Record a dirty extent, and info qgroup to update quota on it
*/
struct btrfs_qgroup_extent_record {
struct rb_node node;
u64 bytenr;
u64 num_bytes;


@ -66,6 +66,11 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
if (ret)
break;
start += key.offset;
length -= key.offset;
if (length == 0)
break;
btrfs_release_path(path);
}
@ -73,6 +78,36 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
return ret;
}
static int update_raid_extent_item(struct btrfs_trans_handle *trans,
struct btrfs_key *key,
struct btrfs_stripe_extent *stripe_extent,
const size_t item_size)
{
struct btrfs_path *path;
struct extent_buffer *leaf;
int ret;
int slot;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path,
0, 1);
if (ret)
return (ret == 1 ? ret : -EINVAL);
leaf = path->nodes[0];
slot = path->slots[0];
write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
item_size);
btrfs_mark_buffer_dirty(trans, leaf);
btrfs_free_path(path);
return ret;
}
static int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
struct btrfs_io_context *bioc)
{
@ -112,6 +147,9 @@ static int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
item_size);
if (ret == -EEXIST)
ret = update_raid_extent_item(trans, &stripe_key, stripe_extent,
item_size);
if (ret)
btrfs_abort_transaction(trans, ret);
@ -172,7 +210,7 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
if (!path)
return -ENOMEM;
if (stripe->is_scrub) {
if (stripe->rst_search_commit_root) {
path->skip_locking = 1;
path->search_commit_root = 1;
}
@ -245,10 +283,8 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
out:
if (ret > 0)
ret = -ENOENT;
if (ret && ret != -EIO && !stripe->is_scrub) {
if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
btrfs_print_tree(leaf, 1);
btrfs_err(fs_info,
if (ret && ret != -EIO && !stripe->rst_search_commit_root) {
btrfs_debug(fs_info,
"cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
logical, logical + *length, stripe->dev->devid,
btrfs_bg_type_to_raid_name(map_type));


@ -66,7 +66,7 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
const size_t inline_size = size - btrfs_file_extent_calc_inline_size(0);
char *data_start = inline_data + btrfs_file_extent_calc_inline_size(0);
struct extent_changeset *data_reserved = NULL;
struct page *page = NULL;
struct folio *folio = NULL;
struct address_space *mapping = inode->vfs_inode.i_mapping;
int ret;
@ -83,14 +83,15 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
if (ret)
goto out;
page = find_or_create_page(mapping, file_offset >> PAGE_SHIFT,
btrfs_alloc_write_mask(mapping));
if (!page) {
folio = __filemap_get_folio(mapping, file_offset >> PAGE_SHIFT,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
btrfs_alloc_write_mask(mapping));
if (IS_ERR(folio)) {
ret = -ENOMEM;
goto out_unlock;
}
ret = set_page_extent_mapped(page);
ret = set_folio_extent_mapped(folio);
if (ret < 0)
goto out_unlock;
@ -115,15 +116,15 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags);
if (comp_type == BTRFS_COMPRESS_NONE) {
memcpy_to_page(page, offset_in_page(file_offset), data_start,
datal);
memcpy_to_folio(folio, offset_in_folio(folio, file_offset), data_start,
datal);
} else {
ret = btrfs_decompress(comp_type, data_start, page,
offset_in_page(file_offset),
ret = btrfs_decompress(comp_type, data_start, folio,
offset_in_folio(folio, file_offset),
inline_size, datal);
if (ret)
goto out_unlock;
flush_dcache_page(page);
flush_dcache_folio(folio);
}
/*
@ -139,15 +140,15 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
* So what's in the range [500, 4095] corresponds to zeroes.
*/
if (datal < block_size)
memzero_page(page, datal, block_size - datal);
folio_zero_range(folio, datal, block_size - datal);
btrfs_folio_set_uptodate(fs_info, page_folio(page), file_offset, block_size);
btrfs_folio_clear_checked(fs_info, page_folio(page), file_offset, block_size);
btrfs_folio_set_dirty(fs_info, page_folio(page), file_offset, block_size);
btrfs_folio_set_uptodate(fs_info, folio, file_offset, block_size);
btrfs_folio_clear_checked(fs_info, folio, file_offset, block_size);
btrfs_folio_set_dirty(fs_info, folio, file_offset, block_size);
out_unlock:
if (page) {
unlock_page(page);
put_page(page);
if (!IS_ERR(folio)) {
folio_unlock(folio);
folio_put(folio);
}
if (ret)
btrfs_delalloc_release_space(inode, data_reserved, file_offset,


@ -36,6 +36,7 @@
#include "relocation.h"
#include "super.h"
#include "tree-checker.h"
#include "raid-stripe-tree.h"
/*
* Relocation overview
@ -2965,21 +2966,34 @@ static int relocate_one_folio(struct reloc_control *rc,
u64 folio_end;
u64 cur;
int ret;
const bool use_rst = btrfs_need_stripe_tree_update(fs_info, rc->block_group->flags);
ASSERT(index <= last_index);
folio = filemap_lock_folio(inode->i_mapping, index);
if (IS_ERR(folio)) {
page_cache_sync_readahead(inode->i_mapping, ra, NULL,
index, last_index + 1 - index);
/*
* On relocation we're doing readahead on the relocation inode,
* but if the filesystem is backed by a RAID stripe tree we can
* get ENOENT (e.g. due to preallocated extents not being
* mapped in the RST) from the lookup.
*
* But readahead doesn't handle the error and submits invalid
* reads to the device, causing assertion failures.
*/
if (!use_rst)
page_cache_sync_readahead(inode->i_mapping, ra, NULL,
index, last_index + 1 - index);
folio = __filemap_get_folio(inode->i_mapping, index,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mask);
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
mask);
if (IS_ERR(folio))
return PTR_ERR(folio);
}
WARN_ON(folio_order(folio));
if (folio_test_readahead(folio))
if (folio_test_readahead(folio) && !use_rst)
page_cache_async_readahead(inode->i_mapping, ra, NULL,
folio, last_index + 1 - index);


@ -838,7 +838,7 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
bbio->bio.bi_iter.bi_size >= blocksize)) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
btrfs_submit_bio(bbio, mirror);
btrfs_submit_bbio(bbio, mirror);
if (wait)
wait_scrub_stripe_io(stripe);
bbio = NULL;
@ -857,7 +857,7 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
if (bbio) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
btrfs_submit_bio(bbio, mirror);
btrfs_submit_bbio(bbio, mirror);
if (wait)
wait_scrub_stripe_io(stripe);
}
@ -1683,7 +1683,7 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
bbio->bio.bi_iter.bi_size >= stripe_len)) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
btrfs_submit_bio(bbio, mirror);
btrfs_submit_bbio(bbio, mirror);
bbio = NULL;
}
@ -1694,7 +1694,7 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
(i << fs_info->sectorsize_bits);
int err;
io_stripe.is_scrub = true;
io_stripe.rst_search_commit_root = true;
stripe_len = (nr_sectors - i) << fs_info->sectorsize_bits;
/*
* For RST cases, we need to manually split the bbio to
@ -1720,7 +1720,7 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
if (bbio) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
btrfs_submit_bio(bbio, mirror);
btrfs_submit_bbio(bbio, mirror);
}
if (atomic_dec_and_test(&stripe->pending_io)) {
@ -1776,7 +1776,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
mirror = calc_next_mirror(mirror, num_copies);
}
btrfs_submit_bio(bbio, mirror);
btrfs_submit_bbio(bbio, mirror);
}
static bool stripe_has_metadata_error(struct scrub_stripe *stripe)


@ -62,7 +62,7 @@ struct fs_path {
/*
* Average path length does not exceed 200 bytes, we'll have
* better packing in the slab and higher chance to satisfy
* a allocation later during send.
* an allocation later during send.
*/
char pad[256];
};
@ -1136,7 +1136,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
/*
* Start with a small buffer (1 page). If later we end up needing more
* space, which can happen for xattrs on a fs with a leaf size greater
* then the page size, attempt to increase the buffer. Typically xattr
* than the page size, attempt to increase the buffer. Typically xattr
* values are small.
*/
buf_len = PATH_MAX;


@ -163,7 +163,7 @@
* thing with or without extra unallocated space.
*/
u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info,
bool may_use_included)
{
ASSERT(s_info);
@ -368,7 +368,7 @@ static u64 calc_effective_data_chunk_size(struct btrfs_fs_info *fs_info)
}
static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
const struct btrfs_space_info *space_info,
enum btrfs_reserve_flush_enum flush)
{
u64 profile;
@ -437,7 +437,7 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
}
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 bytes,
const struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush)
{
u64 avail;
@ -542,8 +542,8 @@ static void dump_global_block_rsv(struct btrfs_fs_info *fs_info)
DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
}
static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *info)
static void __btrfs_dump_space_info(const struct btrfs_fs_info *fs_info,
const struct btrfs_space_info *info)
{
const char *flag_str = space_info_flag_to_str(info);
lockdep_assert_held(&info->lock);
@ -844,9 +844,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
return;
}
static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info)
static u64 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
const struct btrfs_space_info *space_info)
{
u64 used;
u64 avail;
@ -871,7 +870,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
}
static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info)
const struct btrfs_space_info *space_info)
{
const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv);
u64 ordered, delalloc;
@ -1943,7 +1942,7 @@ static u64 calc_unalloc_target(struct btrfs_fs_info *fs_info)
* Typically with 10 block groups as the target, the discrete values this comes
* out to are 0, 10, 20, ... , 80, 90, and 99.
*/
static int calc_dynamic_reclaim_threshold(struct btrfs_space_info *space_info)
static int calc_dynamic_reclaim_threshold(const struct btrfs_space_info *space_info)
{
struct btrfs_fs_info *fs_info = space_info->fs_info;
u64 unalloc = atomic64_read(&fs_info->free_chunk_space);
@ -1962,7 +1961,7 @@ static int calc_dynamic_reclaim_threshold(struct btrfs_space_info *space_info)
return calc_pct_ratio(want, target);
}
int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info)
int btrfs_calc_reclaim_threshold(const struct btrfs_space_info *space_info)
{
lockdep_assert_held(&space_info->lock);
@ -1985,7 +1984,7 @@ static bool is_reclaim_urgent(struct btrfs_space_info *space_info)
return unalloc < data_chunk_size;
}
static void do_reclaim_sweep(struct btrfs_fs_info *fs_info,
static void do_reclaim_sweep(const struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, int raid)
{
struct btrfs_block_group *bg;
@ -2073,7 +2072,7 @@ bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info)
return ret;
}
void btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info)
void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info)
{
int raid;
struct btrfs_space_info *space_info;


@ -217,7 +217,7 @@ struct reserve_ticket {
wait_queue_head_t wait;
};
static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
static inline bool btrfs_mixed_space_info(const struct btrfs_space_info *space_info)
{
return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
(space_info->flags & BTRFS_BLOCK_GROUP_DATA));
@ -258,7 +258,7 @@ void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
u64 chunk_size);
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
u64 flags);
u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info,
bool may_use_included);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
@ -271,7 +271,7 @@ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info);
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 bytes,
const struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush);
static inline void btrfs_space_info_free_bytes_may_use(
@ -293,7 +293,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes);
void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready);
bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info);
int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info);
void btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info);
int btrfs_calc_reclaim_threshold(const struct btrfs_space_info *space_info);
void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info);
#endif /* BTRFS_SPACE_INFO_H */


@ -64,6 +64,7 @@
* This means a slightly higher tree locking latency.
*/
#if PAGE_SIZE > SZ_4K
bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space *mapping)
{
if (fs_info->sectorsize >= PAGE_SIZE)
@ -85,37 +86,7 @@ bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space
return true;
return false;
}
void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize)
{
unsigned int cur = 0;
unsigned int nr_bits;
ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize));
nr_bits = PAGE_SIZE / sectorsize;
subpage_info->bitmap_nr_bits = nr_bits;
subpage_info->uptodate_offset = cur;
cur += nr_bits;
subpage_info->dirty_offset = cur;
cur += nr_bits;
subpage_info->writeback_offset = cur;
cur += nr_bits;
subpage_info->ordered_offset = cur;
cur += nr_bits;
subpage_info->checked_offset = cur;
cur += nr_bits;
subpage_info->locked_offset = cur;
cur += nr_bits;
subpage_info->total_nr_bits = cur;
}
#endif
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
struct folio *folio, enum btrfs_subpage_type type)
@ -163,7 +134,7 @@ struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
ASSERT(fs_info->sectorsize < PAGE_SIZE);
real_size = struct_size(ret, bitmaps,
BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits));
BITS_TO_LONGS(btrfs_bitmap_nr_max * fs_info->sectors_per_page));
ret = kzalloc(real_size, GFP_NOFS);
if (!ret)
return ERR_PTR(-ENOMEM);
@ -246,7 +217,7 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
\
btrfs_subpage_assert(fs_info, folio, start, len); \
__start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
__start_bit += fs_info->subpage_info->name##_offset; \
__start_bit += fs_info->sectors_per_page * btrfs_bitmap_nr_##name; \
__start_bit; \
})
@ -351,6 +322,8 @@ static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_inf
const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
const int nbits = (len >> fs_info->sectorsize_bits);
unsigned long flags;
unsigned int cleared = 0;
int bit = start_bit;
bool last;
btrfs_subpage_assert(fs_info, folio, start, len);
@ -368,11 +341,12 @@ static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_inf
return true;
}
ASSERT(atomic_read(&subpage->writers) >= nbits);
/* The target range should have been locked. */
ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits));
bitmap_clear(subpage->bitmaps, start_bit, nbits);
last = atomic_sub_and_test(nbits, &subpage->writers);
for_each_set_bit_from(bit, subpage->bitmaps, start_bit + nbits) {
clear_bit(bit, subpage->bitmaps);
cleared++;
}
ASSERT(atomic_read(&subpage->writers) >= cleared);
last = atomic_sub_and_test(cleared, &subpage->writers);
spin_unlock_irqrestore(&subpage->lock, flags);
return last;
}
@ -404,27 +378,94 @@ int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info,
return 0;
}
/*
* Handle different locked folios:
*
* - Non-subpage folio
* Just unlock it.
*
* - folio locked but without any subpage locked
* This happens either before writepage_delalloc() or the delalloc range is
* already handled by a previous folio.
* We can simply unlock it.
*
* - folio locked with subpage range locked.
* We go through the locked sectors inside the range and clear their locked
* bitmap, reduce the writer lock number, and unlock the page if that's
* the last locked range.
*/
void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
ASSERT(folio_test_locked(folio));
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
folio_unlock(folio);
return;
}
/*
* For subpage case, there are two types of locked page. With or
* without writers number.
*
* Since we own the page lock, no one else could touch subpage::writers
* and we are safe to do several atomic operations without spinlock.
*/
if (atomic_read(&subpage->writers) == 0) {
/* No writers, locked by plain lock_page(). */
folio_unlock(folio);
return;
}
btrfs_subpage_clamp_range(folio, &start, &len);
if (btrfs_subpage_end_and_test_writer(fs_info, folio, start, len))
folio_unlock(folio);
}
void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
struct folio *folio, unsigned long bitmap)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked;
unsigned long flags;
bool last = false;
int cleared = 0;
int bit;
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
folio_unlock(folio);
return;
}
if (atomic_read(&subpage->writers) == 0) {
/* No writers, locked by plain lock_page(). */
folio_unlock(folio);
return;
}
spin_lock_irqsave(&subpage->lock, flags);
for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) {
if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
cleared++;
}
ASSERT(atomic_read(&subpage->writers) >= cleared);
last = atomic_sub_and_test(cleared, &subpage->writers);
spin_unlock_irqrestore(&subpage->lock, flags);
if (last)
folio_unlock(folio);
}
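
The bitmap variant lets a caller batch-release exactly the sectors it locked in one call. A hypothetical caller, not taken from this series, might build the bitmap like this:

/* Sketch: release only the sectors of [start, start + len) that we locked. */
static void end_writer_lock_range(const struct btrfs_fs_info *fs_info,
				  struct folio *folio, u64 start, u32 len)
{
	unsigned long locked_bitmap = 0;
	const unsigned int first = offset_in_page(start) >> fs_info->sectorsize_bits;
	const unsigned int nbits = len >> fs_info->sectorsize_bits;

	bitmap_set(&locked_bitmap, first, nbits);
	btrfs_folio_end_writer_lock_bitmap(fs_info, folio, locked_bitmap);
}
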
#define subpage_test_bitmap_all_set(fs_info, subpage, name) \
bitmap_test_range_all_set(subpage->bitmaps, \
fs_info->subpage_info->name##_offset, \
fs_info->subpage_info->bitmap_nr_bits)
fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
fs_info->sectors_per_page)
#define subpage_test_bitmap_all_zero(fs_info, subpage, name) \
bitmap_test_range_all_zero(subpage->bitmaps, \
fs_info->subpage_info->name##_offset, \
fs_info->subpage_info->bitmap_nr_bits)
fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
fs_info->sectors_per_page)
void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
@ -728,53 +769,6 @@ void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
spin_unlock_irqrestore(&subpage->lock, flags);
}
/*
* Handle different locked pages with different page sizes:
*
* - Page locked by plain lock_page()
* It should not have any subpage::writers count.
* Can be unlocked by unlock_page().
* This is the most common locked page for __extent_writepage() called
* inside extent_write_cache_pages().
* Rarer cases include the @locked_page from extent_write_locked_range().
*
* - Page locked by lock_delalloc_pages()
* There is only one caller, all pages except @locked_page for
* extent_write_locked_range().
* In this case, we have to call subpage helper to handle the case.
*/
void btrfs_folio_unlock_writer(struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage;
ASSERT(folio_test_locked(folio));
/* For non-subpage case, we just unlock the page */
if (!btrfs_is_subpage(fs_info, folio->mapping)) {
folio_unlock(folio);
return;
}
ASSERT(folio_test_private(folio) && folio_get_private(folio));
subpage = folio_get_private(folio);
/*
* For subpage case, there are two types of locked page. With or
* without writers number.
*
* Since we own the page lock, no one else could touch subpage::writers
* and we are safe to do several atomic operations without spinlock.
*/
if (atomic_read(&subpage->writers) == 0) {
/* No writers, locked by plain lock_page() */
folio_unlock(folio);
return;
}
/* Have writers, use proper subpage helper to end it */
btrfs_folio_end_writer_lock(fs_info, folio, start, len);
}
/*
* This is for folio already locked by plain lock_page()/folio_lock(), which
* doesn't have any subpage awareness.
@ -803,7 +797,7 @@ void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
bitmap_set(subpage->bitmaps, start_bit, nbits);
ret = atomic_add_return(nbits, &subpage->writers);
ASSERT(ret <= fs_info->subpage_info->bitmap_nr_bits);
ASSERT(ret <= fs_info->sectors_per_page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
@ -819,14 +813,13 @@ bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 search_start,
u64 *found_start_ret, u32 *found_len_ret)
{
struct btrfs_subpage_info *subpage_info = fs_info->subpage_info;
struct btrfs_subpage *subpage = folio_get_private(folio);
const u32 sectors_per_page = fs_info->sectors_per_page;
const unsigned int len = PAGE_SIZE - offset_in_page(search_start);
const unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
locked, search_start, len);
const unsigned int locked_bitmap_start = subpage_info->locked_offset;
const unsigned int locked_bitmap_end = locked_bitmap_start +
subpage_info->bitmap_nr_bits;
const unsigned int locked_bitmap_start = sectors_per_page * btrfs_bitmap_nr_locked;
const unsigned int locked_bitmap_end = locked_bitmap_start + sectors_per_page;
unsigned long flags;
int first_zero;
int first_set;
@ -855,59 +848,21 @@ out:
return found;
}
/*
* Unlike btrfs_folio_end_writer_lock() which unlocks a specified subpage range,
* this ends all writer locked ranges of a page.
*
* This is for the locked page of __extent_writepage(), as the locked page
* can contain several locked subpage ranges.
*/
void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
u64 folio_start = folio_pos(folio);
u64 cur = folio_start;
ASSERT(folio_test_locked(folio));
if (!btrfs_is_subpage(fs_info, folio->mapping)) {
folio_unlock(folio);
return;
}
/* The page has no new delalloc range locked on it. Just plain unlock. */
if (atomic_read(&subpage->writers) == 0) {
folio_unlock(folio);
return;
}
while (cur < folio_start + PAGE_SIZE) {
u64 found_start;
u32 found_len;
bool found;
bool last;
found = btrfs_subpage_find_writer_locked(fs_info, folio, cur,
&found_start, &found_len);
if (!found)
break;
last = btrfs_subpage_end_and_test_writer(fs_info, folio,
found_start, found_len);
if (last) {
folio_unlock(folio);
break;
}
cur = found_start + found_len;
}
#define GET_SUBPAGE_BITMAP(subpage, fs_info, name, dst) \
{ \
const int sectors_per_page = fs_info->sectors_per_page; \
\
ASSERT(sectors_per_page < BITS_PER_LONG); \
*dst = bitmap_read(subpage->bitmaps, \
sectors_per_page * btrfs_bitmap_nr_##name, \
sectors_per_page); \
}
#define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst) \
bitmap_cut(dst, subpage->bitmaps, 0, \
subpage_info->name##_offset, subpage_info->bitmap_nr_bits)
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage_info *subpage_info = fs_info->subpage_info;
struct btrfs_subpage *subpage;
const u32 sectors_per_page = fs_info->sectors_per_page;
unsigned long uptodate_bitmap;
unsigned long dirty_bitmap;
unsigned long writeback_bitmap;
@ -916,25 +871,41 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
unsigned long flags;
ASSERT(folio_test_private(folio) && folio_get_private(folio));
ASSERT(subpage_info);
ASSERT(sectors_per_page > 1);
subpage = folio_get_private(folio);
spin_lock_irqsave(&subpage->lock, flags);
GET_SUBPAGE_BITMAP(subpage, subpage_info, uptodate, &uptodate_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, dirty, &dirty_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, writeback, &writeback_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, ordered, &ordered_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, checked, &checked_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, locked, &checked_bitmap);
GET_SUBPAGE_BITMAP(subpage, fs_info, uptodate, &uptodate_bitmap);
GET_SUBPAGE_BITMAP(subpage, fs_info, dirty, &dirty_bitmap);
GET_SUBPAGE_BITMAP(subpage, fs_info, writeback, &writeback_bitmap);
GET_SUBPAGE_BITMAP(subpage, fs_info, ordered, &ordered_bitmap);
GET_SUBPAGE_BITMAP(subpage, fs_info, checked, &checked_bitmap);
GET_SUBPAGE_BITMAP(subpage, fs_info, locked, &checked_bitmap);
spin_unlock_irqrestore(&subpage->lock, flags);
dump_page(folio_page(folio, 0), "btrfs subpage dump");
btrfs_warn(fs_info,
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
start, len, folio_pos(folio),
subpage_info->bitmap_nr_bits, &uptodate_bitmap,
subpage_info->bitmap_nr_bits, &dirty_bitmap,
subpage_info->bitmap_nr_bits, &writeback_bitmap,
subpage_info->bitmap_nr_bits, &ordered_bitmap,
subpage_info->bitmap_nr_bits, &checked_bitmap);
sectors_per_page, &uptodate_bitmap,
sectors_per_page, &dirty_bitmap,
sectors_per_page, &writeback_bitmap,
sectors_per_page, &ordered_bitmap,
sectors_per_page, &checked_bitmap);
}
void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
struct folio *folio,
unsigned long *ret_bitmap)
{
struct btrfs_subpage *subpage;
unsigned long flags;
ASSERT(folio_test_private(folio) && folio_get_private(folio));
ASSERT(fs_info->sectors_per_page > 1);
subpage = folio_get_private(folio);
spin_lock_irqsave(&subpage->lock, flags);
GET_SUBPAGE_BITMAP(subpage, fs_info, dirty, ret_bitmap);
spin_unlock_irqrestore(&subpage->lock, flags);
}


@ -5,6 +5,7 @@
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/sizes.h>
struct address_space;
struct folio;
@ -18,39 +19,23 @@ struct btrfs_fs_info;
*
* This structure records how they are organized in the bitmap:
*
* /- uptodate_offset /- dirty_offset /- ordered_offset
* /- uptodate /- dirty /- ordered
* | | |
* v v v
* |u|u|u|u|........|u|u|d|d|.......|d|d|o|o|.......|o|o|
* |<- bitmap_nr_bits ->|
* |<----------------- total_nr_bits ------------------>|
* |< sectors_per_page >|
*
* Unlike regular macro-like enums, here we do not use upper-case names, as
* these names are used in various macros to define function names.
*/
struct btrfs_subpage_info {
/* Number of bits for each bitmap */
unsigned int bitmap_nr_bits;
/* Total number of bits for the whole bitmap */
unsigned int total_nr_bits;
/*
* *_offset indicates where the bitmap starts, the length is always
* @bitmap_size, which is calculated from PAGE_SIZE / sectorsize.
*/
unsigned int uptodate_offset;
unsigned int dirty_offset;
unsigned int writeback_offset;
unsigned int ordered_offset;
unsigned int checked_offset;
/*
* For locked bitmaps, normally it's subpage representation for folio
* Locked flag, but metadata is different:
*
* - Metadata doesn't really lock the folio
* It's just to prevent page::private get cleared before the last
* end_page_read().
*/
unsigned int locked_offset;
enum {
btrfs_bitmap_nr_uptodate = 0,
btrfs_bitmap_nr_dirty,
btrfs_bitmap_nr_writeback,
btrfs_bitmap_nr_ordered,
btrfs_bitmap_nr_checked,
btrfs_bitmap_nr_locked,
btrfs_bitmap_nr_max
};
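
With the per-type offsets gone, the position of any status bit is a simple function of the enum value and sectors_per_page. For illustration, this mirrors the subpage_calc_start_bit() computation visible earlier in the subpage.c hunks:

/*
 * Bit index of the first sector of @start for a given bitmap type.
 * Each type occupies sectors_per_page consecutive bits, in enum order.
 */
static inline unsigned int subpage_bit_index(const struct btrfs_fs_info *fs_info,
					     unsigned int bitmap_nr, u64 start)
{
	return bitmap_nr * fs_info->sectors_per_page +
	       (offset_in_page(start) >> fs_info->sectorsize_bits);
}
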
/*
@ -88,9 +73,16 @@ enum btrfs_subpage_type {
BTRFS_SUBPAGE_DATA,
};
#if PAGE_SIZE > SZ_4K
bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space *mapping);
#else
static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info,
struct address_space *mapping)
{
return false;
}
#endif
void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize);
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
struct folio *folio, enum btrfs_subpage_type type);
void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio);
@ -114,10 +106,11 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
struct folio *folio, unsigned long bitmap);
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 search_start,
u64 *found_start_ret, u32 *found_len_ret);
void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct folio *folio);
/*
* Template for subpage related operations.
@ -164,8 +157,9 @@ bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
void btrfs_folio_unlock_writer(struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
struct folio *folio,
unsigned long *ret_bitmap);
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);


@ -180,7 +180,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
set_extent_bit(tmp, 0, sectorsize - 1, EXTENT_DELALLOC, NULL);
start = 0;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
found = find_lock_delalloc_range(inode, page_folio(locked_page), &start,
&end);
if (!found) {
test_err("should have found at least one delalloc");
@ -211,7 +211,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
set_extent_bit(tmp, sectorsize, max_bytes - 1, EXTENT_DELALLOC, NULL);
start = test_start;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
found = find_lock_delalloc_range(inode, page_folio(locked_page), &start,
&end);
if (!found) {
test_err("couldn't find delalloc in our range");
@ -245,7 +245,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
}
start = test_start;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
found = find_lock_delalloc_range(inode, page_folio(locked_page), &start,
&end);
if (found) {
test_err("found range when we shouldn't have");
@ -266,7 +266,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
set_extent_bit(tmp, max_bytes, total_dirty - 1, EXTENT_DELALLOC, NULL);
start = test_start;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
found = find_lock_delalloc_range(inode, page_folio(locked_page), &start,
&end);
if (!found) {
test_err("didn't find our range");
@ -307,7 +307,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize)
* this changes at any point in the future we will need to fix this
* tests expected behavior.
*/
found = find_lock_delalloc_range(inode, locked_page, &start,
found = find_lock_delalloc_range(inode, page_folio(locked_page), &start,
&end);
if (!found) {
test_err("didn't find our range");


@ -143,8 +143,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
BUG_ON(!list_empty(&transaction->list));
WARN_ON(!RB_EMPTY_ROOT(
&transaction->delayed_refs.href_root.rb_root));
WARN_ON(!RB_EMPTY_ROOT(
&transaction->delayed_refs.dirty_extent_root));
WARN_ON(!xa_empty(&transaction->delayed_refs.dirty_extents));
if (transaction->delayed_refs.pending_csums)
btrfs_err(transaction->fs_info,
"pending csums is %llu",
@ -351,7 +350,7 @@ loop:
memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
cur_trans->delayed_refs.href_root = RB_ROOT_CACHED;
cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
xa_init(&cur_trans->delayed_refs.dirty_extents);
atomic_set(&cur_trans->delayed_refs.num_entries, 0);
/*


@ -2877,7 +2877,7 @@ void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_extent *tmp;
ASSERT(inode_is_locked(&ctx->inode->vfs_inode));
btrfs_assert_inode_locked(ctx->inode);
list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
list_del_init(&ordered->log_list);

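
btrfs_assert_inode_locked() replaces the open-coded ASSERT(inode_is_locked(...)) here and in the verity and xattr hunks further down. Its definition is not part of this excerpt; presumably it is a thin wrapper along these lines (a sketch, assuming it checks the VFS inode's i_rwsem):

/* Sketch: trip an assertion if the calling task does not hold the inode lock. */
static inline void btrfs_assert_inode_locked(struct btrfs_inode *inode)
{
	ASSERT(rwsem_is_locked(&inode->vfs_inode.i_rwsem));
}
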

@ -170,7 +170,7 @@ static noinline int tree_mod_log_insert(struct btrfs_fs_info *fs_info,
* this until all tree mod log insertions are recorded in the rb tree and then
* write unlock fs_info::tree_mod_log_lock.
*/
static bool tree_mod_dont_log(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
static bool tree_mod_dont_log(struct btrfs_fs_info *fs_info, const struct extent_buffer *eb)
{
if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
return true;
@ -188,7 +188,7 @@ static bool tree_mod_dont_log(struct btrfs_fs_info *fs_info, struct extent_buffe
/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
static bool tree_mod_need_log(const struct btrfs_fs_info *fs_info,
struct extent_buffer *eb)
const struct extent_buffer *eb)
{
if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
return false;
@ -198,7 +198,7 @@ static bool tree_mod_need_log(const struct btrfs_fs_info *fs_info,
return true;
}
static struct tree_mod_elem *alloc_tree_mod_elem(struct extent_buffer *eb,
static struct tree_mod_elem *alloc_tree_mod_elem(const struct extent_buffer *eb,
int slot,
enum btrfs_mod_log_op op)
{
@ -221,7 +221,7 @@ static struct tree_mod_elem *alloc_tree_mod_elem(struct extent_buffer *eb,
return tm;
}
int btrfs_tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
int btrfs_tree_mod_log_insert_key(const struct extent_buffer *eb, int slot,
enum btrfs_mod_log_op op)
{
struct tree_mod_elem *tm;
@ -258,7 +258,7 @@ out_unlock:
return ret;
}
static struct tree_mod_elem *tree_mod_log_alloc_move(struct extent_buffer *eb,
static struct tree_mod_elem *tree_mod_log_alloc_move(const struct extent_buffer *eb,
int dst_slot, int src_slot,
int nr_items)
{
@ -278,7 +278,7 @@ static struct tree_mod_elem *tree_mod_log_alloc_move(struct extent_buffer *eb,
return tm;
}
int btrfs_tree_mod_log_insert_move(struct extent_buffer *eb,
int btrfs_tree_mod_log_insert_move(const struct extent_buffer *eb,
int dst_slot, int src_slot,
int nr_items)
{
@ -535,7 +535,7 @@ static struct tree_mod_elem *tree_mod_log_search(struct btrfs_fs_info *fs_info,
}
int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
struct extent_buffer *src,
const struct extent_buffer *src,
unsigned long dst_offset,
unsigned long src_offset,
int nr_items)


@ -37,7 +37,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
int btrfs_tree_mod_log_insert_root(struct extent_buffer *old_root,
struct extent_buffer *new_root,
bool log_removal);
int btrfs_tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
int btrfs_tree_mod_log_insert_key(const struct extent_buffer *eb, int slot,
enum btrfs_mod_log_op op);
int btrfs_tree_mod_log_free_eb(struct extent_buffer *eb);
struct extent_buffer *btrfs_tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
@ -47,11 +47,11 @@ struct extent_buffer *btrfs_tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
struct extent_buffer *btrfs_get_old_root(struct btrfs_root *root, u64 time_seq);
int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
struct extent_buffer *src,
const struct extent_buffer *src,
unsigned long dst_offset,
unsigned long src_offset,
int nr_items);
int btrfs_tree_mod_log_insert_move(struct extent_buffer *eb,
int btrfs_tree_mod_log_insert_move(const struct extent_buffer *eb,
int dst_slot, int src_slot,
int nr_items);
u64 btrfs_tree_mod_log_lowest_seq(struct btrfs_fs_info *fs_info);


@ -3,6 +3,7 @@
* Copyright (C) STRATO AG 2013. All rights reserved.
*/
#include <linux/kthread.h>
#include <linux/uuid.h>
#include <asm/unaligned.h>
#include "messages.h"
@ -12,6 +13,7 @@
#include "fs.h"
#include "accessors.h"
#include "uuid-tree.h"
#include "ioctl.h"
static void btrfs_uuid_to_key(const u8 *uuid, u8 type, struct btrfs_key *key)
{
@ -390,3 +392,180 @@ out:
btrfs_free_path(path);
return ret;
}
int btrfs_uuid_scan_kthread(void *data)
{
struct btrfs_fs_info *fs_info = data;
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_key key;
struct btrfs_path *path = NULL;
int ret = 0;
struct extent_buffer *eb;
int slot;
struct btrfs_root_item root_item;
u32 item_size;
struct btrfs_trans_handle *trans = NULL;
bool closing = false;
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
key.objectid = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = 0;
while (1) {
if (btrfs_fs_closing(fs_info)) {
closing = true;
break;
}
ret = btrfs_search_forward(root, &key, path,
BTRFS_OLDEST_GENERATION);
if (ret) {
if (ret > 0)
ret = 0;
break;
}
if (key.type != BTRFS_ROOT_ITEM_KEY ||
(key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
key.objectid != BTRFS_FS_TREE_OBJECTID) ||
key.objectid > BTRFS_LAST_FREE_OBJECTID)
goto skip;
eb = path->nodes[0];
slot = path->slots[0];
item_size = btrfs_item_size(eb, slot);
if (item_size < sizeof(root_item))
goto skip;
read_extent_buffer(eb, &root_item,
btrfs_item_ptr_offset(eb, slot),
(int)sizeof(root_item));
if (btrfs_root_refs(&root_item) == 0)
goto skip;
if (!btrfs_is_empty_uuid(root_item.uuid) ||
!btrfs_is_empty_uuid(root_item.received_uuid)) {
if (trans)
goto update_tree;
btrfs_release_path(path);
/*
* 1 - subvol uuid item
* 1 - received_subvol uuid item
*/
trans = btrfs_start_transaction(fs_info->uuid_root, 2);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
break;
}
continue;
} else {
goto skip;
}
update_tree:
btrfs_release_path(path);
if (!btrfs_is_empty_uuid(root_item.uuid)) {
ret = btrfs_uuid_tree_add(trans, root_item.uuid,
BTRFS_UUID_KEY_SUBVOL,
key.objectid);
if (ret < 0) {
btrfs_warn(fs_info, "uuid_tree_add failed %d",
ret);
break;
}
}
if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
ret = btrfs_uuid_tree_add(trans,
root_item.received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
key.objectid);
if (ret < 0) {
btrfs_warn(fs_info, "uuid_tree_add failed %d",
ret);
break;
}
}
skip:
btrfs_release_path(path);
if (trans) {
ret = btrfs_end_transaction(trans);
trans = NULL;
if (ret)
break;
}
if (key.offset < (u64)-1) {
key.offset++;
} else if (key.type < BTRFS_ROOT_ITEM_KEY) {
key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
} else if (key.objectid < (u64)-1) {
key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
key.objectid++;
} else {
break;
}
cond_resched();
}
out:
btrfs_free_path(path);
if (trans && !IS_ERR(trans))
btrfs_end_transaction(trans);
if (ret)
btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
else if (!closing)
set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
up(&fs_info->uuid_tree_rescan_sem);
return 0;
}
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root *uuid_root;
struct task_struct *task;
int ret;
/*
* 1 - root node
* 1 - root item
*/
trans = btrfs_start_transaction(tree_root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
if (IS_ERR(uuid_root)) {
ret = PTR_ERR(uuid_root);
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
fs_info->uuid_root = uuid_root;
ret = btrfs_commit_transaction(trans);
if (ret)
return ret;
down(&fs_info->uuid_tree_rescan_sem);
task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
if (IS_ERR(task)) {
/* fs_info->update_uuid_tree_gen remains 0 in all error case */
btrfs_warn(fs_info, "failed to start uuid_scan task");
up(&fs_info->uuid_tree_rescan_sem);
return PTR_ERR(task);
}
return 0;
}


@ -13,5 +13,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 typ
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
u64 subid);
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info);
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_uuid_scan_kthread(void *data);
#endif


@ -284,7 +284,7 @@ static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
* page and ignore dest, but it must still be non-NULL to avoid the
* counting-only behavior.
* @len: length in bytes to read
* @dest_page: copy into this page instead of the dest buffer
* @dest_folio: copy into this folio instead of the dest buffer
*
* Helper function to read items from the btree. This returns the number of
* bytes read or < 0 for errors. We can return short reads if the items don't
@ -294,7 +294,7 @@ static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
* Returns number of bytes read or a negative error code on failure.
*/
static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
char *dest, u64 len, struct page *dest_page)
char *dest, u64 len, struct folio *dest_folio)
{
struct btrfs_path *path;
struct btrfs_root *root = inode->root;
@ -314,7 +314,7 @@ static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
if (!path)
return -ENOMEM;
if (dest_page)
if (dest_folio)
path->reada = READA_FORWARD;
key.objectid = btrfs_ino(inode);
@ -371,15 +371,15 @@ static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
copy_offset = offset - key.offset;
if (dest) {
if (dest_page)
kaddr = kmap_local_page(dest_page);
if (dest_folio)
kaddr = kmap_local_folio(dest_folio, 0);
data = btrfs_item_ptr(leaf, path->slots[0], void);
read_extent_buffer(leaf, kaddr + dest_offset,
(unsigned long)data + copy_offset,
copy_bytes);
if (dest_page)
if (dest_folio)
kunmap_local(kaddr);
}
@ -460,7 +460,7 @@ static int rollback_verity(struct btrfs_inode *inode)
struct btrfs_root *root = inode->root;
int ret;
ASSERT(inode_is_locked(&inode->vfs_inode));
btrfs_assert_inode_locked(inode);
truncate_inode_pages(inode->vfs_inode.i_mapping, inode->vfs_inode.i_size);
clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
ret = btrfs_drop_verity_items(inode);
@ -585,7 +585,7 @@ static int btrfs_begin_enable_verity(struct file *filp)
struct btrfs_trans_handle *trans;
int ret;
ASSERT(inode_is_locked(file_inode(filp)));
btrfs_assert_inode_locked(inode);
if (test_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags))
return -EBUSY;
@ -633,7 +633,7 @@ static int btrfs_end_enable_verity(struct file *filp, const void *desc,
int ret = 0;
int rollback_ret;
ASSERT(inode_is_locked(file_inode(filp)));
btrfs_assert_inode_locked(inode);
if (desc == NULL)
goto rollback;
@ -762,7 +762,7 @@ again:
* [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ]
*/
ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, off,
folio_address(folio), PAGE_SIZE, &folio->page);
folio_address(folio), PAGE_SIZE, folio);
if (ret < 0) {
folio_put(folio);
return ERR_PTR(ret);


@ -476,6 +476,8 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
if (IS_ERR(*bdev_file)) {
ret = PTR_ERR(*bdev_file);
btrfs_err(NULL, "failed to open device for path %s with flags 0x%x: %d",
device_path, flags, ret);
goto error;
}
bdev = file_bdev(*bdev_file);
@ -4784,183 +4786,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
return 0;
}
int btrfs_uuid_scan_kthread(void *data)
{
struct btrfs_fs_info *fs_info = data;
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_key key;
struct btrfs_path *path = NULL;
int ret = 0;
struct extent_buffer *eb;
int slot;
struct btrfs_root_item root_item;
u32 item_size;
struct btrfs_trans_handle *trans = NULL;
bool closing = false;
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
key.objectid = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = 0;
while (1) {
if (btrfs_fs_closing(fs_info)) {
closing = true;
break;
}
ret = btrfs_search_forward(root, &key, path,
BTRFS_OLDEST_GENERATION);
if (ret) {
if (ret > 0)
ret = 0;
break;
}
if (key.type != BTRFS_ROOT_ITEM_KEY ||
(key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
key.objectid != BTRFS_FS_TREE_OBJECTID) ||
key.objectid > BTRFS_LAST_FREE_OBJECTID)
goto skip;
eb = path->nodes[0];
slot = path->slots[0];
item_size = btrfs_item_size(eb, slot);
if (item_size < sizeof(root_item))
goto skip;
read_extent_buffer(eb, &root_item,
btrfs_item_ptr_offset(eb, slot),
(int)sizeof(root_item));
if (btrfs_root_refs(&root_item) == 0)
goto skip;
if (!btrfs_is_empty_uuid(root_item.uuid) ||
!btrfs_is_empty_uuid(root_item.received_uuid)) {
if (trans)
goto update_tree;
btrfs_release_path(path);
/*
* 1 - subvol uuid item
* 1 - received_subvol uuid item
*/
trans = btrfs_start_transaction(fs_info->uuid_root, 2);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
break;
}
continue;
} else {
goto skip;
}
update_tree:
btrfs_release_path(path);
if (!btrfs_is_empty_uuid(root_item.uuid)) {
ret = btrfs_uuid_tree_add(trans, root_item.uuid,
BTRFS_UUID_KEY_SUBVOL,
key.objectid);
if (ret < 0) {
btrfs_warn(fs_info, "uuid_tree_add failed %d",
ret);
break;
}
}
if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
ret = btrfs_uuid_tree_add(trans,
root_item.received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
key.objectid);
if (ret < 0) {
btrfs_warn(fs_info, "uuid_tree_add failed %d",
ret);
break;
}
}
skip:
btrfs_release_path(path);
if (trans) {
ret = btrfs_end_transaction(trans);
trans = NULL;
if (ret)
break;
}
if (key.offset < (u64)-1) {
key.offset++;
} else if (key.type < BTRFS_ROOT_ITEM_KEY) {
key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
} else if (key.objectid < (u64)-1) {
key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
key.objectid++;
} else {
break;
}
cond_resched();
}
out:
btrfs_free_path(path);
if (trans && !IS_ERR(trans))
btrfs_end_transaction(trans);
if (ret)
btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
else if (!closing)
set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
up(&fs_info->uuid_tree_rescan_sem);
return 0;
}
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root *uuid_root;
struct task_struct *task;
int ret;
/*
* 1 - root node
* 1 - root item
*/
trans = btrfs_start_transaction(tree_root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
if (IS_ERR(uuid_root)) {
ret = PTR_ERR(uuid_root);
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
fs_info->uuid_root = uuid_root;
ret = btrfs_commit_transaction(trans);
if (ret)
return ret;
down(&fs_info->uuid_tree_rescan_sem);
task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
if (IS_ERR(task)) {
/* fs_info->update_uuid_tree_gen remains 0 in all error case */
btrfs_warn(fs_info, "failed to start uuid_scan task");
up(&fs_info->uuid_tree_rescan_sem);
return PTR_ERR(task);
}
return 0;
}
/*
* shrinking a device means finding all of the device extents past
* the new size, and then following the back refs to the chunks.
@ -5956,11 +5781,31 @@ void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info)
write_unlock(&fs_info->mapping_tree_lock);
}
static int btrfs_chunk_map_num_copies(const struct btrfs_chunk_map *map)
{
enum btrfs_raid_types index = btrfs_bg_flags_to_raid_index(map->type);
if (map->type & BTRFS_BLOCK_GROUP_RAID5)
return 2;
/*
* There could be two corrupted data stripes, we need to loop retry in
* order to rebuild the correct data.
*
* Fail a stripe at a time on every retry except the stripe under
* reconstruction.
*/
if (map->type & BTRFS_BLOCK_GROUP_RAID6)
return map->num_stripes;
/* Non-RAID56, use their ncopies from btrfs_raid_array. */
return btrfs_raid_array[index].ncopies;
}
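
Factoring this into a helper keeps btrfs_num_copies() trivial and lets btrfs_map_block() reuse it further down. Assuming the standard ncopies values from btrfs_raid_array, the helper resolves to roughly the following:

/*
 * Illustrative results (ncopies taken from btrfs_raid_array):
 *   SINGLE, RAID0        -> 1
 *   DUP, RAID1, RAID10   -> 2
 *   RAID1C3              -> 3
 *   RAID1C4              -> 4
 *   RAID5                -> 2            (one extra chance via parity rebuild)
 *   RAID6                -> num_stripes  (retry, failing one stripe at a time)
 */
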
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
{
struct btrfs_chunk_map *map;
enum btrfs_raid_types index;
int ret = 1;
int ret;
map = btrfs_get_chunk_map(fs_info, logical, len);
if (IS_ERR(map))
@ -5972,22 +5817,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
*/
return 1;
index = btrfs_bg_flags_to_raid_index(map->type);
/* Non-RAID56, use their ncopies from btrfs_raid_array. */
if (!(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK))
ret = btrfs_raid_array[index].ncopies;
else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
ret = 2;
else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
/*
* There could be two corrupted data stripes, we need
* to loop retry in order to rebuild the correct data.
*
* Fail a stripe at a time on every retry except the
* stripe under reconstruction.
*/
ret = map->num_stripes;
ret = btrfs_chunk_map_num_copies(map);
btrfs_free_chunk_map(map);
return ret;
}
@ -6637,14 +6467,14 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
io_geom.stripe_index = 0;
io_geom.op = op;
num_copies = btrfs_num_copies(fs_info, logical, fs_info->sectorsize);
if (io_geom.mirror_num > num_copies)
return -EINVAL;
map = btrfs_get_chunk_map(fs_info, logical, *length);
if (IS_ERR(map))
return PTR_ERR(map);
num_copies = btrfs_chunk_map_num_copies(map);
if (io_geom.mirror_num > num_copies)
return -EINVAL;
map_offset = logical - map->start;
io_geom.raid56_full_stripe_start = (u64)-1;
max_len = btrfs_max_io_len(map, map_offset, &io_geom);


@ -444,7 +444,7 @@ struct btrfs_io_stripe {
/* Block mapping. */
u64 physical;
u64 length;
bool is_scrub;
bool rst_search_commit_root;
/* For the endio handler. */
struct btrfs_io_context *bioc;
};
@ -725,8 +725,6 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_uuid_scan_kthread(void *data);
bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,


@ -120,7 +120,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
* locks the inode's i_mutex before calling setxattr or removexattr.
*/
if (flags & XATTR_REPLACE) {
ASSERT(inode_is_locked(inode));
btrfs_assert_inode_locked(BTRFS_I(inode));
di = btrfs_lookup_xattr(NULL, root, path,
btrfs_ino(BTRFS_I(inode)), name, name_len, 0);
if (!di)


@ -20,6 +20,8 @@
#include <linux/refcount.h>
#include "btrfs_inode.h"
#include "compression.h"
#include "fs.h"
#include "subpage.h"
/* workspace buffer size for s390 zlib hardware support */
#define ZLIB_DFLTCC_BUF_SIZE (4 * PAGE_SIZE)
@ -108,6 +110,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
unsigned long len = *total_out;
unsigned long nr_dest_folios = *out_folios;
const unsigned long max_out = nr_dest_folios * PAGE_SIZE;
const u64 orig_end = start + len;
*out_folios = 0;
*total_out = 0;
@ -153,6 +156,10 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
if (in_buf_folios > 1) {
int i;
/* S390 hardware acceleration path, not subpage. */
ASSERT(!btrfs_is_subpage(
inode_to_fs_info(mapping->host),
mapping));
for (i = 0; i < in_buf_folios; i++) {
if (data_in) {
kunmap_local(data_in);
@ -167,9 +174,14 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
copy_page(workspace->buf + i * PAGE_SIZE,
data_in);
start += PAGE_SIZE;
workspace->strm.avail_in =
(in_buf_folios << PAGE_SHIFT);
}
workspace->strm.next_in = workspace->buf;
} else {
unsigned int pg_off;
unsigned int cur_len;
if (data_in) {
kunmap_local(data_in);
folio_put(in_folio);
@ -179,12 +191,13 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
start, &in_folio);
if (ret < 0)
goto out;
data_in = kmap_local_folio(in_folio, 0);
pg_off = offset_in_page(start);
cur_len = btrfs_calc_input_length(orig_end, start);
data_in = kmap_local_folio(in_folio, pg_off);
start += PAGE_SIZE;
workspace->strm.next_in = data_in;
workspace->strm.avail_in = cur_len;
}
workspace->strm.avail_in = min(bytes_left,
(unsigned long) workspace->buf_size);
}
ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
@ -380,7 +393,7 @@ done:
}
int zlib_decompress(struct list_head *ws, const u8 *data_in,
struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
@ -408,12 +421,12 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in,
ret = zlib_inflateInit2(&workspace->strm, wbits);
if (unlikely(ret != Z_OK)) {
struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
struct btrfs_inode *inode = folio_to_inode(dest_folio);
btrfs_err(inode->root->fs_info,
"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
page_offset(dest_page));
folio_pos(dest_folio));
return -EIO;
}
@ -426,16 +439,16 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in,
if (ret != Z_STREAM_END)
goto out;
memcpy_to_page(dest_page, dest_pgoff, workspace->buf, to_copy);
memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy);
out:
if (unlikely(to_copy != destlen)) {
struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
struct btrfs_inode *inode = folio_to_inode(dest_folio);
btrfs_err(inode->root->fs_info,
"zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
page_offset(dest_page), to_copy, destlen);
folio_pos(dest_folio), to_copy, destlen);
ret = -EIO;
} else {
ret = 0;
@ -444,7 +457,7 @@ out:
zlib_inflateEnd(&workspace->strm);
if (unlikely(to_copy < destlen))
memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy);
folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
return ret;
}
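
The zlib hunks above (and the zstd ones below) now clamp each input mapping with btrfs_calc_input_length() so a mapping never crosses a page boundary nor runs past the end of the original range. The helper itself is not shown in this excerpt; its expected shape is roughly this (an assumption, for illustration only):

/* Sketch: bytes usable from @cur, limited by the current page and @orig_end. */
static inline u32 calc_input_length_sketch(u64 orig_end, u64 cur)
{
	const u64 page_end = round_down(cur, PAGE_SIZE) + PAGE_SIZE;

	return min(orig_end, page_end) - cur;
}
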


@ -287,7 +287,7 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
/* The emulated zone size is determined from the size of device extent */
static int calculate_emulated_zone_size(struct btrfs_fs_info *fs_info)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root *root = fs_info->dev_root;
struct btrfs_key key;
struct extent_buffer *leaf;
@ -304,28 +304,21 @@ static int calculate_emulated_zone_size(struct btrfs_fs_info *fs_info)
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
return ret;
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
return ret;
/* No dev extents at all? Not good */
if (ret > 0) {
ret = -EUCLEAN;
goto out;
}
if (ret > 0)
return -EUCLEAN;
}
leaf = path->nodes[0];
dext = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent);
fs_info->zone_size = btrfs_dev_extent_length(leaf, dext);
ret = 0;
out:
btrfs_free_path(path);
return ret;
return 0;
}
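
BTRFS_PATH_AUTO_FREE builds on the scope-based cleanup helpers from <linux/cleanup.h> (see "DEFINE_FREE for struct btrfs_path" earlier in this series). The definition is presumably along these lines (a sketch; it relies on btrfs_free_path() accepting NULL):

/* Sketch of the auto-free plumbing; names follow the series, exact shape assumed. */
DEFINE_FREE(btrfs_free_path, struct btrfs_path *, btrfs_free_path(_T))

#define BTRFS_PATH_AUTO_FREE(path_name) \
	struct btrfs_path *path_name __free(btrfs_free_path) = NULL
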
int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
@@ -1211,7 +1204,7 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_root *root;
-	struct btrfs_path *path;
+	BTRFS_PATH_AUTO_FREE(path);
 	struct btrfs_key key;
 	struct btrfs_key found_key;
 	int ret;
@@ -1246,7 +1239,7 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
 	if (!ret)
 		ret = -EUCLEAN;
 	if (ret < 0)
-		goto out;
+		return ret;
 	ret = btrfs_previous_extent_item(root, path, cache->start);
 	if (ret) {
@@ -1254,7 +1247,7 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
 			ret = 0;
 			*offset_ret = 0;
 		}
-		goto out;
+		return ret;
 	}
 	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
@@ -1266,15 +1259,10 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
 	if (!(found_key.objectid >= cache->start &&
 	      found_key.objectid + length <= cache->start + cache->length)) {
-		ret = -EUCLEAN;
-		goto out;
+		return -EUCLEAN;
 	}
 	*offset_ret = found_key.objectid + length - cache->start;
-	ret = 0;
-out:
-	btrfs_free_path(path);
-	return ret;
+	return 0;
 }
 struct zone_info {
@@ -2459,7 +2447,7 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
 	mutex_unlock(&fs_devices->device_list_mutex);
 }
-bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
+bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 	struct btrfs_device *device;

diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h

@@ -89,7 +89,7 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
 				   struct extent_buffer *eb);
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
 void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
-bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
+bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info);
 void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
 				       u64 length);
 int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
@@ -242,7 +242,7 @@ static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
 static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
-static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
+static inline bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info)
 {
 	return false;
 }
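
btrfs_zoned_should_reclaim() only reads fs_info, so both the declaration and the
zoned=n stub now take a const pointer; the same constification appears in the
tracepoint prototypes further down. Changes like this are easier when accessor
macros preserve the const-ness of their argument. A rough illustration of that
idea using container_of_const() from <linux/container_of.h> (illustrative only,
not actual btrfs code):

	/*
	 * Illustrative only: a container_of_const()-based accessor yields a
	 * const struct btrfs_inode * for a const struct inode * argument and a
	 * mutable pointer otherwise, so const can propagate through callers.
	 */
	#define BTRFS_INODE_SKETCH(_inode) \
		container_of_const(_inode, struct btrfs_inode, vfs_inode)

	static u64 inode_disk_size_example(const struct inode *inode)
	{
		/* Works without casting away const. */
		return BTRFS_INODE_SKETCH(inode)->disk_i_size;
	}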

diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c

@@ -389,7 +389,10 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 	unsigned long tot_out = 0;
 	unsigned long len = *total_out;
 	const unsigned long nr_dest_folios = *out_folios;
+	const u64 orig_end = start + len;
 	unsigned long max_out = nr_dest_folios * PAGE_SIZE;
+	unsigned int pg_off;
+	unsigned int cur_len;
 	zstd_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
 							   len);
@@ -415,9 +418,11 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 	ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
 	if (ret < 0)
 		goto out;
-	workspace->in_buf.src = kmap_local_folio(in_folio, 0);
+	pg_off = offset_in_page(start);
+	cur_len = btrfs_calc_input_length(orig_end, start);
+	workspace->in_buf.src = kmap_local_folio(in_folio, pg_off);
 	workspace->in_buf.pos = 0;
-	workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
+	workspace->in_buf.size = cur_len;
 	/* Allocate and map in the output buffer */
 	out_folio = btrfs_alloc_compr_folio();
@@ -494,14 +499,16 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 			kunmap_local(workspace->in_buf.src);
 			workspace->in_buf.src = NULL;
 			folio_put(in_folio);
-			start += PAGE_SIZE;
-			len -= PAGE_SIZE;
+			start += cur_len;
+			len -= cur_len;
 			ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
 			if (ret < 0)
 				goto out;
-			workspace->in_buf.src = kmap_local_folio(in_folio, 0);
+			pg_off = offset_in_page(start);
+			cur_len = btrfs_calc_input_length(orig_end, start);
+			workspace->in_buf.src = kmap_local_folio(in_folio, pg_off);
 			workspace->in_buf.pos = 0;
-			workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
+			workspace->in_buf.size = cur_len;
 		}
 	}
 	while (1) {
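
The compression loop above used to assume exactly one page per iteration (kmap
offset 0, PAGE_SIZE steps). With large folios the input is instead advanced by
cur_len, capped by both the end of the range being compressed and the current
page boundary. The helper below only illustrates that arithmetic and is not the
definition of btrfs_calc_input_length():

	/* Illustration of the per-iteration input sizing, not the real helper. */
	static u32 calc_input_length_sketch(u64 range_end, u64 cur)
	{
		/* End of the page that contains @cur. */
		u64 page_end = round_down(cur, PAGE_SIZE) + PAGE_SIZE;

		/* Stop at whichever comes first: range end or page boundary. */
		return min(range_end, page_end) - cur;
	}
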
@@ -649,11 +656,11 @@ done:
 }
 int zstd_decompress(struct list_head *ws, const u8 *data_in,
-		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
+		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
 		size_t destlen)
 {
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
-	struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
+	struct btrfs_fs_info *fs_info = btrfs_sb(folio_inode(dest_folio)->i_sb);
 	const u32 sectorsize = fs_info->sectorsize;
 	zstd_dstream *stream;
 	int ret = 0;
@@ -662,12 +669,12 @@ int zstd_decompress(struct list_head *ws, const u8 *data_in,
 	stream = zstd_init_dstream(
 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
 	if (unlikely(!stream)) {
-		struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
+		struct btrfs_inode *inode = folio_to_inode(dest_folio);
 		btrfs_err(inode->root->fs_info,
 		"zstd decompression init failed, root %llu inode %llu offset %llu",
 			btrfs_root_id(inode->root), btrfs_ino(inode),
-			page_offset(dest_page));
+			folio_pos(dest_folio));
 		ret = -EIO;
 		goto finish;
 	}
@@ -686,21 +693,21 @@ int zstd_decompress(struct list_head *ws, const u8 *data_in,
 	 */
 	ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
 	if (unlikely(zstd_is_error(ret))) {
-		struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
+		struct btrfs_inode *inode = folio_to_inode(dest_folio);
 		btrfs_err(inode->root->fs_info,
 		"zstd decompression failed, error %d root %llu inode %llu offset %llu",
 			zstd_get_error_code(ret), btrfs_root_id(inode->root),
-			btrfs_ino(inode), page_offset(dest_page));
+			btrfs_ino(inode), folio_pos(dest_folio));
 		goto finish;
 	}
 	to_copy = workspace->out_buf.pos;
-	memcpy_to_page(dest_page, dest_pgoff, workspace->out_buf.dst, to_copy);
+	memcpy_to_folio(dest_folio, dest_pgoff, workspace->out_buf.dst, to_copy);
 finish:
 	/* Error or early end. */
 	if (unlikely(to_copy < destlen)) {
 		ret = -EIO;
-		memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy);
+		folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
 	}
 	return ret;
 }
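
Both decompressors end the same way: copy whatever output was produced into the
destination folio, then zero the remainder so a short or failed decompression
never exposes stale data. A compact sketch of that pattern using the generic
folio helpers from <linux/highmem.h>; the wrapper name is hypothetical:

	/* Hypothetical wrapper illustrating the copy-then-zero tail handling. */
	static void fill_decompressed_folio(struct folio *dest, size_t offset,
					    const void *buf, size_t copied,
					    size_t expected)
	{
		memcpy_to_folio(dest, offset, buf, copied);

		/* Zero the tail on error or early end of the compressed stream. */
		if (copied < expected)
			folio_zero_range(dest, offset + copied, expected - copied);
	}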

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h

@@ -674,10 +674,10 @@ TRACE_EVENT(btrfs_finish_ordered_extent,
 DECLARE_EVENT_CLASS(btrfs__writepage,
-	TP_PROTO(const struct page *page, const struct inode *inode,
+	TP_PROTO(const struct folio *folio, const struct inode *inode,
 		 const struct writeback_control *wbc),
-	TP_ARGS(page, inode, wbc),
+	TP_ARGS(folio, inode, wbc),
 	TP_STRUCT__entry_btrfs(
 		__field( u64, ino )
@@ -695,7 +695,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
 	TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
 		__entry->ino = btrfs_ino(BTRFS_I(inode));
-		__entry->index = page->index;
+		__entry->index = folio->index;
 		__entry->nr_to_write = wbc->nr_to_write;
 		__entry->pages_skipped = wbc->pages_skipped;
 		__entry->range_start = wbc->range_start;
@@ -721,12 +721,12 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
 		  __entry->writeback_index)
 );
-DEFINE_EVENT(btrfs__writepage, __extent_writepage,
+DEFINE_EVENT(btrfs__writepage, extent_writepage,
-	TP_PROTO(const struct page *page, const struct inode *inode,
+	TP_PROTO(const struct folio *folio, const struct inode *inode,
 		 const struct writeback_control *wbc),
-	TP_ARGS(page, inode, wbc)
+	TP_ARGS(folio, inode, wbc)
 );
 TRACE_EVENT(btrfs_writepage_end_io_hook,
@@ -1825,7 +1825,7 @@ TRACE_EVENT(qgroup_update_counters,
 TRACE_EVENT(qgroup_update_reserve,
-	TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup,
+	TP_PROTO(const struct btrfs_fs_info *fs_info, const struct btrfs_qgroup *qgroup,
 		 s64 diff, int type),
 	TP_ARGS(fs_info, qgroup, diff, type),
@@ -1851,7 +1851,7 @@ TRACE_EVENT(qgroup_update_reserve,
 TRACE_EVENT(qgroup_meta_reserve,
-	TP_PROTO(struct btrfs_root *root, s64 diff, int type),
+	TP_PROTO(const struct btrfs_root *root, s64 diff, int type),
 	TP_ARGS(root, diff, type),
@@ -1874,7 +1874,7 @@ TRACE_EVENT(qgroup_meta_reserve,
 TRACE_EVENT(qgroup_meta_convert,
-	TP_PROTO(struct btrfs_root *root, s64 diff),
+	TP_PROTO(const struct btrfs_root *root, s64 diff),
 	TP_ARGS(root, diff),
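
The writepage tracepoints now take the folio directly (and the event is renamed
from __extent_writepage to extent_writepage), while the qgroup events only gain
const qualifiers. A hedged sketch of a call site for the renamed event; the
wrapper function is illustrative, not code from this series:

	/* Illustrative call site for the renamed, folio-based tracepoint. */
	static void trace_writepage_example(const struct folio *folio,
					    const struct inode *inode,
					    const struct writeback_control *wbc)
	{
		/*
		 * DEFINE_EVENT(btrfs__writepage, extent_writepage, ...) generates
		 * trace_extent_writepage(); it records folio->index and wbc state.
		 */
		trace_extent_writepage(folio, inode, wbc);
	}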