btrfs: remove btrfs_end_io_wq

All reads bio that go through btrfs_map_bio need to be completed in
user context.  And read I/Os are the most common and timing critical
in almost any file system workloads.

Embed a work_struct into struct btrfs_bio and use it to complete all
read bios submitted through btrfs_map, using the REQ_META flag to decide
which workqueue they are placed on.

This removes the need for a separate 128 byte allocation (typically
rounded up to 192 bytes by slab) for all reads with a size increase
of 24 bytes for struct btrfs_bio.  Future patches will reorganize
struct btrfs_bio to make use of this extra space for writes as well.

(All sizes are based a on typical 64-bit non-debug build)

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Christoph Hellwig 2022-05-26 09:36:40 +02:00 committed by David Sterba
parent 08a6f46434
commit d7b9416fe5
8 changed files with 42 additions and 167 deletions

View File

@ -931,10 +931,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
sums += fs_info->csum_size * nr_sectors;
ASSERT(comp_bio->bi_iter.bi_size);
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
BTRFS_WQ_ENDIO_DATA);
if (ret)
goto finish_cb;
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
if (ret)
goto finish_cb;

View File

@ -850,8 +850,8 @@ struct btrfs_fs_info {
struct btrfs_workqueue *hipri_workers;
struct btrfs_workqueue *delalloc_workers;
struct btrfs_workqueue *flush_workers;
struct btrfs_workqueue *endio_workers;
struct btrfs_workqueue *endio_meta_workers;
struct workqueue_struct *endio_workers;
struct workqueue_struct *endio_meta_workers;
struct workqueue_struct *endio_raid56_workers;
struct workqueue_struct *rmw_workers;
struct workqueue_struct *compressed_write_workers;

View File

@ -51,7 +51,6 @@
BTRFS_SUPER_FLAG_METADUMP |\
BTRFS_SUPER_FLAG_METADUMP_V2)
static void end_workqueue_fn(struct btrfs_work *work);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
@ -64,40 +63,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
/*
* btrfs_end_io_wq structs are used to do processing in task context when an IO
* is complete. This is used during reads to verify checksums, and it is used
* by writes to insert metadata for new file extents after IO is complete.
*/
struct btrfs_end_io_wq {
struct bio *bio;
bio_end_io_t *end_io;
void *private;
struct btrfs_fs_info *info;
blk_status_t status;
enum btrfs_wq_endio_type metadata;
struct btrfs_work work;
};
static struct kmem_cache *btrfs_end_io_wq_cache;
int __init btrfs_end_io_wq_init(void)
{
btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
sizeof(struct btrfs_end_io_wq),
0,
SLAB_MEM_SPREAD,
NULL);
if (!btrfs_end_io_wq_cache)
return -ENOMEM;
return 0;
}
void __cold btrfs_end_io_wq_exit(void)
{
kmem_cache_destroy(btrfs_end_io_wq_cache);
}
static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
{
if (fs_info->csum_shash)
@ -740,48 +705,6 @@ err:
return ret;
}
static void end_workqueue_bio(struct bio *bio)
{
struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
struct btrfs_fs_info *fs_info;
struct btrfs_workqueue *wq;
fs_info = end_io_wq->info;
end_io_wq->status = bio->bi_status;
if (end_io_wq->metadata)
wq = fs_info->endio_meta_workers;
else
wq = fs_info->endio_workers;
btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
btrfs_queue_work(wq, &end_io_wq->work);
}
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata)
{
struct btrfs_end_io_wq *end_io_wq;
if (WARN_ON_ONCE(btrfs_op(bio) != BTRFS_MAP_WRITE))
return BLK_STS_IOERR;
end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
if (!end_io_wq)
return BLK_STS_RESOURCE;
end_io_wq->private = bio->bi_private;
end_io_wq->end_io = bio->bi_end_io;
end_io_wq->info = info;
end_io_wq->status = 0;
end_io_wq->bio = bio;
end_io_wq->metadata = metadata;
bio->bi_private = end_io_wq;
bio->bi_end_io = end_workqueue_bio;
return 0;
}
static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async;
@ -917,13 +840,6 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
bio->bi_opf |= REQ_META;
if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
/*
* called for a read, do the setup so that checksum validation
* can happen in the async kernel threads
*/
ret = btrfs_bio_wq_end_io(fs_info, bio,
BTRFS_WQ_ENDIO_METADATA);
if (!ret)
ret = btrfs_map_bio(fs_info, bio, mirror_num);
} else if (!should_async_write(fs_info, BTRFS_I(inode))) {
ret = btree_csum_one_bio(bio);
@ -1947,25 +1863,6 @@ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
return root;
}
/*
* called by the kthread helper functions to finally call the bio end_io
* functions. This is where read checksum verification actually happens
*/
static void end_workqueue_fn(struct btrfs_work *work)
{
struct bio *bio;
struct btrfs_end_io_wq *end_io_wq;
end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
bio = end_io_wq->bio;
bio->bi_status = end_io_wq->status;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
bio_endio(bio);
kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
}
static int cleaner_kthread(void *arg)
{
struct btrfs_fs_info *fs_info = arg;
@ -2272,7 +2169,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->delalloc_workers);
btrfs_destroy_workqueue(fs_info->hipri_workers);
btrfs_destroy_workqueue(fs_info->workers);
btrfs_destroy_workqueue(fs_info->endio_workers);
if (fs_info->endio_workers)
destroy_workqueue(fs_info->endio_workers);
if (fs_info->endio_raid56_workers)
destroy_workqueue(fs_info->endio_raid56_workers);
if (fs_info->rmw_workers)
@ -2292,7 +2190,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
* the queues used for metadata I/O, since tasks from those other work
* queues can do metadata I/O operations.
*/
btrfs_destroy_workqueue(fs_info->endio_meta_workers);
if (fs_info->endio_meta_workers)
destroy_workqueue(fs_info->endio_meta_workers);
}
static void free_root_extent_buffers(struct btrfs_root *root)
@ -2471,15 +2370,10 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
fs_info->fixup_workers =
btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
/*
* endios are largely parallel and should have a very
* low idle thresh
*/
fs_info->endio_workers =
btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
alloc_workqueue("btrfs-endio", flags, max_active);
fs_info->endio_meta_workers =
btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
max_active, 4);
alloc_workqueue("btrfs-endio-meta", flags, max_active);
fs_info->endio_raid56_workers =
alloc_workqueue("btrfs-endio-raid56", flags, max_active);
fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);

View File

@ -17,12 +17,6 @@
*/
#define BTRFS_BDEV_BLOCKSIZE (4096)
enum btrfs_wq_endio_type {
BTRFS_WQ_ENDIO_DATA,
BTRFS_WQ_ENDIO_METADATA,
BTRFS_WQ_ENDIO_FREE_SPACE,
};
static inline u64 btrfs_sb_offset(int mirror)
{
u64 start = SZ_16K;
@ -120,8 +114,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
int level, struct btrfs_key *first_key);
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
int mirror_num, u64 dio_file_offset,
extent_submit_bio_start_t *submit_bio_start);
@ -144,8 +136,6 @@ int btree_lock_page_hook(struct page *page, void *data,
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
int btrfs_init_root_free_objectid(struct btrfs_root *root);
int __init btrfs_end_io_wq_init(void);
void __cold btrfs_end_io_wq_exit(void);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(u64 objectid,

View File

@ -2640,12 +2640,6 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
return;
}
ret = btrfs_bio_wq_end_io(fs_info, bio,
btrfs_is_free_space_inode(BTRFS_I(inode)) ?
BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
if (ret)
goto out;
/*
* Lookup bio sums does extra checks around whether we need to csum or
* not, which is why we ignore skip_sum here.
@ -7879,9 +7873,6 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
BUG_ON(bio_op(bio) == REQ_OP_WRITE);
if (btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA))
return;
refcount_inc(&dip->refs);
if (btrfs_map_bio(fs_info, bio, mirror_num))
refcount_dec(&dip->refs);
@ -7970,19 +7961,12 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_dio_private *dip = bio->bi_private;
bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
blk_status_t ret;
if (!write) {
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
if (ret)
return ret;
}
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
goto map;
if (write) {
if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
/* Check btrfs_submit_data_write_bio() for async submit rules */
if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
return btrfs_wq_submit_bio(inode, bio, 0, file_offset,
@ -10314,12 +10298,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
return ret;
}
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
if (ret) {
btrfs_bio_free_csum(bbio);
return ret;
}
atomic_inc(&priv->pending);
ret = btrfs_map_bio(fs_info, bio, mirror_num);
if (ret) {

View File

@ -1932,8 +1932,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
@ -2702,13 +2700,9 @@ static int __init init_btrfs_fs(void)
if (err)
goto free_delayed_ref;
err = btrfs_end_io_wq_init();
if (err)
goto free_prelim_ref;
err = btrfs_interface_init();
if (err)
goto free_end_io_wq;
goto free_prelim_ref;
btrfs_print_mod_info();
@ -2724,8 +2718,6 @@ static int __init init_btrfs_fs(void)
unregister_ioctl:
btrfs_interface_exit();
free_end_io_wq:
btrfs_end_io_wq_exit();
free_prelim_ref:
btrfs_prelim_ref_exit();
free_delayed_ref:
@ -2763,7 +2755,6 @@ static void __exit exit_btrfs_fs(void)
extent_state_cache_exit();
extent_io_exit();
btrfs_interface_exit();
btrfs_end_io_wq_exit();
unregister_filesystem(&btrfs_fs_type);
btrfs_exit_sysfs();
btrfs_cleanup_fs_uuids();

View File

@ -6616,11 +6616,27 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1);
}
static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc)
{
if (bioc->orig_bio->bi_opf & REQ_META)
return bioc->fs_info->endio_meta_workers;
return bioc->fs_info->endio_workers;
}
static void btrfs_end_bio_work(struct work_struct *work)
{
struct btrfs_bio *bbio =
container_of(work, struct btrfs_bio, end_io_work);
bio_endio(&bbio->bio);
}
static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
{
struct bio *orig_bio = bioc->orig_bio;
struct btrfs_bio *bbio = btrfs_bio(orig_bio);
btrfs_bio(orig_bio)->mirror_num = bioc->mirror_num;
bbio->mirror_num = bioc->mirror_num;
orig_bio->bi_private = bioc->private;
orig_bio->bi_end_io = bioc->end_io;
@ -6632,7 +6648,14 @@ static inline void btrfs_end_bioc(struct btrfs_io_context *bioc)
orig_bio->bi_status = BLK_STS_IOERR;
else
orig_bio->bi_status = BLK_STS_OK;
if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
} else {
bio_endio(orig_bio);
}
btrfs_put_bioc(bioc);
}
@ -6664,7 +6687,7 @@ static void btrfs_end_bio(struct bio *bio)
btrfs_bio_counter_dec(bioc->fs_info);
if (atomic_dec_and_test(&bioc->stripes_pending))
btrfs_end_bioc(bioc);
btrfs_end_bioc(bioc, true);
}
static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio,
@ -6762,7 +6785,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
atomic_inc(&bioc->error);
if (atomic_dec_and_test(&bioc->stripes_pending))
btrfs_end_bioc(bioc);
btrfs_end_bioc(bioc, false);
continue;
}

View File

@ -371,6 +371,9 @@ struct btrfs_bio {
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
struct bvec_iter iter;
/* For read end I/O handling */
struct work_struct end_io_work;
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_bio but relies on bio being last.