Conflicts:
	fs/btrfs/inode.c

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Chris Mason 2011-12-15 13:43:49 -05:00
commit 567a45e917
11 changed files with 212 additions and 118 deletions

View File

@ -64,6 +64,8 @@ struct btrfs_worker_thread {
int idle;
};
static int __btrfs_start_workers(struct btrfs_workers *workers);
/*
* btrfs_start_workers uses kthread_run, which can block waiting for memory
* for a very long time. It will actually throttle on page writeback,
@ -88,27 +90,10 @@ static void start_new_worker_func(struct btrfs_work *work)
{
struct worker_start *start;
start = container_of(work, struct worker_start, work);
btrfs_start_workers(start->queue, 1);
__btrfs_start_workers(start->queue);
kfree(start);
}
static int start_new_worker(struct btrfs_workers *queue)
{
struct worker_start *start;
int ret;
start = kzalloc(sizeof(*start), GFP_NOFS);
if (!start)
return -ENOMEM;
start->work.func = start_new_worker_func;
start->queue = queue;
ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work);
if (ret)
kfree(start);
return ret;
}
/*
* helper function to move a thread onto the idle list after it
* has finished some requests.
@ -153,12 +138,20 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
{
struct btrfs_workers *workers = worker->workers;
struct worker_start *start;
unsigned long flags;
rmb();
if (!workers->atomic_start_pending)
return;
start = kzalloc(sizeof(*start), GFP_NOFS);
if (!start)
return;
start->work.func = start_new_worker_func;
start->queue = workers;
spin_lock_irqsave(&workers->lock, flags);
if (!workers->atomic_start_pending)
goto out;
@ -170,10 +163,11 @@ static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
workers->num_workers_starting += 1;
spin_unlock_irqrestore(&workers->lock, flags);
start_new_worker(workers);
btrfs_queue_worker(workers->atomic_worker_start, &start->work);
return;
out:
kfree(start);
spin_unlock_irqrestore(&workers->lock, flags);
}
@ -462,56 +456,55 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
* starts new worker threads. This does not enforce the max worker
* count in case you need to temporarily go past it.
*/
static int __btrfs_start_workers(struct btrfs_workers *workers,
int num_workers)
static int __btrfs_start_workers(struct btrfs_workers *workers)
{
struct btrfs_worker_thread *worker;
int ret = 0;
int i;
for (i = 0; i < num_workers; i++) {
worker = kzalloc(sizeof(*worker), GFP_NOFS);
if (!worker) {
ret = -ENOMEM;
goto fail;
}
INIT_LIST_HEAD(&worker->pending);
INIT_LIST_HEAD(&worker->prio_pending);
INIT_LIST_HEAD(&worker->worker_list);
spin_lock_init(&worker->lock);
atomic_set(&worker->num_pending, 0);
atomic_set(&worker->refs, 1);
worker->workers = workers;
worker->task = kthread_run(worker_loop, worker,
"btrfs-%s-%d", workers->name,
workers->num_workers + i);
if (IS_ERR(worker->task)) {
ret = PTR_ERR(worker->task);
kfree(worker);
goto fail;
}
spin_lock_irq(&workers->lock);
list_add_tail(&worker->worker_list, &workers->idle_list);
worker->idle = 1;
workers->num_workers++;
workers->num_workers_starting--;
WARN_ON(workers->num_workers_starting < 0);
spin_unlock_irq(&workers->lock);
worker = kzalloc(sizeof(*worker), GFP_NOFS);
if (!worker) {
ret = -ENOMEM;
goto fail;
}
INIT_LIST_HEAD(&worker->pending);
INIT_LIST_HEAD(&worker->prio_pending);
INIT_LIST_HEAD(&worker->worker_list);
spin_lock_init(&worker->lock);
atomic_set(&worker->num_pending, 0);
atomic_set(&worker->refs, 1);
worker->workers = workers;
worker->task = kthread_run(worker_loop, worker,
"btrfs-%s-%d", workers->name,
workers->num_workers + 1);
if (IS_ERR(worker->task)) {
ret = PTR_ERR(worker->task);
kfree(worker);
goto fail;
}
spin_lock_irq(&workers->lock);
list_add_tail(&worker->worker_list, &workers->idle_list);
worker->idle = 1;
workers->num_workers++;
workers->num_workers_starting--;
WARN_ON(workers->num_workers_starting < 0);
spin_unlock_irq(&workers->lock);
return 0;
fail:
btrfs_stop_workers(workers);
spin_lock_irq(&workers->lock);
workers->num_workers_starting--;
spin_unlock_irq(&workers->lock);
return ret;
}
int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
int btrfs_start_workers(struct btrfs_workers *workers)
{
spin_lock_irq(&workers->lock);
workers->num_workers_starting += num_workers;
workers->num_workers_starting++;
spin_unlock_irq(&workers->lock);
return __btrfs_start_workers(workers, num_workers);
return __btrfs_start_workers(workers);
}
/*
@ -568,6 +561,7 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
struct btrfs_worker_thread *worker;
unsigned long flags;
struct list_head *fallback;
int ret;
again:
spin_lock_irqsave(&workers->lock, flags);
@ -584,7 +578,9 @@ again:
workers->num_workers_starting++;
spin_unlock_irqrestore(&workers->lock, flags);
/* we're below the limit, start another worker */
__btrfs_start_workers(workers, 1);
ret = __btrfs_start_workers(workers);
if (ret)
goto fallback;
goto again;
}
}
@ -665,7 +661,7 @@ void btrfs_set_work_high_prio(struct btrfs_work *work)
/*
* places a struct btrfs_work into the pending queue of one of the kthreads
*/
int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
{
struct btrfs_worker_thread *worker;
unsigned long flags;
@ -673,7 +669,7 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
/* don't requeue something already on a list */
if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
goto out;
return;
worker = find_worker(workers);
if (workers->ordered) {
@ -712,7 +708,4 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
if (wake)
wake_up_process(worker->task);
spin_unlock_irqrestore(&worker->lock, flags);
out:
return 0;
}

View File

@ -109,8 +109,8 @@ struct btrfs_workers {
char *name;
};
int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
int btrfs_start_workers(struct btrfs_workers *workers);
int btrfs_stop_workers(struct btrfs_workers *workers);
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
struct btrfs_workers *async_starter);

View File

@ -2692,7 +2692,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_evict_inode(struct inode *inode);
int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
void btrfs_dirty_inode(struct inode *inode, int flags);
int btrfs_dirty_inode(struct inode *inode);
int btrfs_update_time(struct file *file);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);

View File

@ -2194,19 +2194,27 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->endio_meta_write_workers.idle_thresh = 2;
fs_info->readahead_workers.idle_thresh = 2;
btrfs_start_workers(&fs_info->workers, 1);
btrfs_start_workers(&fs_info->generic_worker, 1);
btrfs_start_workers(&fs_info->submit_workers, 1);
btrfs_start_workers(&fs_info->delalloc_workers, 1);
btrfs_start_workers(&fs_info->fixup_workers, 1);
btrfs_start_workers(&fs_info->endio_workers, 1);
btrfs_start_workers(&fs_info->endio_meta_workers, 1);
btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
btrfs_start_workers(&fs_info->endio_write_workers, 1);
btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
btrfs_start_workers(&fs_info->delayed_workers, 1);
btrfs_start_workers(&fs_info->caching_workers, 1);
btrfs_start_workers(&fs_info->readahead_workers, 1);
/*
* btrfs_start_workers can really only fail because of ENOMEM so just
* return -ENOMEM if any of these fail.
*/
ret = btrfs_start_workers(&fs_info->workers);
ret |= btrfs_start_workers(&fs_info->generic_worker);
ret |= btrfs_start_workers(&fs_info->submit_workers);
ret |= btrfs_start_workers(&fs_info->delalloc_workers);
ret |= btrfs_start_workers(&fs_info->fixup_workers);
ret |= btrfs_start_workers(&fs_info->endio_workers);
ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
ret |= btrfs_start_workers(&fs_info->endio_write_workers);
ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
ret |= btrfs_start_workers(&fs_info->delayed_workers);
ret |= btrfs_start_workers(&fs_info->caching_workers);
ret |= btrfs_start_workers(&fs_info->readahead_workers);
if (ret) {
ret = -ENOMEM;
goto fail_sb_buffer;
}
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,

View File

@ -2822,7 +2822,7 @@ out_free:
btrfs_release_path(path);
out:
spin_lock(&block_group->lock);
if (!ret)
if (!ret && dcs == BTRFS_DC_SETUP)
block_group->cache_generation = trans->transid;
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
@ -4203,12 +4203,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
u64 to_reserve = 0;
u64 csum_bytes;
unsigned nr_extents = 0;
int extra_reserve = 0;
int flush = 1;
int ret;
/* Need to be holding the i_mutex here if we aren't free space cache */
if (btrfs_is_free_space_inode(root, inode))
flush = 0;
else
WARN_ON(!mutex_is_locked(&inode->i_mutex));
if (flush && btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
@ -4219,11 +4224,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
BTRFS_I(inode)->outstanding_extents++;
if (BTRFS_I(inode)->outstanding_extents >
BTRFS_I(inode)->reserved_extents) {
BTRFS_I(inode)->reserved_extents)
nr_extents = BTRFS_I(inode)->outstanding_extents -
BTRFS_I(inode)->reserved_extents;
BTRFS_I(inode)->reserved_extents += nr_extents;
}
/*
* Add an item to reserve for updating the inode when we complete the
@ -4231,11 +4234,12 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
*/
if (!BTRFS_I(inode)->delalloc_meta_reserved) {
nr_extents++;
BTRFS_I(inode)->delalloc_meta_reserved = 1;
extra_reserve = 1;
}
to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@ -4245,22 +4249,35 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
spin_lock(&BTRFS_I(inode)->lock);
dropped = drop_outstanding_extent(inode);
to_free = calc_csum_metadata_size(inode, num_bytes, 0);
spin_unlock(&BTRFS_I(inode)->lock);
to_free += btrfs_calc_trans_metadata_size(root, dropped);
/*
* Somebody could have come in and twiddled with the
* reservation, so if we have to free more than we would have
* reserved from this reservation go ahead and release those
* bytes.
* If the inodes csum_bytes is the same as the original
* csum_bytes then we know we haven't raced with any free()ers
* so we can just reduce our inodes csum bytes and carry on.
* Otherwise we have to do the normal free thing to account for
* the case that the free side didn't free up its reserve
* because of this outstanding reservation.
*/
to_free -= to_reserve;
if (BTRFS_I(inode)->csum_bytes == csum_bytes)
calc_csum_metadata_size(inode, num_bytes, 0);
else
to_free = calc_csum_metadata_size(inode, num_bytes, 0);
spin_unlock(&BTRFS_I(inode)->lock);
if (dropped)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
if (to_free)
btrfs_block_rsv_release(root, block_rsv, to_free);
return ret;
}
spin_lock(&BTRFS_I(inode)->lock);
if (extra_reserve) {
BTRFS_I(inode)->delalloc_meta_reserved = 1;
nr_extents--;
}
BTRFS_I(inode)->reserved_extents += nr_extents;
spin_unlock(&BTRFS_I(inode)->lock);
block_rsv_add_bytes(block_rsv, to_reserve, 1);
return 0;

View File

@ -1387,7 +1387,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
goto out;
}
file_update_time(file);
err = btrfs_update_time(file);
if (err) {
mutex_unlock(&inode->i_mutex);
goto out;
}
BTRFS_I(inode)->sequence++;
start_pos = round_down(pos, root->sectorsize);

View File

@ -38,6 +38,7 @@
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/mount.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@ -2031,7 +2032,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
/* insert an orphan item to track this unlinked/truncated file */
if (insert >= 1) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
BUG_ON(ret);
BUG_ON(ret && ret != -EEXIST);
}
/* insert an orphan item to track subvolume contains orphan files */
@ -2223,7 +2224,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
continue;
}
nr_truncate++;
/*
* Need to hold the imutex for reservation purposes, not
* a huge deal here but I have a WARN_ON in
* btrfs_delalloc_reserve_space to catch offenders.
*/
mutex_lock(&inode->i_mutex);
ret = btrfs_truncate(inode);
mutex_unlock(&inode->i_mutex);
} else {
nr_unlink++;
}
@ -3426,7 +3434,6 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
i_size_write(inode, newsize);
btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
ret = btrfs_update_inode(trans, root, inode);
btrfs_end_transaction_throttle(trans, root);
} else {
@ -3467,9 +3474,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid) {
setattr_copy(inode, attr);
mark_inode_dirty(inode);
err = btrfs_dirty_inode(inode);
if (attr->ia_valid & ATTR_MODE)
if (!err && attr->ia_valid & ATTR_MODE)
err = btrfs_acl_chmod(inode);
}
@ -4245,42 +4252,80 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
* FIXME, needs more benchmarking...there are no reasons other than performance
* to keep or drop this code.
*/
void btrfs_dirty_inode(struct inode *inode, int flags)
int btrfs_dirty_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret;
if (BTRFS_I(inode)->dummy_inode)
return;
return 0;
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_update_inode(trans, root, inode);
if (ret && ret == -ENOSPC) {
/* whoops, lets try again with the full transaction */
btrfs_end_transaction(trans, root);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
printk_ratelimited(KERN_ERR "btrfs: fail to "
"dirty inode %llu error %ld\n",
(unsigned long long)btrfs_ino(inode),
PTR_ERR(trans));
return;
}
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
printk_ratelimited(KERN_ERR "btrfs: fail to "
"dirty inode %llu error %d\n",
(unsigned long long)btrfs_ino(inode),
ret);
}
}
btrfs_end_transaction(trans, root);
if (BTRFS_I(inode)->delayed_node)
btrfs_balance_delayed_items(root);
return ret;
}
/*
* This is a copy of file_update_time. We need this so we can return error on
* ENOSPC for updating the inode in the case of file write and mmap writes.
*/
int btrfs_update_time(struct file *file)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct timespec now;
int ret;
enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
/* First try to exhaust all avenues to not sync */
if (IS_NOCMTIME(inode))
return 0;
now = current_fs_time(inode->i_sb);
if (!timespec_equal(&inode->i_mtime, &now))
sync_it = S_MTIME;
if (!timespec_equal(&inode->i_ctime, &now))
sync_it |= S_CTIME;
if (IS_I_VERSION(inode))
sync_it |= S_VERSION;
if (!sync_it)
return 0;
/* Finally allowed to write? Takes lock. */
if (mnt_want_write_file(file))
return 0;
/* Only change inode inside the lock region */
if (sync_it & S_VERSION)
inode_inc_iversion(inode);
if (sync_it & S_CTIME)
inode->i_ctime = now;
if (sync_it & S_MTIME)
inode->i_mtime = now;
ret = btrfs_dirty_inode(inode);
if (!ret)
mark_inode_dirty_sync(inode);
mnt_drop_write(file->f_path.mnt);
return ret;
}
/*
@ -6358,7 +6403,12 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
u64 page_start;
u64 page_end;
/* Need this to keep space reservations serialized */
mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
mutex_unlock(&inode->i_mutex);
if (!ret)
ret = btrfs_update_time(vma->vm_file);
if (ret) {
if (ret == -ENOMEM)
ret = VM_FAULT_OOM;
@ -6570,8 +6620,9 @@ static int btrfs_truncate(struct inode *inode)
/* Just need the 1 for updating the inode */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out;
ret = err = PTR_ERR(trans);
trans = NULL;
break;
}
}
@ -7415,6 +7466,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
.follow_link = page_follow_link_light,
.put_link = page_put_link,
.getattr = btrfs_getattr,
.setattr = btrfs_setattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
.getxattr = btrfs_getxattr,

View File

@ -858,8 +858,10 @@ static int cluster_pages_for_defrag(struct inode *inode,
return 0;
file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_space(inode,
num_pages << PAGE_CACHE_SHIFT);
mutex_unlock(&inode->i_mutex);
if (ret)
return ret;
again:

View File

@ -2947,7 +2947,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
while (index <= last_index) {
mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
mutex_unlock(&inode->i_mutex);
if (ret)
goto out;

View File

@ -1535,18 +1535,22 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
mutex_lock(&fs_info->scrub_lock);
if (fs_info->scrub_workers_refcnt == 0) {
btrfs_init_workers(&fs_info->scrub_workers, "scrub",
fs_info->thread_pool_size, &fs_info->generic_worker);
fs_info->scrub_workers.idle_thresh = 4;
btrfs_start_workers(&fs_info->scrub_workers, 1);
ret = btrfs_start_workers(&fs_info->scrub_workers);
if (ret)
goto out;
}
++fs_info->scrub_workers_refcnt;
out:
mutex_unlock(&fs_info->scrub_lock);
return 0;
return ret;
}
static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)

View File

@ -41,6 +41,7 @@
#include <linux/slab.h>
#include <linux/cleancache.h>
#include <linux/mnt_namespace.h>
#include <linux/ratelimit.h>
#include "compat.h"
#include "delayed-inode.h"
#include "ctree.h"
@ -1297,6 +1298,16 @@ static int btrfs_unfreeze(struct super_block *sb)
return 0;
}
static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
{
int ret;
ret = btrfs_dirty_inode(inode);
if (ret)
printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu "
"error %d\n", btrfs_ino(inode), ret);
}
static const struct super_operations btrfs_super_ops = {
.drop_inode = btrfs_drop_inode,
.evict_inode = btrfs_evict_inode,
@ -1304,7 +1315,7 @@ static const struct super_operations btrfs_super_ops = {
.sync_fs = btrfs_sync_fs,
.show_options = btrfs_show_options,
.write_inode = btrfs_write_inode,
.dirty_inode = btrfs_dirty_inode,
.dirty_inode = btrfs_fs_dirty_inode,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,