mirror of
https://github.com/edk2-porting/linux-next.git
synced 2025-01-11 07:04:04 +08:00
for-5.1-part1-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAlx9czQACgkQxWXV+ddt WDvC9w/8CxJf1/eZBqb+b+aA38kgZhoaNixMud/IW/IFmIlicX0PoDxk6dh1ZTA+ 3uej/7fyfwjNCVvtrPVVxdT8zhZgyJouHrbhG1PlDWtmTEV2VqV5pBG1xQtCwmZy oinQI5oYYM5Le5EXxRGH8TQs6Z3tFuLx2kcrVWBLFKoZ2kZBZxe6KykGF9izve4a sVjtOL1CEL1e00vrNLzUmch8qss9Cu0i3qd3k8UANp3SgKIaOkJt4S/HeEcLfy5J kf6hVKlgPDuakVtAJKyhbLVQsfHVNkfiyvplta9lDot/iJchJITTRkadP6LblVeo knl8V+VO9kzQUvGauxtu66Q3DJ/7mqbzHUwPISetdKCV9ZXkuPFHnu0AEP577mrx e1JAPA/a8lF3up5QhqIb0uzH3sczOd8nNN/b1Xnxl7Kogyl8SUjhmX3FFy88borj /8Ptv/fFMQZs9IJ0QWlkh5TKRXAtSNAzVy2FpkvLaO0k0gJKQjyJuTKV5ezv/PGU +4m5kDtfpyz//KAOZxq4lERj4EMIEDhHhNbA8Qqmdeoj7oaKZ+gW57enOXohCTbi gVE6xDr2u4oQ85j3JuQo5W5mZA4uza35Gh4t43n5akdrrkLFVLW584hDxShGx9uS B0maToGbzOdGJTZXZ2SLHZ5Da14Lzb/TooCufF8GMAISb99vbjw= =D9zc -----END PGP SIGNATURE----- Merge tag 'for-5.1-part1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs updates from David Sterba: "This contains usual mix of new features, core changes and fixes; full list below. I'm planning second pull request, with a few more fixes that arrived recently but too close to merge window, will send it next week. New features: - support zstd compression levels - new ioctl to unregister a device from the module (ie. 
reverse of device scan) - scrub prints a message to log when it's about to start or finish Core changes: - qgroups can now skip part of a tree that does not get updated during relocation, because this does not affect the quota accounting, estimated speedup in run time is about 20% - the compression workspace management had to be enhanced due to zstd requirements - various enospc fixes, when there's high fragmentation the over-reservation can cause ENOSPC that might not happen after a flush, in such cases try to wait if the situation improves Fixes: - various ioctls could overwrite previous return value if copy_to_user fails, fix this so the original error is reported - more reclaim vs GFP_KERNEL fixes - other cleanups and refactoring - fix a (valid) lockdep warning in a test when device replace is destroying worker threads - make qgroup async transaction commit more aggressive, this avoids some 'quota limit reached' errors if there are not enough data to trigger transaction in order to flush - fix deadlock between snapshot deletion and quotas when backref walking is called from context that already holds the same locks - fsync fixes: - fix fsync after succession of renames of different files - fix fsync after succession of renames and unlink/rmdir" * tag 'for-5.1-part1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (92 commits) btrfs: Remove unnecessary casts in btrfs_read_root_item Btrfs: remove assertion when searching for a key in a node/leaf Btrfs: add missing error handling after doing leaf/node binary search btrfs: drop the lock on error in btrfs_dev_replace_cancel btrfs: ensure that a DUP or RAID1 block group has exactly two stripes btrfs: init csum_list before possible free Btrfs: remove no longer needed range length checks for deduplication Btrfs: fix fsync after succession of renames and unlink/rmdir Btrfs: fix fsync after succession of renames of different files btrfs: honor path->skip_locking in backref code btrfs: qgroup: Make 
qgroup async transaction commit more aggressive btrfs: qgroup: Move reserved data accounting from btrfs_delayed_ref_head to btrfs_qgroup_extent_record btrfs: scrub: remove unused nocow worker pointer btrfs: scrub: add assertions for worker pointers btrfs: scrub: convert scrub_workers_refcnt to refcount_t btrfs: scrub: add scrub_lock lockdep check in scrub_workers_get btrfs: scrub: fix circular locking dependency warning btrfs: fix comment its device list mutex not volume lock btrfs: extent_io: Kill the forward declaration of flush_write_bio btrfs: Fix grossly misleading argument names in extent io search ...
This commit is contained in:
commit
b1e243957e
@ -9,6 +9,7 @@
|
||||
#include <linux/posix_acl_xattr.h>
|
||||
#include <linux/posix_acl.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "ctree.h"
|
||||
@ -72,8 +73,16 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
if (acl) {
|
||||
unsigned int nofs_flag;
|
||||
|
||||
size = posix_acl_xattr_size(acl->a_count);
|
||||
/*
|
||||
* We're holding a transaction handle, so use a NOFS memory
|
||||
* allocation context to avoid deadlock if reclaim happens.
|
||||
*/
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
value = kmalloc(size, GFP_KERNEL);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
if (!value) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
|
@ -139,13 +139,11 @@ __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
|
||||
}
|
||||
|
||||
if (flags & WQ_HIGHPRI)
|
||||
ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
|
||||
ret->current_active, "btrfs",
|
||||
name);
|
||||
ret->normal_wq = alloc_workqueue("btrfs-%s-high", flags,
|
||||
ret->current_active, name);
|
||||
else
|
||||
ret->normal_wq = alloc_workqueue("%s-%s", flags,
|
||||
ret->current_active, "btrfs",
|
||||
name);
|
||||
ret->normal_wq = alloc_workqueue("btrfs-%s", flags,
|
||||
ret->current_active, name);
|
||||
if (!ret->normal_wq) {
|
||||
kfree(ret);
|
||||
return NULL;
|
||||
|
@ -712,7 +712,7 @@ out:
|
||||
* read tree blocks and add keys where required.
|
||||
*/
|
||||
static int add_missing_keys(struct btrfs_fs_info *fs_info,
|
||||
struct preftrees *preftrees)
|
||||
struct preftrees *preftrees, bool lock)
|
||||
{
|
||||
struct prelim_ref *ref;
|
||||
struct extent_buffer *eb;
|
||||
@ -737,12 +737,14 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
|
||||
free_extent_buffer(eb);
|
||||
return -EIO;
|
||||
}
|
||||
btrfs_tree_read_lock(eb);
|
||||
if (lock)
|
||||
btrfs_tree_read_lock(eb);
|
||||
if (btrfs_header_level(eb) == 0)
|
||||
btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
|
||||
else
|
||||
btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
|
||||
btrfs_tree_read_unlock(eb);
|
||||
if (lock)
|
||||
btrfs_tree_read_unlock(eb);
|
||||
free_extent_buffer(eb);
|
||||
prelim_ref_insert(fs_info, &preftrees->indirect, ref, NULL);
|
||||
cond_resched();
|
||||
@ -1227,7 +1229,7 @@ again:
|
||||
|
||||
btrfs_release_path(path);
|
||||
|
||||
ret = add_missing_keys(fs_info, &preftrees);
|
||||
ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -1288,11 +1290,15 @@ again:
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
btrfs_tree_read_lock(eb);
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
|
||||
if (!path->skip_locking) {
|
||||
btrfs_tree_read_lock(eb);
|
||||
btrfs_set_lock_blocking_read(eb);
|
||||
}
|
||||
ret = find_extent_in_eb(eb, bytenr,
|
||||
*extent_item_pos, &eie, ignore_offset);
|
||||
btrfs_tree_read_unlock_blocking(eb);
|
||||
if (!path->skip_locking)
|
||||
btrfs_tree_read_unlock_blocking(eb);
|
||||
free_extent_buffer(eb);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
@ -1650,7 +1656,7 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
|
||||
/* make sure we can use eb after releasing the path */
|
||||
if (eb != eb_in) {
|
||||
if (!path->skip_locking)
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
btrfs_set_lock_blocking_read(eb);
|
||||
path->nodes[0] = NULL;
|
||||
path->locks[0] = 0;
|
||||
}
|
||||
|
@ -730,6 +730,28 @@ struct heuristic_ws {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
static struct workspace_manager heuristic_wsm;
|
||||
|
||||
static void heuristic_init_workspace_manager(void)
|
||||
{
|
||||
btrfs_init_workspace_manager(&heuristic_wsm, &btrfs_heuristic_compress);
|
||||
}
|
||||
|
||||
static void heuristic_cleanup_workspace_manager(void)
|
||||
{
|
||||
btrfs_cleanup_workspace_manager(&heuristic_wsm);
|
||||
}
|
||||
|
||||
static struct list_head *heuristic_get_workspace(unsigned int level)
|
||||
{
|
||||
return btrfs_get_workspace(&heuristic_wsm, level);
|
||||
}
|
||||
|
||||
static void heuristic_put_workspace(struct list_head *ws)
|
||||
{
|
||||
btrfs_put_workspace(&heuristic_wsm, ws);
|
||||
}
|
||||
|
||||
static void free_heuristic_ws(struct list_head *ws)
|
||||
{
|
||||
struct heuristic_ws *workspace;
|
||||
@ -742,7 +764,7 @@ static void free_heuristic_ws(struct list_head *ws)
|
||||
kfree(workspace);
|
||||
}
|
||||
|
||||
static struct list_head *alloc_heuristic_ws(void)
|
||||
static struct list_head *alloc_heuristic_ws(unsigned int level)
|
||||
{
|
||||
struct heuristic_ws *ws;
|
||||
|
||||
@ -769,65 +791,59 @@ fail:
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
struct workspaces_list {
|
||||
struct list_head idle_ws;
|
||||
spinlock_t ws_lock;
|
||||
/* Number of free workspaces */
|
||||
int free_ws;
|
||||
/* Total number of allocated workspaces */
|
||||
atomic_t total_ws;
|
||||
/* Waiters for a free workspace */
|
||||
wait_queue_head_t ws_wait;
|
||||
const struct btrfs_compress_op btrfs_heuristic_compress = {
|
||||
.init_workspace_manager = heuristic_init_workspace_manager,
|
||||
.cleanup_workspace_manager = heuristic_cleanup_workspace_manager,
|
||||
.get_workspace = heuristic_get_workspace,
|
||||
.put_workspace = heuristic_put_workspace,
|
||||
.alloc_workspace = alloc_heuristic_ws,
|
||||
.free_workspace = free_heuristic_ws,
|
||||
};
|
||||
|
||||
static struct workspaces_list btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
|
||||
|
||||
static struct workspaces_list btrfs_heuristic_ws;
|
||||
|
||||
static const struct btrfs_compress_op * const btrfs_compress_op[] = {
|
||||
/* The heuristic is represented as compression type 0 */
|
||||
&btrfs_heuristic_compress,
|
||||
&btrfs_zlib_compress,
|
||||
&btrfs_lzo_compress,
|
||||
&btrfs_zstd_compress,
|
||||
};
|
||||
|
||||
void __init btrfs_init_compress(void)
|
||||
void btrfs_init_workspace_manager(struct workspace_manager *wsm,
|
||||
const struct btrfs_compress_op *ops)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int i;
|
||||
|
||||
INIT_LIST_HEAD(&btrfs_heuristic_ws.idle_ws);
|
||||
spin_lock_init(&btrfs_heuristic_ws.ws_lock);
|
||||
atomic_set(&btrfs_heuristic_ws.total_ws, 0);
|
||||
init_waitqueue_head(&btrfs_heuristic_ws.ws_wait);
|
||||
wsm->ops = ops;
|
||||
|
||||
workspace = alloc_heuristic_ws();
|
||||
INIT_LIST_HEAD(&wsm->idle_ws);
|
||||
spin_lock_init(&wsm->ws_lock);
|
||||
atomic_set(&wsm->total_ws, 0);
|
||||
init_waitqueue_head(&wsm->ws_wait);
|
||||
|
||||
/*
|
||||
* Preallocate one workspace for each compression type so we can
|
||||
* guarantee forward progress in the worst case
|
||||
*/
|
||||
workspace = wsm->ops->alloc_workspace(0);
|
||||
if (IS_ERR(workspace)) {
|
||||
pr_warn(
|
||||
"BTRFS: cannot preallocate heuristic workspace, will try later\n");
|
||||
"BTRFS: cannot preallocate compression workspace, will try later\n");
|
||||
} else {
|
||||
atomic_set(&btrfs_heuristic_ws.total_ws, 1);
|
||||
btrfs_heuristic_ws.free_ws = 1;
|
||||
list_add(workspace, &btrfs_heuristic_ws.idle_ws);
|
||||
atomic_set(&wsm->total_ws, 1);
|
||||
wsm->free_ws = 1;
|
||||
list_add(workspace, &wsm->idle_ws);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
|
||||
INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
|
||||
spin_lock_init(&btrfs_comp_ws[i].ws_lock);
|
||||
atomic_set(&btrfs_comp_ws[i].total_ws, 0);
|
||||
init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
|
||||
void btrfs_cleanup_workspace_manager(struct workspace_manager *wsman)
|
||||
{
|
||||
struct list_head *ws;
|
||||
|
||||
/*
|
||||
* Preallocate one workspace for each compression type so
|
||||
* we can guarantee forward progress in the worst case
|
||||
*/
|
||||
workspace = btrfs_compress_op[i]->alloc_workspace();
|
||||
if (IS_ERR(workspace)) {
|
||||
pr_warn("BTRFS: cannot preallocate compression workspace, will try later\n");
|
||||
} else {
|
||||
atomic_set(&btrfs_comp_ws[i].total_ws, 1);
|
||||
btrfs_comp_ws[i].free_ws = 1;
|
||||
list_add(workspace, &btrfs_comp_ws[i].idle_ws);
|
||||
}
|
||||
while (!list_empty(&wsman->idle_ws)) {
|
||||
ws = wsman->idle_ws.next;
|
||||
list_del(ws);
|
||||
wsman->ops->free_workspace(ws);
|
||||
atomic_dec(&wsman->total_ws);
|
||||
}
|
||||
}
|
||||
|
||||
@ -837,11 +853,11 @@ void __init btrfs_init_compress(void)
|
||||
* Preallocation makes a forward progress guarantees and we do not return
|
||||
* errors.
|
||||
*/
|
||||
static struct list_head *__find_workspace(int type, bool heuristic)
|
||||
struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
|
||||
unsigned int level)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int cpus = num_online_cpus();
|
||||
int idx = type - 1;
|
||||
unsigned nofs_flag;
|
||||
struct list_head *idle_ws;
|
||||
spinlock_t *ws_lock;
|
||||
@ -849,19 +865,11 @@ static struct list_head *__find_workspace(int type, bool heuristic)
|
||||
wait_queue_head_t *ws_wait;
|
||||
int *free_ws;
|
||||
|
||||
if (heuristic) {
|
||||
idle_ws = &btrfs_heuristic_ws.idle_ws;
|
||||
ws_lock = &btrfs_heuristic_ws.ws_lock;
|
||||
total_ws = &btrfs_heuristic_ws.total_ws;
|
||||
ws_wait = &btrfs_heuristic_ws.ws_wait;
|
||||
free_ws = &btrfs_heuristic_ws.free_ws;
|
||||
} else {
|
||||
idle_ws = &btrfs_comp_ws[idx].idle_ws;
|
||||
ws_lock = &btrfs_comp_ws[idx].ws_lock;
|
||||
total_ws = &btrfs_comp_ws[idx].total_ws;
|
||||
ws_wait = &btrfs_comp_ws[idx].ws_wait;
|
||||
free_ws = &btrfs_comp_ws[idx].free_ws;
|
||||
}
|
||||
idle_ws = &wsm->idle_ws;
|
||||
ws_lock = &wsm->ws_lock;
|
||||
total_ws = &wsm->total_ws;
|
||||
ws_wait = &wsm->ws_wait;
|
||||
free_ws = &wsm->free_ws;
|
||||
|
||||
again:
|
||||
spin_lock(ws_lock);
|
||||
@ -892,10 +900,7 @@ again:
|
||||
* context of btrfs_compress_bio/btrfs_compress_pages
|
||||
*/
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
if (heuristic)
|
||||
workspace = alloc_heuristic_ws();
|
||||
else
|
||||
workspace = btrfs_compress_op[idx]->alloc_workspace();
|
||||
workspace = wsm->ops->alloc_workspace(level);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
|
||||
if (IS_ERR(workspace)) {
|
||||
@ -926,85 +931,47 @@ again:
|
||||
return workspace;
|
||||
}
|
||||
|
||||
static struct list_head *find_workspace(int type)
|
||||
static struct list_head *get_workspace(int type, int level)
|
||||
{
|
||||
return __find_workspace(type, false);
|
||||
return btrfs_compress_op[type]->get_workspace(level);
|
||||
}
|
||||
|
||||
/*
|
||||
* put a workspace struct back on the list or free it if we have enough
|
||||
* idle ones sitting around
|
||||
*/
|
||||
static void __free_workspace(int type, struct list_head *workspace,
|
||||
bool heuristic)
|
||||
void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws)
|
||||
{
|
||||
int idx = type - 1;
|
||||
struct list_head *idle_ws;
|
||||
spinlock_t *ws_lock;
|
||||
atomic_t *total_ws;
|
||||
wait_queue_head_t *ws_wait;
|
||||
int *free_ws;
|
||||
|
||||
if (heuristic) {
|
||||
idle_ws = &btrfs_heuristic_ws.idle_ws;
|
||||
ws_lock = &btrfs_heuristic_ws.ws_lock;
|
||||
total_ws = &btrfs_heuristic_ws.total_ws;
|
||||
ws_wait = &btrfs_heuristic_ws.ws_wait;
|
||||
free_ws = &btrfs_heuristic_ws.free_ws;
|
||||
} else {
|
||||
idle_ws = &btrfs_comp_ws[idx].idle_ws;
|
||||
ws_lock = &btrfs_comp_ws[idx].ws_lock;
|
||||
total_ws = &btrfs_comp_ws[idx].total_ws;
|
||||
ws_wait = &btrfs_comp_ws[idx].ws_wait;
|
||||
free_ws = &btrfs_comp_ws[idx].free_ws;
|
||||
}
|
||||
idle_ws = &wsm->idle_ws;
|
||||
ws_lock = &wsm->ws_lock;
|
||||
total_ws = &wsm->total_ws;
|
||||
ws_wait = &wsm->ws_wait;
|
||||
free_ws = &wsm->free_ws;
|
||||
|
||||
spin_lock(ws_lock);
|
||||
if (*free_ws <= num_online_cpus()) {
|
||||
list_add(workspace, idle_ws);
|
||||
list_add(ws, idle_ws);
|
||||
(*free_ws)++;
|
||||
spin_unlock(ws_lock);
|
||||
goto wake;
|
||||
}
|
||||
spin_unlock(ws_lock);
|
||||
|
||||
if (heuristic)
|
||||
free_heuristic_ws(workspace);
|
||||
else
|
||||
btrfs_compress_op[idx]->free_workspace(workspace);
|
||||
wsm->ops->free_workspace(ws);
|
||||
atomic_dec(total_ws);
|
||||
wake:
|
||||
cond_wake_up(ws_wait);
|
||||
}
|
||||
|
||||
static void free_workspace(int type, struct list_head *ws)
|
||||
static void put_workspace(int type, struct list_head *ws)
|
||||
{
|
||||
return __free_workspace(type, ws, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* cleanup function for module exit
|
||||
*/
|
||||
static void free_workspaces(void)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int i;
|
||||
|
||||
while (!list_empty(&btrfs_heuristic_ws.idle_ws)) {
|
||||
workspace = btrfs_heuristic_ws.idle_ws.next;
|
||||
list_del(workspace);
|
||||
free_heuristic_ws(workspace);
|
||||
atomic_dec(&btrfs_heuristic_ws.total_ws);
|
||||
}
|
||||
|
||||
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
|
||||
while (!list_empty(&btrfs_comp_ws[i].idle_ws)) {
|
||||
workspace = btrfs_comp_ws[i].idle_ws.next;
|
||||
list_del(workspace);
|
||||
btrfs_compress_op[i]->free_workspace(workspace);
|
||||
atomic_dec(&btrfs_comp_ws[i].total_ws);
|
||||
}
|
||||
}
|
||||
return btrfs_compress_op[type]->put_workspace(ws);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1036,18 +1003,17 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out)
|
||||
{
|
||||
int type = btrfs_compress_type(type_level);
|
||||
int level = btrfs_compress_level(type_level);
|
||||
struct list_head *workspace;
|
||||
int ret;
|
||||
int type = type_level & 0xF;
|
||||
|
||||
workspace = find_workspace(type);
|
||||
|
||||
btrfs_compress_op[type - 1]->set_level(workspace, type_level);
|
||||
ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
|
||||
workspace = get_workspace(type, level);
|
||||
ret = btrfs_compress_op[type]->compress_pages(workspace, mapping,
|
||||
start, pages,
|
||||
out_pages,
|
||||
total_in, total_out);
|
||||
free_workspace(type, workspace);
|
||||
put_workspace(type, workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1071,9 +1037,9 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
|
||||
int ret;
|
||||
int type = cb->compress_type;
|
||||
|
||||
workspace = find_workspace(type);
|
||||
ret = btrfs_compress_op[type - 1]->decompress_bio(workspace, cb);
|
||||
free_workspace(type, workspace);
|
||||
workspace = get_workspace(type, 0);
|
||||
ret = btrfs_compress_op[type]->decompress_bio(workspace, cb);
|
||||
put_workspace(type, workspace);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1089,19 +1055,29 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
|
||||
struct list_head *workspace;
|
||||
int ret;
|
||||
|
||||
workspace = find_workspace(type);
|
||||
|
||||
ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
|
||||
workspace = get_workspace(type, 0);
|
||||
ret = btrfs_compress_op[type]->decompress(workspace, data_in,
|
||||
dest_page, start_byte,
|
||||
srclen, destlen);
|
||||
put_workspace(type, workspace);
|
||||
|
||||
free_workspace(type, workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __init btrfs_init_compress(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_NR_WORKSPACE_MANAGERS; i++)
|
||||
btrfs_compress_op[i]->init_workspace_manager();
|
||||
}
|
||||
|
||||
void __cold btrfs_exit_compress(void)
|
||||
{
|
||||
free_workspaces();
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_NR_WORKSPACE_MANAGERS; i++)
|
||||
btrfs_compress_op[i]->cleanup_workspace_manager();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1512,7 +1488,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
|
||||
*/
|
||||
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
|
||||
{
|
||||
struct list_head *ws_list = __find_workspace(0, true);
|
||||
struct list_head *ws_list = get_workspace(0, 0);
|
||||
struct heuristic_ws *ws;
|
||||
u32 i;
|
||||
u8 byte;
|
||||
@ -1581,18 +1557,29 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
|
||||
}
|
||||
|
||||
out:
|
||||
__free_workspace(0, ws_list, true);
|
||||
put_workspace(0, ws_list);
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned int btrfs_compress_str2level(const char *str)
|
||||
/*
|
||||
* Convert the compression suffix (eg. after "zlib" starting with ":") to
|
||||
* level, unrecognized string will set the default level
|
||||
*/
|
||||
unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
|
||||
{
|
||||
if (strncmp(str, "zlib", 4) != 0)
|
||||
unsigned int level = 0;
|
||||
int ret;
|
||||
|
||||
if (!type)
|
||||
return 0;
|
||||
|
||||
/* Accepted form: zlib:1 up to zlib:9 and nothing left after the number */
|
||||
if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)
|
||||
return str[5] - '0';
|
||||
if (str[0] == ':') {
|
||||
ret = kstrtouint(str + 1, 10, &level);
|
||||
if (ret)
|
||||
level = 0;
|
||||
}
|
||||
|
||||
return BTRFS_ZLIB_DEFAULT_LEVEL;
|
||||
level = btrfs_compress_op[type]->set_level(level);
|
||||
|
||||
return level;
|
||||
}
|
||||
|
@ -64,6 +64,16 @@ struct compressed_bio {
|
||||
u32 sums;
|
||||
};
|
||||
|
||||
static inline unsigned int btrfs_compress_type(unsigned int type_level)
|
||||
{
|
||||
return (type_level & 0xF);
|
||||
}
|
||||
|
||||
static inline unsigned int btrfs_compress_level(unsigned int type_level)
|
||||
{
|
||||
return ((type_level & 0xF0) >> 4);
|
||||
}
|
||||
|
||||
void __init btrfs_init_compress(void);
|
||||
void __cold btrfs_exit_compress(void);
|
||||
|
||||
@ -87,7 +97,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
||||
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags);
|
||||
|
||||
unsigned btrfs_compress_str2level(const char *str);
|
||||
unsigned int btrfs_compress_str2level(unsigned int type, const char *str);
|
||||
|
||||
enum btrfs_compression_type {
|
||||
BTRFS_COMPRESS_NONE = 0,
|
||||
@ -97,8 +107,35 @@ enum btrfs_compression_type {
|
||||
BTRFS_COMPRESS_TYPES = 3,
|
||||
};
|
||||
|
||||
struct workspace_manager {
|
||||
const struct btrfs_compress_op *ops;
|
||||
struct list_head idle_ws;
|
||||
spinlock_t ws_lock;
|
||||
/* Number of free workspaces */
|
||||
int free_ws;
|
||||
/* Total number of allocated workspaces */
|
||||
atomic_t total_ws;
|
||||
/* Waiters for a free workspace */
|
||||
wait_queue_head_t ws_wait;
|
||||
};
|
||||
|
||||
void btrfs_init_workspace_manager(struct workspace_manager *wsm,
|
||||
const struct btrfs_compress_op *ops);
|
||||
struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
|
||||
unsigned int level);
|
||||
void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws);
|
||||
void btrfs_cleanup_workspace_manager(struct workspace_manager *wsm);
|
||||
|
||||
struct btrfs_compress_op {
|
||||
struct list_head *(*alloc_workspace)(void);
|
||||
void (*init_workspace_manager)(void);
|
||||
|
||||
void (*cleanup_workspace_manager)(void);
|
||||
|
||||
struct list_head *(*get_workspace)(unsigned int level);
|
||||
|
||||
void (*put_workspace)(struct list_head *ws);
|
||||
|
||||
struct list_head *(*alloc_workspace)(unsigned int level);
|
||||
|
||||
void (*free_workspace)(struct list_head *workspace);
|
||||
|
||||
@ -119,9 +156,18 @@ struct btrfs_compress_op {
|
||||
unsigned long start_byte,
|
||||
size_t srclen, size_t destlen);
|
||||
|
||||
void (*set_level)(struct list_head *ws, unsigned int type);
|
||||
/*
|
||||
* This bounds the level set by the user to be within range of a
|
||||
* particular compression type. It returns the level that will be used
|
||||
* if the level is out of bounds or the default if 0 is passed in.
|
||||
*/
|
||||
unsigned int (*set_level)(unsigned int level);
|
||||
};
|
||||
|
||||
/* The heuristic workspaces are managed via the 0th workspace manager */
|
||||
#define BTRFS_NR_WORKSPACE_MANAGERS (BTRFS_COMPRESS_TYPES + 1)
|
||||
|
||||
extern const struct btrfs_compress_op btrfs_heuristic_compress;
|
||||
extern const struct btrfs_compress_op btrfs_zlib_compress;
|
||||
extern const struct btrfs_compress_op btrfs_lzo_compress;
|
||||
extern const struct btrfs_compress_op btrfs_zstd_compress;
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "print-tree.h"
|
||||
#include "locking.h"
|
||||
#include "volumes.h"
|
||||
#include "qgroup.h"
|
||||
|
||||
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
*root, struct btrfs_path *path, int level);
|
||||
@ -45,11 +46,18 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
|
||||
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
|
||||
if (!p->nodes[i] || !p->locks[i])
|
||||
continue;
|
||||
btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
|
||||
if (p->locks[i] == BTRFS_READ_LOCK)
|
||||
/*
|
||||
* If we currently have a spinning reader or writer lock this
|
||||
* will bump the count of blocking holders and drop the
|
||||
* spinlock.
|
||||
*/
|
||||
if (p->locks[i] == BTRFS_READ_LOCK) {
|
||||
btrfs_set_lock_blocking_read(p->nodes[i]);
|
||||
p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
|
||||
else if (p->locks[i] == BTRFS_WRITE_LOCK)
|
||||
} else if (p->locks[i] == BTRFS_WRITE_LOCK) {
|
||||
btrfs_set_lock_blocking_write(p->nodes[i]);
|
||||
p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1288,7 +1296,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
|
||||
return eb;
|
||||
|
||||
btrfs_set_path_blocking(path);
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
btrfs_set_lock_blocking_read(eb);
|
||||
|
||||
if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
|
||||
BUG_ON(tm->slot != 0);
|
||||
@ -1378,7 +1386,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
|
||||
free_extent_buffer(eb_root);
|
||||
eb = alloc_dummy_extent_buffer(fs_info, logical);
|
||||
} else {
|
||||
btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
|
||||
btrfs_set_lock_blocking_read(eb_root);
|
||||
eb = btrfs_clone_extent_buffer(eb_root);
|
||||
btrfs_tree_read_unlock_blocking(eb_root);
|
||||
free_extent_buffer(eb_root);
|
||||
@ -1486,9 +1494,16 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
|
||||
search_start = buf->start & ~((u64)SZ_1G - 1);
|
||||
|
||||
if (parent)
|
||||
btrfs_set_lock_blocking(parent);
|
||||
btrfs_set_lock_blocking(buf);
|
||||
btrfs_set_lock_blocking_write(parent);
|
||||
btrfs_set_lock_blocking_write(buf);
|
||||
|
||||
/*
|
||||
* Before CoWing this block for later modification, check if it's
|
||||
* the subtree root and do the delayed subtree trace if needed.
|
||||
*
|
||||
* Also We don't care about the error, as it's handled internally.
|
||||
*/
|
||||
btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
|
||||
ret = __btrfs_cow_block(trans, root, buf, parent,
|
||||
parent_slot, cow_ret, search_start, 0);
|
||||
|
||||
@ -1582,7 +1597,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
|
||||
if (parent_nritems <= 1)
|
||||
return 0;
|
||||
|
||||
btrfs_set_lock_blocking(parent);
|
||||
btrfs_set_lock_blocking_write(parent);
|
||||
|
||||
for (i = start_slot; i <= end_slot; i++) {
|
||||
struct btrfs_key first_key;
|
||||
@ -1641,7 +1656,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
|
||||
search_start = last_block;
|
||||
|
||||
btrfs_tree_lock(cur);
|
||||
btrfs_set_lock_blocking(cur);
|
||||
btrfs_set_lock_blocking_write(cur);
|
||||
err = __btrfs_cow_block(trans, root, cur, parent, i,
|
||||
&cur, search_start,
|
||||
min(16 * blocksize,
|
||||
@ -1856,7 +1871,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
btrfs_tree_lock(child);
|
||||
btrfs_set_lock_blocking(child);
|
||||
btrfs_set_lock_blocking_write(child);
|
||||
ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
|
||||
if (ret) {
|
||||
btrfs_tree_unlock(child);
|
||||
@ -1894,7 +1909,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (left) {
|
||||
btrfs_tree_lock(left);
|
||||
btrfs_set_lock_blocking(left);
|
||||
btrfs_set_lock_blocking_write(left);
|
||||
wret = btrfs_cow_block(trans, root, left,
|
||||
parent, pslot - 1, &left);
|
||||
if (wret) {
|
||||
@ -1909,7 +1924,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (right) {
|
||||
btrfs_tree_lock(right);
|
||||
btrfs_set_lock_blocking(right);
|
||||
btrfs_set_lock_blocking_write(right);
|
||||
wret = btrfs_cow_block(trans, root, right,
|
||||
parent, pslot + 1, &right);
|
||||
if (wret) {
|
||||
@ -2072,7 +2087,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
|
||||
u32 left_nr;
|
||||
|
||||
btrfs_tree_lock(left);
|
||||
btrfs_set_lock_blocking(left);
|
||||
btrfs_set_lock_blocking_write(left);
|
||||
|
||||
left_nr = btrfs_header_nritems(left);
|
||||
if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
|
||||
@ -2127,7 +2142,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
|
||||
u32 right_nr;
|
||||
|
||||
btrfs_tree_lock(right);
|
||||
btrfs_set_lock_blocking(right);
|
||||
btrfs_set_lock_blocking_write(right);
|
||||
|
||||
right_nr = btrfs_header_nritems(right);
|
||||
if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
|
||||
@ -2529,26 +2544,6 @@ done:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void key_search_validate(struct extent_buffer *b,
|
||||
const struct btrfs_key *key,
|
||||
int level)
|
||||
{
|
||||
#ifdef CONFIG_BTRFS_ASSERT
|
||||
struct btrfs_disk_key disk_key;
|
||||
|
||||
btrfs_cpu_key_to_disk(&disk_key, key);
|
||||
|
||||
if (level == 0)
|
||||
ASSERT(!memcmp_extent_buffer(b, &disk_key,
|
||||
offsetof(struct btrfs_leaf, items[0].key),
|
||||
sizeof(disk_key)));
|
||||
else
|
||||
ASSERT(!memcmp_extent_buffer(b, &disk_key,
|
||||
offsetof(struct btrfs_node, ptrs[0].key),
|
||||
sizeof(disk_key)));
|
||||
#endif
|
||||
}
|
||||
|
||||
static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
|
||||
int level, int *prev_cmp, int *slot)
|
||||
{
|
||||
@ -2557,7 +2552,6 @@ static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
|
||||
return *prev_cmp;
|
||||
}
|
||||
|
||||
key_search_validate(b, key, level);
|
||||
*slot = 0;
|
||||
|
||||
return 0;
|
||||
@ -3005,6 +2999,8 @@ again:
|
||||
*/
|
||||
prev_cmp = -1;
|
||||
ret = key_search(b, key, level, &prev_cmp, &slot);
|
||||
if (ret < 0)
|
||||
goto done;
|
||||
|
||||
if (level != 0) {
|
||||
int dec = 0;
|
||||
@ -3771,7 +3767,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
return 1;
|
||||
|
||||
btrfs_tree_lock(right);
|
||||
btrfs_set_lock_blocking(right);
|
||||
btrfs_set_lock_blocking_write(right);
|
||||
|
||||
free_space = btrfs_leaf_free_space(fs_info, right);
|
||||
if (free_space < data_size)
|
||||
@ -4005,7 +4001,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
return 1;
|
||||
|
||||
btrfs_tree_lock(left);
|
||||
btrfs_set_lock_blocking(left);
|
||||
btrfs_set_lock_blocking_write(left);
|
||||
|
||||
free_space = btrfs_leaf_free_space(fs_info, left);
|
||||
if (free_space < data_size) {
|
||||
@ -5156,6 +5152,10 @@ again:
|
||||
nritems = btrfs_header_nritems(cur);
|
||||
level = btrfs_header_level(cur);
|
||||
sret = btrfs_bin_search(cur, min_key, level, &slot);
|
||||
if (sret < 0) {
|
||||
ret = sret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* at the lowest level, we're done, setup the path and exit */
|
||||
if (level == path->lowest_level) {
|
||||
|
@ -934,7 +934,8 @@ struct btrfs_fs_info {
|
||||
|
||||
spinlock_t delayed_iput_lock;
|
||||
struct list_head delayed_iputs;
|
||||
struct mutex cleaner_delayed_iput_mutex;
|
||||
atomic_t nr_delayed_iputs;
|
||||
wait_queue_head_t delayed_iputs_wait;
|
||||
|
||||
/* this protects tree_mod_seq_list */
|
||||
spinlock_t tree_mod_seq_lock;
|
||||
@ -1074,10 +1075,13 @@ struct btrfs_fs_info {
|
||||
atomic_t scrubs_paused;
|
||||
atomic_t scrub_cancel_req;
|
||||
wait_queue_head_t scrub_pause_wait;
|
||||
int scrub_workers_refcnt;
|
||||
/*
|
||||
* The worker pointers are NULL iff the refcount is 0, ie. scrub is not
|
||||
* running.
|
||||
*/
|
||||
refcount_t scrub_workers_refcnt;
|
||||
struct btrfs_workqueue *scrub_workers;
|
||||
struct btrfs_workqueue *scrub_wr_completion_workers;
|
||||
struct btrfs_workqueue *scrub_nocow_workers;
|
||||
struct btrfs_workqueue *scrub_parity_workers;
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
|
||||
@ -1199,6 +1203,24 @@ enum {
|
||||
BTRFS_ROOT_MULTI_LOG_TASKS,
|
||||
BTRFS_ROOT_DIRTY,
|
||||
BTRFS_ROOT_DELETING,
|
||||
|
||||
/*
|
||||
* Reloc tree is orphan, only kept here for qgroup delayed subtree scan
|
||||
*
|
||||
* Set for the subvolume tree owning the reloc tree.
|
||||
*/
|
||||
BTRFS_ROOT_DEAD_RELOC_TREE,
|
||||
};
|
||||
|
||||
/*
|
||||
* Record swapped tree blocks of a subvolume tree for delayed subtree trace
|
||||
* code. For detail check comment in fs/btrfs/qgroup.c.
|
||||
*/
|
||||
struct btrfs_qgroup_swapped_blocks {
|
||||
spinlock_t lock;
|
||||
/* RM_EMPTY_ROOT() of above blocks[] */
|
||||
bool swapped;
|
||||
struct rb_root blocks[BTRFS_MAX_LEVEL];
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1311,6 +1333,14 @@ struct btrfs_root {
|
||||
struct list_head ordered_root;
|
||||
u64 nr_ordered_extents;
|
||||
|
||||
/*
|
||||
* Not empty if this subvolume root has gone through tree block swap
|
||||
* (relocation)
|
||||
*
|
||||
* Will be used by reloc_control::dirty_subvol_roots.
|
||||
*/
|
||||
struct list_head reloc_dirty_list;
|
||||
|
||||
/*
|
||||
* Number of currently running SEND ioctls to prevent
|
||||
* manipulation with the read-only status via SUBVOL_SETFLAGS
|
||||
@ -1328,6 +1358,9 @@ struct btrfs_root {
|
||||
/* Number of active swapfiles */
|
||||
atomic_t nr_swapfiles;
|
||||
|
||||
/* Record pairs of swapped blocks for qgroup */
|
||||
struct btrfs_qgroup_swapped_blocks swapped_blocks;
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
u64 alloc_bytenr;
|
||||
#endif
|
||||
@ -2775,7 +2808,8 @@ enum btrfs_flush_state {
|
||||
FLUSH_DELALLOC = 5,
|
||||
FLUSH_DELALLOC_WAIT = 6,
|
||||
ALLOC_CHUNK = 7,
|
||||
COMMIT_TRANS = 8,
|
||||
ALLOC_CHUNK_FORCE = 8,
|
||||
COMMIT_TRANS = 9,
|
||||
};
|
||||
|
||||
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
|
||||
@ -3181,8 +3215,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
|
||||
/* inode.c */
|
||||
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
|
||||
struct page *page, size_t pg_offset, u64 start,
|
||||
u64 len, int create);
|
||||
u64 start, u64 len);
|
||||
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
|
||||
u64 *orig_start, u64 *orig_block_len,
|
||||
u64 *ram_bytes);
|
||||
@ -3254,6 +3287,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root);
|
||||
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
|
||||
void btrfs_add_delayed_iput(struct inode *inode);
|
||||
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
u64 start, u64 num_bytes, u64 min_size,
|
||||
loff_t actual_len, u64 *alloc_hint);
|
||||
@ -3261,7 +3295,7 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
|
||||
struct btrfs_trans_handle *trans, int mode,
|
||||
u64 start, u64 num_bytes, u64 min_size,
|
||||
loff_t actual_len, u64 *alloc_hint);
|
||||
int btrfs_run_delalloc_range(void *private_data, struct page *locked_page,
|
||||
int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
|
||||
u64 start, u64 end, int *page_started, unsigned long *nr_written,
|
||||
struct writeback_control *wbc);
|
||||
int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
|
||||
@ -3490,21 +3524,18 @@ do { \
|
||||
rcu_read_unlock(); \
|
||||
} while (0)
|
||||
|
||||
#ifdef CONFIG_BTRFS_ASSERT
|
||||
|
||||
__cold
|
||||
static inline void assfail(const char *expr, const char *file, int line)
|
||||
{
|
||||
pr_err("assertion failed: %s, file: %s, line: %d\n",
|
||||
expr, file, line);
|
||||
BUG();
|
||||
if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) {
|
||||
pr_err("assertion failed: %s, file: %s, line: %d\n",
|
||||
expr, file, line);
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
#define ASSERT(expr) \
|
||||
(likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
|
||||
#else
|
||||
#define ASSERT(expr) ((void)0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Use that for functions that are conditionally exported for sanity tests but
|
||||
|
@ -602,17 +602,14 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
|
||||
RB_CLEAR_NODE(&head_ref->href_node);
|
||||
head_ref->processing = 0;
|
||||
head_ref->total_ref_mod = count_mod;
|
||||
head_ref->qgroup_reserved = 0;
|
||||
head_ref->qgroup_ref_root = 0;
|
||||
spin_lock_init(&head_ref->lock);
|
||||
mutex_init(&head_ref->mutex);
|
||||
|
||||
if (qrecord) {
|
||||
if (ref_root && reserved) {
|
||||
head_ref->qgroup_ref_root = ref_root;
|
||||
head_ref->qgroup_reserved = reserved;
|
||||
qrecord->data_rsv = reserved;
|
||||
qrecord->data_rsv_refroot = ref_root;
|
||||
}
|
||||
|
||||
qrecord->bytenr = bytenr;
|
||||
qrecord->num_bytes = num_bytes;
|
||||
qrecord->old_roots = NULL;
|
||||
@ -651,10 +648,6 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
|
||||
existing = htree_insert(&delayed_refs->href_root,
|
||||
&head_ref->href_node);
|
||||
if (existing) {
|
||||
WARN_ON(qrecord && head_ref->qgroup_ref_root
|
||||
&& head_ref->qgroup_reserved
|
||||
&& existing->qgroup_ref_root
|
||||
&& existing->qgroup_reserved);
|
||||
update_existing_head_ref(trans, existing, head_ref,
|
||||
old_ref_mod);
|
||||
/*
|
||||
@ -770,7 +763,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
|
||||
is_fstree(ref_root)) {
|
||||
record = kmalloc(sizeof(*record), GFP_NOFS);
|
||||
record = kzalloc(sizeof(*record), GFP_NOFS);
|
||||
if (!record) {
|
||||
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
|
||||
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
|
||||
@ -867,7 +860,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
|
||||
is_fstree(ref_root)) {
|
||||
record = kmalloc(sizeof(*record), GFP_NOFS);
|
||||
record = kzalloc(sizeof(*record), GFP_NOFS);
|
||||
if (!record) {
|
||||
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
|
||||
kmem_cache_free(btrfs_delayed_ref_head_cachep,
|
||||
|
@ -102,17 +102,6 @@ struct btrfs_delayed_ref_head {
|
||||
*/
|
||||
int ref_mod;
|
||||
|
||||
/*
|
||||
* For qgroup reserved space freeing.
|
||||
*
|
||||
* ref_root and reserved will be recorded after
|
||||
* BTRFS_ADD_DELAYED_EXTENT is called.
|
||||
* And will be used to free reserved qgroup space at
|
||||
* run_delayed_refs() time.
|
||||
*/
|
||||
u64 qgroup_ref_root;
|
||||
u64 qgroup_reserved;
|
||||
|
||||
/*
|
||||
* when a new extent is allocated, it is just reserved in memory
|
||||
* The actual extent isn't inserted into the extent allocation tree
|
||||
|
@ -111,11 +111,11 @@ no_valid_dev_replace_entry_found:
|
||||
break;
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
|
||||
dev_replace->srcdev = btrfs_find_device(fs_info, src_devid,
|
||||
NULL, NULL);
|
||||
dev_replace->tgtdev = btrfs_find_device(fs_info,
|
||||
dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices,
|
||||
src_devid, NULL, NULL, true);
|
||||
dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices,
|
||||
BTRFS_DEV_REPLACE_DEVID,
|
||||
NULL, NULL);
|
||||
NULL, NULL, true);
|
||||
/*
|
||||
* allow 'btrfs dev replace_cancel' if src/tgt device is
|
||||
* missing
|
||||
@ -862,6 +862,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
|
||||
btrfs_destroy_dev_replace_tgtdev(tgt_device);
|
||||
break;
|
||||
default:
|
||||
up_write(&dev_replace->rwsem);
|
||||
result = -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <linux/semaphore.h>
|
||||
#include <linux/error-injection.h>
|
||||
#include <linux/crc32c.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
@ -341,7 +342,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
|
||||
|
||||
if (need_lock) {
|
||||
btrfs_tree_read_lock(eb);
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
btrfs_set_lock_blocking_read(eb);
|
||||
}
|
||||
|
||||
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
|
||||
@ -1120,7 +1121,7 @@ void clean_tree_block(struct btrfs_fs_info *fs_info,
|
||||
-buf->len,
|
||||
fs_info->dirty_metadata_batch);
|
||||
/* ugh, clear_extent_buffer_dirty needs to lock the page */
|
||||
btrfs_set_lock_blocking(buf);
|
||||
btrfs_set_lock_blocking_write(buf);
|
||||
clear_extent_buffer_dirty(buf);
|
||||
}
|
||||
}
|
||||
@ -1175,6 +1176,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
||||
INIT_LIST_HEAD(&root->delalloc_root);
|
||||
INIT_LIST_HEAD(&root->ordered_extents);
|
||||
INIT_LIST_HEAD(&root->ordered_root);
|
||||
INIT_LIST_HEAD(&root->reloc_dirty_list);
|
||||
INIT_LIST_HEAD(&root->logged_list[0]);
|
||||
INIT_LIST_HEAD(&root->logged_list[1]);
|
||||
spin_lock_init(&root->inode_lock);
|
||||
@ -1218,6 +1220,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
||||
root->anon_dev = 0;
|
||||
|
||||
spin_lock_init(&root->root_item_lock);
|
||||
btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
|
||||
}
|
||||
|
||||
static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
|
||||
@ -1258,10 +1261,17 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *tree_root = fs_info->tree_root;
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_key key;
|
||||
unsigned int nofs_flag;
|
||||
int ret = 0;
|
||||
uuid_le uuid = NULL_UUID_LE;
|
||||
|
||||
/*
|
||||
* We're holding a transaction handle, so use a NOFS memory allocation
|
||||
* context to avoid deadlock if reclaim happens.
|
||||
*/
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
root = btrfs_alloc_root(fs_info, GFP_KERNEL);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
if (!root)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@ -1707,9 +1717,7 @@ static int cleaner_kthread(void *arg)
|
||||
goto sleep;
|
||||
}
|
||||
|
||||
mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
btrfs_run_delayed_iputs(fs_info);
|
||||
mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
|
||||
again = btrfs_clean_one_deleted_snapshot(root);
|
||||
mutex_unlock(&fs_info->cleaner_mutex);
|
||||
@ -2101,7 +2109,7 @@ static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
|
||||
atomic_set(&fs_info->scrubs_paused, 0);
|
||||
atomic_set(&fs_info->scrub_cancel_req, 0);
|
||||
init_waitqueue_head(&fs_info->scrub_pause_wait);
|
||||
fs_info->scrub_workers_refcnt = 0;
|
||||
refcount_set(&fs_info->scrub_workers_refcnt, 0);
|
||||
}
|
||||
|
||||
static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
|
||||
@ -2666,7 +2674,6 @@ int open_ctree(struct super_block *sb,
|
||||
mutex_init(&fs_info->delete_unused_bgs_mutex);
|
||||
mutex_init(&fs_info->reloc_mutex);
|
||||
mutex_init(&fs_info->delalloc_root_mutex);
|
||||
mutex_init(&fs_info->cleaner_delayed_iput_mutex);
|
||||
seqlock_init(&fs_info->profiles_lock);
|
||||
|
||||
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
|
||||
@ -2688,6 +2695,7 @@ int open_ctree(struct super_block *sb,
|
||||
atomic_set(&fs_info->defrag_running, 0);
|
||||
atomic_set(&fs_info->qgroup_op_seq, 0);
|
||||
atomic_set(&fs_info->reada_works_cnt, 0);
|
||||
atomic_set(&fs_info->nr_delayed_iputs, 0);
|
||||
atomic64_set(&fs_info->tree_mod_seq, 0);
|
||||
fs_info->sb = sb;
|
||||
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
|
||||
@ -2765,6 +2773,7 @@ int open_ctree(struct super_block *sb,
|
||||
init_waitqueue_head(&fs_info->transaction_wait);
|
||||
init_waitqueue_head(&fs_info->transaction_blocked_wait);
|
||||
init_waitqueue_head(&fs_info->async_submit_wait);
|
||||
init_waitqueue_head(&fs_info->delayed_iputs_wait);
|
||||
|
||||
INIT_LIST_HEAD(&fs_info->pinned_chunks);
|
||||
|
||||
@ -4238,16 +4247,9 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
|
||||
head = rb_entry(node, struct btrfs_delayed_ref_head,
|
||||
href_node);
|
||||
if (!mutex_trylock(&head->mutex)) {
|
||||
refcount_inc(&head->refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
mutex_lock(&head->mutex);
|
||||
mutex_unlock(&head->mutex);
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
spin_lock(&delayed_refs->lock);
|
||||
if (btrfs_delayed_ref_lock(delayed_refs, head))
|
||||
continue;
|
||||
}
|
||||
|
||||
spin_lock(&head->lock);
|
||||
while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
|
||||
ref = rb_entry(n, struct btrfs_delayed_ref_node,
|
||||
@ -4263,12 +4265,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
if (head->must_insert_reserved)
|
||||
pin_bytes = true;
|
||||
btrfs_free_delayed_extent_op(head->extent_op);
|
||||
delayed_refs->num_heads--;
|
||||
if (head->processing == 0)
|
||||
delayed_refs->num_heads_ready--;
|
||||
atomic_dec(&delayed_refs->num_entries);
|
||||
rb_erase_cached(&head->href_node, &delayed_refs->href_root);
|
||||
RB_CLEAR_NODE(&head->href_node);
|
||||
btrfs_delete_ref_head(delayed_refs, head);
|
||||
spin_unlock(&head->lock);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
mutex_unlock(&head->mutex);
|
||||
|
@ -2492,9 +2492,6 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
}
|
||||
|
||||
/* Also free its reserved qgroup space */
|
||||
btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
|
||||
head->qgroup_reserved);
|
||||
btrfs_delayed_refs_rsv_release(fs_info, nr_items);
|
||||
}
|
||||
|
||||
@ -3013,8 +3010,7 @@ again:
|
||||
}
|
||||
|
||||
if (run_all) {
|
||||
if (!list_empty(&trans->new_bgs))
|
||||
btrfs_create_pending_block_groups(trans);
|
||||
btrfs_create_pending_block_groups(trans);
|
||||
|
||||
spin_lock(&delayed_refs->lock);
|
||||
node = rb_first_cached(&delayed_refs->href_root);
|
||||
@ -4280,10 +4276,14 @@ commit_trans:
|
||||
/*
|
||||
* The cleaner kthread might still be doing iput
|
||||
* operations. Wait for it to finish so that
|
||||
* more space is released.
|
||||
* more space is released. We don't need to
|
||||
* explicitly run the delayed iputs here because
|
||||
* the commit_transaction would have woken up
|
||||
* the cleaner.
|
||||
*/
|
||||
mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
ret = btrfs_wait_on_delayed_iputs(fs_info);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto again;
|
||||
} else {
|
||||
btrfs_end_transaction(trans);
|
||||
@ -4396,21 +4396,12 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
|
||||
static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *sinfo, int force)
|
||||
{
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
u64 bytes_used = btrfs_space_info_used(sinfo, false);
|
||||
u64 thresh;
|
||||
|
||||
if (force == CHUNK_ALLOC_FORCE)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* We need to take into account the global rsv because for all intents
|
||||
* and purposes it's used space. Don't worry about locking the
|
||||
* global_rsv, it doesn't change except when the transaction commits.
|
||||
*/
|
||||
if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
|
||||
bytes_used += calc_global_rsv_need_space(global_rsv);
|
||||
|
||||
/*
|
||||
* in limited mode, we want to have some free space up to
|
||||
* about 1% of the FS size.
|
||||
@ -4741,7 +4732,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
|
||||
struct btrfs_space_info *space_info;
|
||||
struct btrfs_trans_handle *trans;
|
||||
u64 delalloc_bytes;
|
||||
u64 max_reclaim;
|
||||
u64 async_pages;
|
||||
u64 items;
|
||||
long time_left;
|
||||
unsigned long nr_pages;
|
||||
@ -4766,25 +4757,36 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
|
||||
|
||||
loops = 0;
|
||||
while (delalloc_bytes && loops < 3) {
|
||||
max_reclaim = min(delalloc_bytes, to_reclaim);
|
||||
nr_pages = max_reclaim >> PAGE_SHIFT;
|
||||
btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
|
||||
nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* We need to wait for the async pages to actually start before
|
||||
* we do anything.
|
||||
* Triggers inode writeback for up to nr_pages. This will invoke
|
||||
* ->writepages callback and trigger delalloc filling
|
||||
* (btrfs_run_delalloc_range()).
|
||||
*/
|
||||
max_reclaim = atomic_read(&fs_info->async_delalloc_pages);
|
||||
if (!max_reclaim)
|
||||
btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
|
||||
|
||||
/*
|
||||
* We need to wait for the compressed pages to start before
|
||||
* we continue.
|
||||
*/
|
||||
async_pages = atomic_read(&fs_info->async_delalloc_pages);
|
||||
if (!async_pages)
|
||||
goto skip_async;
|
||||
|
||||
if (max_reclaim <= nr_pages)
|
||||
max_reclaim = 0;
|
||||
/*
|
||||
* Calculate how many compressed pages we want to be written
|
||||
* before we continue. I.e. if there are more async pages than we
|
||||
* require wait_event will wait until nr_pages are written.
|
||||
*/
|
||||
if (async_pages <= nr_pages)
|
||||
async_pages = 0;
|
||||
else
|
||||
max_reclaim -= nr_pages;
|
||||
async_pages -= nr_pages;
|
||||
|
||||
wait_event(fs_info->async_submit_wait,
|
||||
atomic_read(&fs_info->async_delalloc_pages) <=
|
||||
(int)max_reclaim);
|
||||
(int)async_pages);
|
||||
skip_async:
|
||||
spin_lock(&space_info->lock);
|
||||
if (list_empty(&space_info->tickets) &&
|
||||
@ -4808,6 +4810,7 @@ skip_async:
|
||||
}
|
||||
|
||||
struct reserve_ticket {
|
||||
u64 orig_bytes;
|
||||
u64 bytes;
|
||||
int error;
|
||||
struct list_head list;
|
||||
@ -4851,10 +4854,19 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
|
||||
if (!bytes_needed)
|
||||
return 0;
|
||||
|
||||
/* See if there is enough pinned space to make this reservation */
|
||||
if (__percpu_counter_compare(&space_info->total_bytes_pinned,
|
||||
bytes_needed,
|
||||
BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
|
||||
trans = btrfs_join_transaction(fs_info->extent_root);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
|
||||
/*
|
||||
* See if there is enough pinned space to make this reservation, or if
|
||||
* we have block groups that are going to be freed, allowing us to
|
||||
* possibly do a chunk allocation the next loop through.
|
||||
*/
|
||||
if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
|
||||
__percpu_counter_compare(&space_info->total_bytes_pinned,
|
||||
bytes_needed,
|
||||
BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
|
||||
goto commit;
|
||||
|
||||
/*
|
||||
@ -4862,7 +4874,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
|
||||
* this reservation.
|
||||
*/
|
||||
if (space_info != delayed_rsv->space_info)
|
||||
return -ENOSPC;
|
||||
goto enospc;
|
||||
|
||||
spin_lock(&delayed_rsv->lock);
|
||||
reclaim_bytes += delayed_rsv->reserved;
|
||||
@ -4877,16 +4889,14 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
|
||||
|
||||
if (__percpu_counter_compare(&space_info->total_bytes_pinned,
|
||||
bytes_needed,
|
||||
BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) {
|
||||
return -ENOSPC;
|
||||
}
|
||||
BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
|
||||
goto enospc;
|
||||
|
||||
commit:
|
||||
trans = btrfs_join_transaction(fs_info->extent_root);
|
||||
if (IS_ERR(trans))
|
||||
return -ENOSPC;
|
||||
|
||||
return btrfs_commit_transaction(trans);
|
||||
enospc:
|
||||
btrfs_end_transaction(trans);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4939,6 +4949,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
|
||||
btrfs_end_transaction(trans);
|
||||
break;
|
||||
case ALLOC_CHUNK:
|
||||
case ALLOC_CHUNK_FORCE:
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
@ -4946,7 +4957,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
ret = do_chunk_alloc(trans,
|
||||
btrfs_metadata_alloc_profile(fs_info),
|
||||
CHUNK_ALLOC_NO_FORCE);
|
||||
(state == ALLOC_CHUNK) ?
|
||||
CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
|
||||
btrfs_end_transaction(trans);
|
||||
if (ret > 0 || ret == -ENOSPC)
|
||||
ret = 0;
|
||||
@ -4957,9 +4969,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
|
||||
* bunch of pinned space, so make sure we run the iputs before
|
||||
* we do our pinned bytes check below.
|
||||
*/
|
||||
mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
btrfs_run_delayed_iputs(fs_info);
|
||||
mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
|
||||
btrfs_wait_on_delayed_iputs(fs_info);
|
||||
|
||||
ret = may_commit_transaction(fs_info, space_info);
|
||||
break;
|
||||
@ -5030,7 +5041,7 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
|
||||
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
|
||||
}
|
||||
|
||||
static void wake_all_tickets(struct list_head *head)
|
||||
static bool wake_all_tickets(struct list_head *head)
|
||||
{
|
||||
struct reserve_ticket *ticket;
|
||||
|
||||
@ -5039,7 +5050,10 @@ static void wake_all_tickets(struct list_head *head)
|
||||
list_del_init(&ticket->list);
|
||||
ticket->error = -ENOSPC;
|
||||
wake_up(&ticket->wait);
|
||||
if (ticket->bytes != ticket->orig_bytes)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -5091,11 +5105,28 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
|
||||
commit_cycles--;
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't want to force a chunk allocation until we've tried
|
||||
* pretty hard to reclaim space. Think of the case where we
|
||||
* freed up a bunch of space and so have a lot of pinned space
|
||||
* to reclaim. We would rather use that than possibly create an
|
||||
* underutilized metadata chunk. So if this is our first run
|
||||
* through the flushing state machine skip ALLOC_CHUNK_FORCE and
|
||||
* commit the transaction. If nothing has changed the next go
|
||||
* around then we can force a chunk allocation.
|
||||
*/
|
||||
if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
|
||||
flush_state++;
|
||||
|
||||
if (flush_state > COMMIT_TRANS) {
|
||||
commit_cycles++;
|
||||
if (commit_cycles > 2) {
|
||||
wake_all_tickets(&space_info->tickets);
|
||||
space_info->flush = 0;
|
||||
if (wake_all_tickets(&space_info->tickets)) {
|
||||
flush_state = FLUSH_DELAYED_ITEMS_NR;
|
||||
commit_cycles--;
|
||||
} else {
|
||||
space_info->flush = 0;
|
||||
}
|
||||
} else {
|
||||
flush_state = FLUSH_DELAYED_ITEMS_NR;
|
||||
}
|
||||
@ -5109,12 +5140,18 @@ void btrfs_init_async_reclaim_work(struct work_struct *work)
|
||||
INIT_WORK(work, btrfs_async_reclaim_metadata_space);
|
||||
}
|
||||
|
||||
static const enum btrfs_flush_state priority_flush_states[] = {
|
||||
FLUSH_DELAYED_ITEMS_NR,
|
||||
FLUSH_DELAYED_ITEMS,
|
||||
ALLOC_CHUNK,
|
||||
};
|
||||
|
||||
static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
struct reserve_ticket *ticket)
|
||||
{
|
||||
u64 to_reclaim;
|
||||
int flush_state = FLUSH_DELAYED_ITEMS_NR;
|
||||
int flush_state;
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
|
||||
@ -5125,8 +5162,10 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
flush_state = 0;
|
||||
do {
|
||||
flush_space(fs_info, space_info, to_reclaim, flush_state);
|
||||
flush_space(fs_info, space_info, to_reclaim,
|
||||
priority_flush_states[flush_state]);
|
||||
flush_state++;
|
||||
spin_lock(&space_info->lock);
|
||||
if (ticket->bytes == 0) {
|
||||
@ -5134,23 +5173,16 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
|
||||
return;
|
||||
}
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
/*
|
||||
* Priority flushers can't wait on delalloc without
|
||||
* deadlocking.
|
||||
*/
|
||||
if (flush_state == FLUSH_DELALLOC ||
|
||||
flush_state == FLUSH_DELALLOC_WAIT)
|
||||
flush_state = ALLOC_CHUNK;
|
||||
} while (flush_state < COMMIT_TRANS);
|
||||
} while (flush_state < ARRAY_SIZE(priority_flush_states));
|
||||
}
|
||||
|
||||
static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
struct reserve_ticket *ticket, u64 orig_bytes)
|
||||
struct reserve_ticket *ticket)
|
||||
|
||||
{
|
||||
DEFINE_WAIT(wait);
|
||||
u64 reclaim_bytes = 0;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
@ -5171,14 +5203,12 @@ static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
|
||||
ret = ticket->error;
|
||||
if (!list_empty(&ticket->list))
|
||||
list_del_init(&ticket->list);
|
||||
if (ticket->bytes && ticket->bytes < orig_bytes) {
|
||||
u64 num_bytes = orig_bytes - ticket->bytes;
|
||||
update_bytes_may_use(space_info, -num_bytes);
|
||||
trace_btrfs_space_reservation(fs_info, "space_info",
|
||||
space_info->flags, num_bytes, 0);
|
||||
}
|
||||
if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
|
||||
reclaim_bytes = ticket->orig_bytes - ticket->bytes;
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
if (reclaim_bytes)
|
||||
space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -5204,6 +5234,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
|
||||
{
|
||||
struct reserve_ticket ticket;
|
||||
u64 used;
|
||||
u64 reclaim_bytes = 0;
|
||||
int ret = 0;
|
||||
|
||||
ASSERT(orig_bytes);
|
||||
@ -5239,6 +5270,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
|
||||
* the list and we will do our own flushing further down.
|
||||
*/
|
||||
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
|
||||
ticket.orig_bytes = orig_bytes;
|
||||
ticket.bytes = orig_bytes;
|
||||
ticket.error = 0;
|
||||
init_waitqueue_head(&ticket.wait);
|
||||
@ -5279,25 +5311,21 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
|
||||
return ret;
|
||||
|
||||
if (flush == BTRFS_RESERVE_FLUSH_ALL)
|
||||
return wait_reserve_ticket(fs_info, space_info, &ticket,
|
||||
orig_bytes);
|
||||
return wait_reserve_ticket(fs_info, space_info, &ticket);
|
||||
|
||||
ret = 0;
|
||||
priority_reclaim_metadata_space(fs_info, space_info, &ticket);
|
||||
spin_lock(&space_info->lock);
|
||||
if (ticket.bytes) {
|
||||
if (ticket.bytes < orig_bytes) {
|
||||
u64 num_bytes = orig_bytes - ticket.bytes;
|
||||
update_bytes_may_use(space_info, -num_bytes);
|
||||
trace_btrfs_space_reservation(fs_info, "space_info",
|
||||
space_info->flags,
|
||||
num_bytes, 0);
|
||||
|
||||
}
|
||||
if (ticket.bytes < orig_bytes)
|
||||
reclaim_bytes = orig_bytes - ticket.bytes;
|
||||
list_del_init(&ticket.list);
|
||||
ret = -ENOSPC;
|
||||
}
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
if (reclaim_bytes)
|
||||
space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
|
||||
ASSERT(list_empty(&ticket.list));
|
||||
return ret;
|
||||
}
|
||||
@ -5775,6 +5803,21 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * calc_refill_bytes - compute how far a block reserve is below its targets.
 * @block_rsv:      reserve to inspect
 * @metadata_bytes: set to size - reserved, or 0 when reserved >= size
 * @qgroup_bytes:   set to qgroup_rsv_size - qgroup_rsv_reserved, or 0 when
 *                  qgroup_rsv_reserved >= qgroup_rsv_size
 *
 * Both outputs are zeroed first and then filled in while holding
 * block_rsv->lock, so the two values come from the same locked read.
 */
static void calc_refill_bytes(struct btrfs_block_rsv *block_rsv,
			      u64 *metadata_bytes, u64 *qgroup_bytes)
{
	*metadata_bytes = 0;
	*qgroup_bytes = 0;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved < block_rsv->size)
		*metadata_bytes = block_rsv->size - block_rsv->reserved;
	if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
		*qgroup_bytes = block_rsv->qgroup_rsv_size -
				block_rsv->qgroup_rsv_reserved;
	spin_unlock(&block_rsv->lock);
}
|
||||
|
||||
/**
|
||||
* btrfs_inode_rsv_refill - refill the inode block rsv.
|
||||
* @inode - the inode we are refilling.
|
||||
@ -5790,25 +5833,42 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
|
||||
u64 num_bytes = 0;
|
||||
u64 qgroup_num_bytes = 0;
|
||||
u64 num_bytes, last = 0;
|
||||
u64 qgroup_num_bytes;
|
||||
int ret = -ENOSPC;
|
||||
|
||||
spin_lock(&block_rsv->lock);
|
||||
if (block_rsv->reserved < block_rsv->size)
|
||||
num_bytes = block_rsv->size - block_rsv->reserved;
|
||||
if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
|
||||
qgroup_num_bytes = block_rsv->qgroup_rsv_size -
|
||||
block_rsv->qgroup_rsv_reserved;
|
||||
spin_unlock(&block_rsv->lock);
|
||||
|
||||
calc_refill_bytes(block_rsv, &num_bytes, &qgroup_num_bytes);
|
||||
if (num_bytes == 0)
|
||||
return 0;
|
||||
|
||||
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
|
||||
do {
|
||||
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes,
|
||||
true);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
|
||||
if (ret) {
|
||||
btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
|
||||
last = num_bytes;
|
||||
/*
|
||||
* If we are fragmented we can end up with a lot of
|
||||
* outstanding extents which will make our size be much
|
||||
* larger than our reserved amount.
|
||||
*
|
||||
* If the reservation happens here, it might be very
|
||||
* big though not needed in the end, if the delalloc
|
||||
* flushing happens.
|
||||
*
|
||||
* If this is the case try and do the reserve again.
|
||||
*/
|
||||
if (flush == BTRFS_RESERVE_FLUSH_ALL)
|
||||
calc_refill_bytes(block_rsv, &num_bytes,
|
||||
&qgroup_num_bytes);
|
||||
if (num_bytes == 0)
|
||||
return 0;
|
||||
}
|
||||
} while (ret && last != num_bytes);
|
||||
|
||||
if (!ret) {
|
||||
block_rsv_add_bytes(block_rsv, num_bytes, false);
|
||||
trace_btrfs_space_reservation(root->fs_info, "delalloc",
|
||||
@ -5818,8 +5878,7 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
|
||||
spin_lock(&block_rsv->lock);
|
||||
block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
|
||||
spin_unlock(&block_rsv->lock);
|
||||
} else
|
||||
btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -8066,6 +8125,15 @@ loop:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * DUMP_BLOCK_RSV - log the size and reserved counters of one block reserve.
 * @fs_info:  fs_info that embeds the reserve; also passed to btrfs_info()
 * @rsv_name: bare member name of the reserve inside @fs_info; it is
 *            stringified and used as the message prefix
 *
 * The reserve's lock is held while the two counters are read and printed.
 */
#define DUMP_BLOCK_RSV(fs_info, rsv_name)				\
do {									\
	struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name;		\
	spin_lock(&__rsv->lock);					\
	btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu",	\
		   __rsv->size, __rsv->reserved);			\
	spin_unlock(&__rsv->lock);					\
} while (0)
|
||||
|
||||
static void dump_space_info(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *info, u64 bytes,
|
||||
int dump_block_groups)
|
||||
@ -8085,6 +8153,12 @@ static void dump_space_info(struct btrfs_fs_info *fs_info,
|
||||
info->bytes_readonly);
|
||||
spin_unlock(&info->lock);
|
||||
|
||||
DUMP_BLOCK_RSV(fs_info, global_block_rsv);
|
||||
DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
|
||||
DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
|
||||
DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
|
||||
DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
|
||||
|
||||
if (!dump_block_groups)
|
||||
return;
|
||||
|
||||
@ -8492,7 +8566,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
clean_tree_block(fs_info, buf);
|
||||
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
|
||||
|
||||
btrfs_set_lock_blocking(buf);
|
||||
btrfs_set_lock_blocking_write(buf);
|
||||
set_extent_buffer_uptodate(buf);
|
||||
|
||||
memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
|
||||
@ -8917,7 +8991,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
|
||||
reada = 1;
|
||||
}
|
||||
btrfs_tree_lock(next);
|
||||
btrfs_set_lock_blocking(next);
|
||||
btrfs_set_lock_blocking_write(next);
|
||||
|
||||
ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
|
||||
&wc->refs[level - 1],
|
||||
@ -8977,7 +9051,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
|
||||
return -EIO;
|
||||
}
|
||||
btrfs_tree_lock(next);
|
||||
btrfs_set_lock_blocking(next);
|
||||
btrfs_set_lock_blocking_write(next);
|
||||
}
|
||||
|
||||
level--;
|
||||
@ -9089,7 +9163,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
|
||||
if (!path->locks[level]) {
|
||||
BUG_ON(level == 0);
|
||||
btrfs_tree_lock(eb);
|
||||
btrfs_set_lock_blocking(eb);
|
||||
btrfs_set_lock_blocking_write(eb);
|
||||
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
|
||||
|
||||
ret = btrfs_lookup_extent_info(trans, fs_info,
|
||||
@ -9131,7 +9205,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
|
||||
if (!path->locks[level] &&
|
||||
btrfs_header_generation(eb) == trans->transid) {
|
||||
btrfs_tree_lock(eb);
|
||||
btrfs_set_lock_blocking(eb);
|
||||
btrfs_set_lock_blocking_write(eb);
|
||||
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
|
||||
}
|
||||
clean_tree_block(fs_info, eb);
|
||||
@ -9298,7 +9372,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
|
||||
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
|
||||
level = btrfs_header_level(root->node);
|
||||
path->nodes[level] = btrfs_lock_root_node(root);
|
||||
btrfs_set_lock_blocking(path->nodes[level]);
|
||||
btrfs_set_lock_blocking_write(path->nodes[level]);
|
||||
path->slots[level] = 0;
|
||||
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
|
||||
memset(&wc->update_progress, 0,
|
||||
@ -9328,7 +9402,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
|
||||
level = btrfs_header_level(root->node);
|
||||
while (1) {
|
||||
btrfs_tree_lock(path->nodes[level]);
|
||||
btrfs_set_lock_blocking(path->nodes[level]);
|
||||
btrfs_set_lock_blocking_write(path->nodes[level]);
|
||||
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
|
||||
|
||||
ret = btrfs_lookup_extent_info(trans, fs_info,
|
||||
@ -9595,6 +9669,7 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
|
||||
{
|
||||
struct btrfs_space_info *sinfo = cache->space_info;
|
||||
u64 num_bytes;
|
||||
u64 sinfo_used;
|
||||
u64 min_allocable_bytes;
|
||||
int ret = -ENOSPC;
|
||||
|
||||
@ -9621,9 +9696,10 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
|
||||
|
||||
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
|
||||
cache->bytes_super - btrfs_block_group_used(&cache->item);
|
||||
sinfo_used = btrfs_space_info_used(sinfo, true);
|
||||
|
||||
if (btrfs_space_info_used(sinfo, true) + num_bytes +
|
||||
min_allocable_bytes <= sinfo->total_bytes) {
|
||||
if (sinfo_used + num_bytes + min_allocable_bytes <=
|
||||
sinfo->total_bytes) {
|
||||
sinfo->bytes_readonly += num_bytes;
|
||||
cache->ro++;
|
||||
list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
|
||||
@ -9632,6 +9708,15 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
|
||||
out:
|
||||
spin_unlock(&cache->lock);
|
||||
spin_unlock(&sinfo->lock);
|
||||
if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
|
||||
btrfs_info(cache->fs_info,
|
||||
"unable to make block group %llu ro",
|
||||
cache->key.objectid);
|
||||
btrfs_info(cache->fs_info,
|
||||
"sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
|
||||
sinfo_used, num_bytes, min_allocable_bytes);
|
||||
dump_space_info(cache->fs_info, cache->space_info, 0, 0);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -10781,13 +10866,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
spin_lock(&trans->transaction->dirty_bgs_lock);
|
||||
if (!list_empty(&block_group->dirty_list)) {
|
||||
WARN_ON(1);
|
||||
}
|
||||
if (!list_empty(&block_group->io_list)) {
|
||||
WARN_ON(1);
|
||||
}
|
||||
WARN_ON(!list_empty(&block_group->dirty_list));
|
||||
WARN_ON(!list_empty(&block_group->io_list));
|
||||
spin_unlock(&trans->transaction->dirty_bgs_lock);
|
||||
|
||||
btrfs_remove_free_space_cache(block_group);
|
||||
|
||||
spin_lock(&block_group->space_info->lock);
|
||||
|
@ -147,7 +147,38 @@ static int add_extent_changeset(struct extent_state *state, unsigned bits,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void flush_write_bio(struct extent_page_data *epd);
|
||||
/*
 * Hand a built bio to the lower layers.
 *
 * The owning extent_io_tree is taken from bio->bi_private, which is cleared
 * before the bio is passed on. When the tree has ops, its submit_bio_hook is
 * called with page_offset() of the last bvec's page plus that bvec's offset;
 * otherwise the bio goes straight to btrfsic_submit_bio().
 *
 * Returns the hook's blk_status_t converted by blk_status_to_errno() (the
 * status stays 0 on the btrfsic_submit_bio() path).
 */
static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
				       unsigned long bio_flags)
{
	struct extent_io_tree *io_tree = bio->bi_private;
	struct bio_vec *last = bio_last_bvec_all(bio);
	const u64 offset = page_offset(last->bv_page) + last->bv_offset;
	blk_status_t status = 0;

	bio->bi_private = NULL;

	if (!io_tree->ops) {
		btrfsic_submit_bio(bio);
		return blk_status_to_errno(status);
	}

	status = io_tree->ops->submit_bio_hook(io_tree->private_data, bio,
					       mirror_num, bio_flags, offset);
	return blk_status_to_errno(status);
}
|
||||
|
||||
static void flush_write_bio(struct extent_page_data *epd)
|
||||
{
|
||||
if (epd->bio) {
|
||||
int ret;
|
||||
|
||||
ret = submit_one_bio(epd->bio, 0, 0);
|
||||
BUG_ON(ret < 0); /* -ENOMEM */
|
||||
epd->bio = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int __init extent_io_init(void)
|
||||
{
|
||||
@ -281,8 +312,8 @@ do_insert:
|
||||
}
|
||||
|
||||
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
|
||||
struct rb_node **prev_ret,
|
||||
struct rb_node **next_ret,
|
||||
struct rb_node **prev_ret,
|
||||
struct rb_node ***p_ret,
|
||||
struct rb_node **parent_ret)
|
||||
{
|
||||
@ -311,23 +342,23 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
|
||||
if (parent_ret)
|
||||
*parent_ret = prev;
|
||||
|
||||
if (prev_ret) {
|
||||
if (next_ret) {
|
||||
orig_prev = prev;
|
||||
while (prev && offset > prev_entry->end) {
|
||||
prev = rb_next(prev);
|
||||
prev_entry = rb_entry(prev, struct tree_entry, rb_node);
|
||||
}
|
||||
*prev_ret = prev;
|
||||
*next_ret = prev;
|
||||
prev = orig_prev;
|
||||
}
|
||||
|
||||
if (next_ret) {
|
||||
if (prev_ret) {
|
||||
prev_entry = rb_entry(prev, struct tree_entry, rb_node);
|
||||
while (prev && offset < prev_entry->start) {
|
||||
prev = rb_prev(prev);
|
||||
prev_entry = rb_entry(prev, struct tree_entry, rb_node);
|
||||
}
|
||||
*next_ret = prev;
|
||||
*prev_ret = prev;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@ -338,12 +369,12 @@ tree_search_for_insert(struct extent_io_tree *tree,
|
||||
struct rb_node ***p_ret,
|
||||
struct rb_node **parent_ret)
|
||||
{
|
||||
struct rb_node *prev = NULL;
|
||||
struct rb_node *next= NULL;
|
||||
struct rb_node *ret;
|
||||
|
||||
ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
|
||||
ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret);
|
||||
if (!ret)
|
||||
return prev;
|
||||
return next;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -585,7 +616,6 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
|
||||
if (delete)
|
||||
bits |= ~EXTENT_CTLBITS;
|
||||
bits |= EXTENT_FIRST_DELALLOC;
|
||||
|
||||
if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
|
||||
clear = 1;
|
||||
@ -850,7 +880,6 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
|
||||
btrfs_debug_check_extent_io_range(tree, start, end);
|
||||
|
||||
bits |= EXTENT_FIRST_DELALLOC;
|
||||
again:
|
||||
if (!prealloc && gfpflags_allow_blocking(mask)) {
|
||||
/*
|
||||
@ -2692,28 +2721,6 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
|
||||
return bio;
|
||||
}
|
||||
|
||||
/*
 * Hand a built bio to the lower layers.
 *
 * The owning extent_io_tree is taken from bio->bi_private, which is cleared
 * before the bio is passed on. When the tree has ops, its submit_bio_hook is
 * called with page_offset() of the last bvec's page plus that bvec's offset;
 * otherwise the bio goes straight to btrfsic_submit_bio().
 *
 * Returns the hook's blk_status_t converted by blk_status_to_errno() (the
 * status stays 0 on the btrfsic_submit_bio() path).
 */
static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
				       unsigned long bio_flags)
{
	struct extent_io_tree *io_tree = bio->bi_private;
	struct bio_vec *last = bio_last_bvec_all(bio);
	const u64 offset = page_offset(last->bv_page) + last->bv_offset;
	blk_status_t status = 0;

	bio->bi_private = NULL;

	if (!io_tree->ops) {
		btrfsic_submit_bio(bio);
		return blk_status_to_errno(status);
	}

	status = io_tree->ops->submit_bio_hook(io_tree->private_data, bio,
					       mirror_num, bio_flags, offset);
	return blk_status_to_errno(status);
}
|
||||
|
||||
/*
|
||||
* @opf: bio REQ_OP_* and REQ_* flags as one value
|
||||
* @tree: tree so we can call our merge_bio hook
|
||||
@ -4007,17 +4014,6 @@ retry:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void flush_write_bio(struct extent_page_data *epd)
|
||||
{
|
||||
if (epd->bio) {
|
||||
int ret;
|
||||
|
||||
ret = submit_one_bio(epd->bio, 0, 0);
|
||||
BUG_ON(ret < 0); /* -ENOMEM */
|
||||
epd->bio = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int extent_write_full_page(struct page *page, struct writeback_control *wbc)
|
||||
{
|
||||
int ret;
|
||||
@ -4259,8 +4255,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
|
||||
if (len == 0)
|
||||
break;
|
||||
len = ALIGN(len, sectorsize);
|
||||
em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, offset,
|
||||
len, 0);
|
||||
em = btrfs_get_extent_fiemap(BTRFS_I(inode), offset, len);
|
||||
if (IS_ERR_OR_NULL(em))
|
||||
return em;
|
||||
|
||||
|
@ -18,17 +18,16 @@
|
||||
#define EXTENT_BOUNDARY (1U << 9)
|
||||
#define EXTENT_NODATASUM (1U << 10)
|
||||
#define EXTENT_CLEAR_META_RESV (1U << 11)
|
||||
#define EXTENT_FIRST_DELALLOC (1U << 12)
|
||||
#define EXTENT_NEED_WAIT (1U << 13)
|
||||
#define EXTENT_DAMAGED (1U << 14)
|
||||
#define EXTENT_NORESERVE (1U << 15)
|
||||
#define EXTENT_QGROUP_RESERVED (1U << 16)
|
||||
#define EXTENT_CLEAR_DATA_RESV (1U << 17)
|
||||
#define EXTENT_DELALLOC_NEW (1U << 18)
|
||||
#define EXTENT_NEED_WAIT (1U << 12)
|
||||
#define EXTENT_DAMAGED (1U << 13)
|
||||
#define EXTENT_NORESERVE (1U << 14)
|
||||
#define EXTENT_QGROUP_RESERVED (1U << 15)
|
||||
#define EXTENT_CLEAR_DATA_RESV (1U << 16)
|
||||
#define EXTENT_DELALLOC_NEW (1U << 17)
|
||||
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
|
||||
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
|
||||
EXTENT_CLEAR_DATA_RESV)
|
||||
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
|
||||
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING)
|
||||
|
||||
/*
|
||||
* flags for bio submission. The high bits indicate the compression
|
||||
|
@ -210,6 +210,9 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
|
||||
if (!list_empty(&prev->list) || !list_empty(&next->list))
|
||||
return 0;
|
||||
|
||||
ASSERT(next->block_start != EXTENT_MAP_DELALLOC &&
|
||||
prev->block_start != EXTENT_MAP_DELALLOC);
|
||||
|
||||
if (extent_map_end(prev) == next->start &&
|
||||
prev->flags == next->flags &&
|
||||
prev->bdev == next->bdev &&
|
||||
@ -217,8 +220,6 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
|
||||
prev->block_start == EXTENT_MAP_HOLE) ||
|
||||
(next->block_start == EXTENT_MAP_INLINE &&
|
||||
prev->block_start == EXTENT_MAP_INLINE) ||
|
||||
(next->block_start == EXTENT_MAP_DELALLOC &&
|
||||
prev->block_start == EXTENT_MAP_DELALLOC) ||
|
||||
(next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
|
||||
next->block_start == extent_map_block_end(prev)))) {
|
||||
return 1;
|
||||
|
@ -9,6 +9,7 @@
|
||||
#define EXTENT_MAP_LAST_BYTE ((u64)-4)
|
||||
#define EXTENT_MAP_HOLE ((u64)-3)
|
||||
#define EXTENT_MAP_INLINE ((u64)-2)
|
||||
/* used only during fiemap calls */
|
||||
#define EXTENT_MAP_DELALLOC ((u64)-1)
|
||||
|
||||
/* bits for the extent_map::flags field */
|
||||
|
@ -3218,8 +3218,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
|
||||
&cached_state);
|
||||
|
||||
while (start < inode->i_size) {
|
||||
em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0,
|
||||
start, len, 0);
|
||||
em = btrfs_get_extent_fiemap(BTRFS_I(inode), start, len);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
em = NULL;
|
||||
|
213
fs/btrfs/inode.c
213
fs/btrfs/inode.c
@ -453,7 +453,6 @@ static noinline void compress_file_range(struct inode *inode,
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
u64 blocksize = fs_info->sectorsize;
|
||||
u64 actual_end;
|
||||
u64 isize = i_size_read(inode);
|
||||
int ret = 0;
|
||||
struct page **pages = NULL;
|
||||
unsigned long nr_pages;
|
||||
@ -467,7 +466,7 @@ static noinline void compress_file_range(struct inode *inode,
|
||||
inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
|
||||
SZ_16K);
|
||||
|
||||
actual_end = min_t(u64, isize, end + 1);
|
||||
actual_end = min_t(u64, i_size_read(inode), end + 1);
|
||||
again:
|
||||
will_compress = 0;
|
||||
nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
|
||||
@ -714,9 +713,9 @@ static void free_async_extent_pages(struct async_extent *async_extent)
|
||||
* queued. We walk all the async extents created by compress_file_range
|
||||
* and send them down to the disk.
|
||||
*/
|
||||
static noinline void submit_compressed_extents(struct inode *inode,
|
||||
struct async_cow *async_cow)
|
||||
static noinline void submit_compressed_extents(struct async_cow *async_cow)
|
||||
{
|
||||
struct inode *inode = async_cow->inode;
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct async_extent *async_extent;
|
||||
u64 alloc_hint = 0;
|
||||
@ -1166,8 +1165,14 @@ static noinline void async_cow_submit(struct btrfs_work *work)
|
||||
5 * SZ_1M)
|
||||
cond_wake_up_nomb(&fs_info->async_submit_wait);
|
||||
|
||||
/*
|
||||
* ->inode could be NULL if async_cow_start has failed to compress,
|
||||
* in which case we don't have anything to submit, yet we need to
|
||||
* always adjust ->async_delalloc_pages as it's paired with the init
|
||||
* happening in cow_file_range_async
|
||||
*/
|
||||
if (async_cow->inode)
|
||||
submit_compressed_extents(async_cow->inode, async_cow);
|
||||
submit_compressed_extents(async_cow);
|
||||
}
|
||||
|
||||
static noinline void async_cow_free(struct btrfs_work *work)
|
||||
@ -1194,7 +1199,12 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
|
||||
while (start < end) {
|
||||
async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
|
||||
BUG_ON(!async_cow); /* -ENOMEM */
|
||||
async_cow->inode = igrab(inode);
|
||||
/*
|
||||
* igrab is called higher up in the call chain, take only the
|
||||
* lightweight reference for the callback lifetime
|
||||
*/
|
||||
ihold(inode);
|
||||
async_cow->inode = inode;
|
||||
async_cow->fs_info = fs_info;
|
||||
async_cow->locked_page = locked_page;
|
||||
async_cow->start = start;
|
||||
@ -1586,11 +1596,10 @@ static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
|
||||
* Function to process delayed allocation (create CoW) for ranges which are
|
||||
* being touched for the first time.
|
||||
*/
|
||||
int btrfs_run_delalloc_range(void *private_data, struct page *locked_page,
|
||||
int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
|
||||
u64 start, u64 end, int *page_started, unsigned long *nr_written,
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
struct inode *inode = private_data;
|
||||
int ret;
|
||||
int force_cow = need_force_cow(inode, start, end);
|
||||
unsigned int write_flags = wbc_to_write_flags(wbc);
|
||||
@ -3247,6 +3256,7 @@ void btrfs_add_delayed_iput(struct inode *inode)
|
||||
if (atomic_add_unless(&inode->i_count, -1, 1))
|
||||
return;
|
||||
|
||||
atomic_inc(&fs_info->nr_delayed_iputs);
|
||||
spin_lock(&fs_info->delayed_iput_lock);
|
||||
ASSERT(list_empty(&binode->delayed_iput));
|
||||
list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
|
||||
@ -3267,11 +3277,32 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
|
||||
list_del_init(&inode->delayed_iput);
|
||||
spin_unlock(&fs_info->delayed_iput_lock);
|
||||
iput(&inode->vfs_inode);
|
||||
if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
|
||||
wake_up(&fs_info->delayed_iputs_wait);
|
||||
spin_lock(&fs_info->delayed_iput_lock);
|
||||
}
|
||||
spin_unlock(&fs_info->delayed_iput_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_wait_on_delayed_iputs - wait on the delayed iputs to be done running
|
||||
* @fs_info - the fs_info for this fs
|
||||
* @return - EINTR if we were killed, 0 if nothing's pending
|
||||
*
|
||||
* This will wait on any delayed iputs that are currently running with KILLABLE
|
||||
* set. Once they are all done running we will return, unless we are killed in
|
||||
* which case we return EINTR. This helps in user operations like fallocate etc
|
||||
* that might get blocked on the iputs.
|
||||
*/
|
||||
int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
int ret = wait_event_killable(fs_info->delayed_iputs_wait,
|
||||
atomic_read(&fs_info->nr_delayed_iputs) == 0);
|
||||
if (ret)
|
||||
return -EINTR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This creates an orphan entry for the given inode in case something goes wrong
|
||||
* in the middle of an unlink.
|
||||
@ -5262,13 +5293,15 @@ static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
u64 delayed_refs_extra = btrfs_calc_trans_metadata_size(fs_info, 1);
|
||||
int failures = 0;
|
||||
|
||||
for (;;) {
|
||||
struct btrfs_trans_handle *trans;
|
||||
int ret;
|
||||
|
||||
ret = btrfs_block_rsv_refill(root, rsv, rsv->size,
|
||||
ret = btrfs_block_rsv_refill(root, rsv,
|
||||
rsv->size + delayed_refs_extra,
|
||||
BTRFS_RESERVE_FLUSH_LIMIT);
|
||||
|
||||
if (ret && ++failures > 2) {
|
||||
@ -5277,9 +5310,28 @@ static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
|
||||
return ERR_PTR(-ENOSPC);
|
||||
}
|
||||
|
||||
/*
|
||||
* Evict can generate a large amount of delayed refs without
|
||||
* having a way to add space back since we exhaust our temporary
|
||||
* block rsv. We aren't allowed to do FLUSH_ALL in this case
|
||||
* because we could deadlock with so many things in the flushing
|
||||
* code, so we have to try and hold some extra space to
|
||||
* compensate for our delayed ref generation. If we can't get
|
||||
* that space then we need to see if we can steal our minimum from
|
||||
* the global reserve. We will be ratelimited by the amount of
|
||||
* space we have for the delayed refs rsv, so we'll end up
|
||||
* committing and trying again.
|
||||
*/
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans) || !ret)
|
||||
if (IS_ERR(trans) || !ret) {
|
||||
if (!IS_ERR(trans)) {
|
||||
trans->block_rsv = &fs_info->trans_block_rsv;
|
||||
trans->bytes_reserved = delayed_refs_extra;
|
||||
btrfs_block_rsv_migrate(rsv, trans->block_rsv,
|
||||
delayed_refs_extra, 1);
|
||||
}
|
||||
return trans;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to steal from the global reserve if there is space for
|
||||
@ -6731,7 +6783,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
u64 extent_start = 0;
|
||||
u64 extent_end = 0;
|
||||
u64 objectid = btrfs_ino(inode);
|
||||
u32 found_type;
|
||||
u8 extent_type;
|
||||
struct btrfs_path *path = NULL;
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_file_extent_item *item;
|
||||
@ -6786,9 +6838,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ret != 0) {
|
||||
} else if (ret > 0) {
|
||||
if (path->slots[0] == 0)
|
||||
goto not_found;
|
||||
path->slots[0]--;
|
||||
@ -6797,11 +6847,9 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
leaf = path->nodes[0];
|
||||
item = btrfs_item_ptr(leaf, path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
/* are we inside the extent that was found? */
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
|
||||
found_type = found_key.type;
|
||||
if (found_key.objectid != objectid ||
|
||||
found_type != BTRFS_EXTENT_DATA_KEY) {
|
||||
found_key.type != BTRFS_EXTENT_DATA_KEY) {
|
||||
/*
|
||||
* If we backup past the first extent we want to move forward
|
||||
* and see if there is an extent in front of us, otherwise we'll
|
||||
@ -6812,16 +6860,16 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
goto next;
|
||||
}
|
||||
|
||||
found_type = btrfs_file_extent_type(leaf, item);
|
||||
extent_type = btrfs_file_extent_type(leaf, item);
|
||||
extent_start = found_key.offset;
|
||||
if (found_type == BTRFS_FILE_EXTENT_REG ||
|
||||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
if (extent_type == BTRFS_FILE_EXTENT_REG ||
|
||||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
extent_end = extent_start +
|
||||
btrfs_file_extent_num_bytes(leaf, item);
|
||||
|
||||
trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
|
||||
extent_start);
|
||||
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
size_t size;
|
||||
|
||||
size = btrfs_file_extent_ram_bytes(leaf, item);
|
||||
@ -6840,9 +6888,9 @@ next:
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out;
|
||||
}
|
||||
if (ret > 0)
|
||||
} else if (ret > 0) {
|
||||
goto not_found;
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
}
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
|
||||
@ -6853,19 +6901,22 @@ next:
|
||||
goto not_found;
|
||||
if (start > found_key.offset)
|
||||
goto next;
|
||||
|
||||
/* New extent overlaps with existing one */
|
||||
em->start = start;
|
||||
em->orig_start = start;
|
||||
em->len = found_key.offset - start;
|
||||
goto not_found_em;
|
||||
em->block_start = EXTENT_MAP_HOLE;
|
||||
goto insert;
|
||||
}
|
||||
|
||||
btrfs_extent_item_to_extent_map(inode, path, item,
|
||||
new_inline, em);
|
||||
|
||||
if (found_type == BTRFS_FILE_EXTENT_REG ||
|
||||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
if (extent_type == BTRFS_FILE_EXTENT_REG ||
|
||||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
goto insert;
|
||||
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
unsigned long ptr;
|
||||
char *map;
|
||||
size_t size;
|
||||
@ -6916,7 +6967,6 @@ not_found:
|
||||
em->start = start;
|
||||
em->orig_start = start;
|
||||
em->len = len;
|
||||
not_found_em:
|
||||
em->block_start = EXTENT_MAP_HOLE;
|
||||
insert:
|
||||
btrfs_release_path(path);
|
||||
@ -6946,19 +6996,17 @@ out:
|
||||
}
|
||||
|
||||
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
|
||||
struct page *page,
|
||||
size_t pg_offset, u64 start, u64 len,
|
||||
int create)
|
||||
u64 start, u64 len)
|
||||
{
|
||||
struct extent_map *em;
|
||||
struct extent_map *hole_em = NULL;
|
||||
u64 range_start = start;
|
||||
u64 delalloc_start = start;
|
||||
u64 end;
|
||||
u64 found;
|
||||
u64 found_end;
|
||||
u64 delalloc_len;
|
||||
u64 delalloc_end;
|
||||
int err = 0;
|
||||
|
||||
em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
|
||||
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
|
||||
if (IS_ERR(em))
|
||||
return em;
|
||||
/*
|
||||
@ -6983,80 +7031,84 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
|
||||
em = NULL;
|
||||
|
||||
/* ok, we didn't find anything, lets look for delalloc */
|
||||
found = count_range_bits(&inode->io_tree, &range_start,
|
||||
delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start,
|
||||
end, len, EXTENT_DELALLOC, 1);
|
||||
found_end = range_start + found;
|
||||
if (found_end < range_start)
|
||||
found_end = (u64)-1;
|
||||
delalloc_end = delalloc_start + delalloc_len;
|
||||
if (delalloc_end < delalloc_start)
|
||||
delalloc_end = (u64)-1;
|
||||
|
||||
/*
|
||||
* we didn't find anything useful, return
|
||||
* the original results from get_extent()
|
||||
* We didn't find anything useful, return the original results from
|
||||
* get_extent()
|
||||
*/
|
||||
if (range_start > end || found_end <= start) {
|
||||
if (delalloc_start > end || delalloc_end <= start) {
|
||||
em = hole_em;
|
||||
hole_em = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* adjust the range_start to make sure it doesn't
|
||||
* go backwards from the start they passed in
|
||||
/*
|
||||
* Adjust the delalloc_start to make sure it doesn't go backwards from
|
||||
* the start they passed in
|
||||
*/
|
||||
range_start = max(start, range_start);
|
||||
found = found_end - range_start;
|
||||
delalloc_start = max(start, delalloc_start);
|
||||
delalloc_len = delalloc_end - delalloc_start;
|
||||
|
||||
if (found > 0) {
|
||||
u64 hole_start = start;
|
||||
u64 hole_len = len;
|
||||
if (delalloc_len > 0) {
|
||||
u64 hole_start;
|
||||
u64 hole_len;
|
||||
const u64 hole_end = extent_map_end(hole_em);
|
||||
|
||||
em = alloc_extent_map();
|
||||
if (!em) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* when btrfs_get_extent can't find anything it
|
||||
* returns one huge hole
|
||||
*
|
||||
* make sure what it found really fits our range, and
|
||||
* adjust to make sure it is based on the start from
|
||||
* the caller
|
||||
*/
|
||||
if (hole_em) {
|
||||
u64 calc_end = extent_map_end(hole_em);
|
||||
|
||||
if (calc_end <= start || (hole_em->start > end)) {
|
||||
free_extent_map(hole_em);
|
||||
hole_em = NULL;
|
||||
} else {
|
||||
hole_start = max(hole_em->start, start);
|
||||
hole_len = calc_end - hole_start;
|
||||
}
|
||||
}
|
||||
em->bdev = NULL;
|
||||
if (hole_em && range_start > hole_start) {
|
||||
/* our hole starts before our delalloc, so we
|
||||
* have to return just the parts of the hole
|
||||
* that go until the delalloc starts
|
||||
|
||||
ASSERT(hole_em);
|
||||
/*
|
||||
* When btrfs_get_extent can't find anything it returns one
|
||||
* huge hole
|
||||
*
|
||||
* Make sure what it found really fits our range, and adjust to
|
||||
* make sure it is based on the start from the caller
|
||||
*/
|
||||
if (hole_end <= start || hole_em->start > end) {
|
||||
free_extent_map(hole_em);
|
||||
hole_em = NULL;
|
||||
} else {
|
||||
hole_start = max(hole_em->start, start);
|
||||
hole_len = hole_end - hole_start;
|
||||
}
|
||||
|
||||
if (hole_em && delalloc_start > hole_start) {
|
||||
/*
|
||||
* Our hole starts before our delalloc, so we have to
|
||||
* return just the parts of the hole that go until the
|
||||
* delalloc starts
|
||||
*/
|
||||
em->len = min(hole_len,
|
||||
range_start - hole_start);
|
||||
em->len = min(hole_len, delalloc_start - hole_start);
|
||||
em->start = hole_start;
|
||||
em->orig_start = hole_start;
|
||||
/*
|
||||
* don't adjust block start at all,
|
||||
* it is fixed at EXTENT_MAP_HOLE
|
||||
* Don't adjust block start at all, it is fixed at
|
||||
* EXTENT_MAP_HOLE
|
||||
*/
|
||||
em->block_start = hole_em->block_start;
|
||||
em->block_len = hole_len;
|
||||
if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
|
||||
set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
|
||||
} else {
|
||||
em->start = range_start;
|
||||
em->len = found;
|
||||
em->orig_start = range_start;
|
||||
/*
|
||||
* Hole is out of passed range or it starts after
|
||||
* delalloc range
|
||||
*/
|
||||
em->start = delalloc_start;
|
||||
em->len = delalloc_len;
|
||||
em->orig_start = delalloc_start;
|
||||
em->block_start = EXTENT_MAP_DELALLOC;
|
||||
em->block_len = found;
|
||||
em->block_len = delalloc_len;
|
||||
}
|
||||
} else {
|
||||
return hole_em;
|
||||
@ -9910,7 +9962,6 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
|
||||
init_completion(&work->completion);
|
||||
INIT_LIST_HEAD(&work->list);
|
||||
work->inode = inode;
|
||||
WARN_ON_ONCE(!inode);
|
||||
btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
|
||||
btrfs_run_delalloc_work, NULL, NULL);
|
||||
|
||||
|
@ -1642,7 +1642,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
|
||||
btrfs_info(fs_info, "resizing devid %llu", devid);
|
||||
}
|
||||
|
||||
device = btrfs_find_device(fs_info, devid, NULL, NULL);
|
||||
device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
|
||||
if (!device) {
|
||||
btrfs_info(fs_info, "resizer unable to find device %llu",
|
||||
devid);
|
||||
@ -3178,7 +3178,8 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
|
||||
s_uuid = di_args->uuid;
|
||||
|
||||
rcu_read_lock();
|
||||
dev = btrfs_find_device(fs_info, di_args->devid, s_uuid, NULL);
|
||||
dev = btrfs_find_device(fs_info->fs_devices, di_args->devid, s_uuid,
|
||||
NULL, true);
|
||||
|
||||
if (!dev) {
|
||||
ret = -ENODEV;
|
||||
@ -3241,32 +3242,17 @@ static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
|
||||
lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
|
||||
}
|
||||
|
||||
static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
|
||||
static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
|
||||
struct inode *dst, u64 dst_loff)
|
||||
{
|
||||
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
|
||||
int ret;
|
||||
u64 len = olen;
|
||||
|
||||
if (loff + len == src->i_size)
|
||||
len = ALIGN(src->i_size, bs) - loff;
|
||||
/*
|
||||
* For same inode case we don't want our length pushed out past i_size
|
||||
* as comparing that data range makes no sense.
|
||||
*
|
||||
* This effectively means we require aligned extents for the single
|
||||
* inode case, whereas the other cases allow an unaligned length so long
|
||||
* as it ends at i_size.
|
||||
*/
|
||||
if (dst == src && len != olen)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Lock destination range to serialize with concurrent readpages() and
|
||||
* source range to serialize with relocation.
|
||||
*/
|
||||
btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
|
||||
ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
|
||||
ret = btrfs_clone(src, dst, loff, len, len, dst_loff, 1);
|
||||
btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
|
||||
|
||||
return ret;
|
||||
@ -3278,21 +3264,10 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
|
||||
struct inode *dst, u64 dst_loff)
|
||||
{
|
||||
int ret;
|
||||
int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT;
|
||||
u64 i, tail_len, chunk_count;
|
||||
|
||||
/* don't make the dst file partly checksummed */
|
||||
if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
|
||||
(BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM))
|
||||
return -EINVAL;
|
||||
|
||||
if (IS_SWAPFILE(src) || IS_SWAPFILE(dst))
|
||||
return -ETXTBSY;
|
||||
|
||||
tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
|
||||
chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
|
||||
if (chunk_count == 0)
|
||||
num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT;
|
||||
|
||||
for (i = 0; i < chunk_count; i++) {
|
||||
ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
|
||||
@ -3908,14 +3883,6 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
|
||||
* be either compressed or non-compressed.
|
||||
*/
|
||||
|
||||
/* don't make the dst file partly checksummed */
|
||||
if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
|
||||
(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
|
||||
return -EINVAL;
|
||||
|
||||
if (IS_SWAPFILE(src) || IS_SWAPFILE(inode))
|
||||
return -ETXTBSY;
|
||||
|
||||
/*
|
||||
* VFS's generic_remap_file_range_prep() protects us from cloning the
|
||||
* eof block into the middle of a file, which would result in corruption
|
||||
@ -3991,6 +3958,13 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
|
||||
else
|
||||
btrfs_double_inode_lock(inode_in, inode_out);
|
||||
|
||||
/* don't make the dst file partly checksummed */
|
||||
if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) !=
|
||||
(BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM)) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now that the inodes are locked, we need to start writeback ourselves
|
||||
* and can not rely on the writeback from the VFS's generic helper
|
||||
@ -4381,7 +4355,7 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
|
||||
&sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
|
||||
0);
|
||||
|
||||
if (copy_to_user(arg, sa, sizeof(*sa)))
|
||||
if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
|
||||
ret = -EFAULT;
|
||||
|
||||
if (!(sa->flags & BTRFS_SCRUB_READONLY))
|
||||
@ -4414,7 +4388,7 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info,
|
||||
|
||||
ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress);
|
||||
|
||||
if (copy_to_user(arg, sa, sizeof(*sa)))
|
||||
if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
|
||||
ret = -EFAULT;
|
||||
|
||||
kfree(sa);
|
||||
@ -4438,7 +4412,7 @@ static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info,
|
||||
|
||||
ret = btrfs_get_dev_stats(fs_info, sa);
|
||||
|
||||
if (copy_to_user(arg, sa, sizeof(*sa)))
|
||||
if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
|
||||
ret = -EFAULT;
|
||||
|
||||
kfree(sa);
|
||||
@ -4484,7 +4458,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
|
||||
break;
|
||||
}
|
||||
|
||||
if (copy_to_user(arg, p, sizeof(*p)))
|
||||
if ((ret == 0 || ret == -ECANCELED) && copy_to_user(arg, p, sizeof(*p)))
|
||||
ret = -EFAULT;
|
||||
out:
|
||||
kfree(p);
|
||||
@ -4790,7 +4764,7 @@ do_balance:
|
||||
ret = btrfs_balance(fs_info, bctl, bargs);
|
||||
bctl = NULL;
|
||||
|
||||
if (arg) {
|
||||
if ((ret == 0 || ret == -ECANCELED) && arg) {
|
||||
if (copy_to_user(arg, bargs, sizeof(*bargs)))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
@ -14,43 +14,58 @@
|
||||
|
||||
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
|
||||
|
||||
/*
|
||||
* if we currently have a spinning reader or writer lock
|
||||
* (indicated by the rw flag) this will bump the count
|
||||
* of blocking holders and drop the spinlock.
|
||||
*/
|
||||
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
|
||||
void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
|
||||
{
|
||||
/*
|
||||
* no lock is required. The lock owner may change if
|
||||
* we have a read lock, but it won't change to or away
|
||||
* from us. If we have the write lock, we are the owner
|
||||
* and it'll never change.
|
||||
* No lock is required. The lock owner may change if we have a read
|
||||
* lock, but it won't change to or away from us. If we have the write
|
||||
* lock, we are the owner and it'll never change.
|
||||
*/
|
||||
if (eb->lock_nested && current->pid == eb->lock_owner)
|
||||
return;
|
||||
if (rw == BTRFS_WRITE_LOCK) {
|
||||
if (atomic_read(&eb->blocking_writers) == 0) {
|
||||
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
|
||||
atomic_dec(&eb->spinning_writers);
|
||||
btrfs_assert_tree_locked(eb);
|
||||
atomic_inc(&eb->blocking_writers);
|
||||
write_unlock(&eb->lock);
|
||||
}
|
||||
} else if (rw == BTRFS_READ_LOCK) {
|
||||
btrfs_assert_tree_read_locked(eb);
|
||||
atomic_inc(&eb->blocking_readers);
|
||||
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
|
||||
atomic_dec(&eb->spinning_readers);
|
||||
read_unlock(&eb->lock);
|
||||
btrfs_assert_tree_read_locked(eb);
|
||||
atomic_inc(&eb->blocking_readers);
|
||||
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
|
||||
atomic_dec(&eb->spinning_readers);
|
||||
read_unlock(&eb->lock);
|
||||
}
|
||||
|
||||
void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
|
||||
{
|
||||
/*
|
||||
* No lock is required. The lock owner may change if we have a read
|
||||
* lock, but it won't change to or away from us. If we have the write
|
||||
* lock, we are the owner and it'll never change.
|
||||
*/
|
||||
if (eb->lock_nested && current->pid == eb->lock_owner)
|
||||
return;
|
||||
if (atomic_read(&eb->blocking_writers) == 0) {
|
||||
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
|
||||
atomic_dec(&eb->spinning_writers);
|
||||
btrfs_assert_tree_locked(eb);
|
||||
atomic_inc(&eb->blocking_writers);
|
||||
write_unlock(&eb->lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* if we currently have a blocking lock, take the spinlock
|
||||
* and drop our blocking count
|
||||
*/
|
||||
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
|
||||
void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
|
||||
{
|
||||
/*
|
||||
* No lock is required. The lock owner may change if we have a read
|
||||
* lock, but it won't change to or away from us. If we have the write
|
||||
* lock, we are the owner and it'll never change.
|
||||
*/
|
||||
if (eb->lock_nested && current->pid == eb->lock_owner)
|
||||
return;
|
||||
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
|
||||
read_lock(&eb->lock);
|
||||
atomic_inc(&eb->spinning_readers);
|
||||
/* atomic_dec_and_test implies a barrier */
|
||||
if (atomic_dec_and_test(&eb->blocking_readers))
|
||||
cond_wake_up_nomb(&eb->read_lock_wq);
|
||||
}
|
||||
|
||||
void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
|
||||
{
|
||||
/*
|
||||
* no lock is required. The lock owner may change if
|
||||
@ -60,23 +75,13 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
|
||||
*/
|
||||
if (eb->lock_nested && current->pid == eb->lock_owner)
|
||||
return;
|
||||
|
||||
if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
|
||||
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
|
||||
write_lock(&eb->lock);
|
||||
WARN_ON(atomic_read(&eb->spinning_writers));
|
||||
atomic_inc(&eb->spinning_writers);
|
||||
/* atomic_dec_and_test implies a barrier */
|
||||
if (atomic_dec_and_test(&eb->blocking_writers))
|
||||
cond_wake_up_nomb(&eb->write_lock_wq);
|
||||
} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
|
||||
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
|
||||
read_lock(&eb->lock);
|
||||
atomic_inc(&eb->spinning_readers);
|
||||
/* atomic_dec_and_test implies a barrier */
|
||||
if (atomic_dec_and_test(&eb->blocking_readers))
|
||||
cond_wake_up_nomb(&eb->read_lock_wq);
|
||||
}
|
||||
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
|
||||
write_lock(&eb->lock);
|
||||
WARN_ON(atomic_read(&eb->spinning_writers));
|
||||
atomic_inc(&eb->spinning_writers);
|
||||
/* atomic_dec_and_test implies a barrier */
|
||||
if (atomic_dec_and_test(&eb->blocking_writers))
|
||||
cond_wake_up_nomb(&eb->write_lock_wq);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -232,16 +237,9 @@ again:
|
||||
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
|
||||
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
|
||||
write_lock(&eb->lock);
|
||||
if (atomic_read(&eb->blocking_readers)) {
|
||||
if (atomic_read(&eb->blocking_readers) ||
|
||||
atomic_read(&eb->blocking_writers)) {
|
||||
write_unlock(&eb->lock);
|
||||
wait_event(eb->read_lock_wq,
|
||||
atomic_read(&eb->blocking_readers) == 0);
|
||||
goto again;
|
||||
}
|
||||
if (atomic_read(&eb->blocking_writers)) {
|
||||
write_unlock(&eb->lock);
|
||||
wait_event(eb->write_lock_wq,
|
||||
atomic_read(&eb->blocking_writers) == 0);
|
||||
goto again;
|
||||
}
|
||||
WARN_ON(atomic_read(&eb->spinning_writers));
|
||||
|
@ -17,8 +17,10 @@ void btrfs_tree_unlock(struct extent_buffer *eb);
|
||||
void btrfs_tree_read_lock(struct extent_buffer *eb);
|
||||
void btrfs_tree_read_unlock(struct extent_buffer *eb);
|
||||
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
|
||||
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
|
||||
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
|
||||
void btrfs_set_lock_blocking_read(struct extent_buffer *eb);
|
||||
void btrfs_set_lock_blocking_write(struct extent_buffer *eb);
|
||||
void btrfs_clear_lock_blocking_read(struct extent_buffer *eb);
|
||||
void btrfs_clear_lock_blocking_write(struct extent_buffer *eb);
|
||||
void btrfs_assert_tree_locked(struct extent_buffer *eb);
|
||||
int btrfs_try_tree_read_lock(struct extent_buffer *eb);
|
||||
int btrfs_try_tree_write_lock(struct extent_buffer *eb);
|
||||
@ -37,13 +39,4 @@ static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
|
||||
BUG();
|
||||
}
|
||||
|
||||
static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
|
||||
{
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
|
||||
}
|
||||
|
||||
static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
|
||||
{
|
||||
btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
|
||||
}
|
||||
#endif
|
||||
|
@ -61,6 +61,28 @@ struct workspace {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
static struct workspace_manager wsm;
|
||||
|
||||
static void lzo_init_workspace_manager(void)
|
||||
{
|
||||
btrfs_init_workspace_manager(&wsm, &btrfs_lzo_compress);
|
||||
}
|
||||
|
||||
static void lzo_cleanup_workspace_manager(void)
|
||||
{
|
||||
btrfs_cleanup_workspace_manager(&wsm);
|
||||
}
|
||||
|
||||
static struct list_head *lzo_get_workspace(unsigned int level)
|
||||
{
|
||||
return btrfs_get_workspace(&wsm, level);
|
||||
}
|
||||
|
||||
static void lzo_put_workspace(struct list_head *ws)
|
||||
{
|
||||
btrfs_put_workspace(&wsm, ws);
|
||||
}
|
||||
|
||||
static void lzo_free_workspace(struct list_head *ws)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
@ -71,7 +93,7 @@ static void lzo_free_workspace(struct list_head *ws)
|
||||
kfree(workspace);
|
||||
}
|
||||
|
||||
static struct list_head *lzo_alloc_workspace(void)
|
||||
static struct list_head *lzo_alloc_workspace(unsigned int level)
|
||||
{
|
||||
struct workspace *workspace;
|
||||
|
||||
@ -485,11 +507,16 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void lzo_set_level(struct list_head *ws, unsigned int type)
|
||||
/*
 * Level hook for the LZO compression ops.  The requested @level is ignored
 * and 0 is always returned.
 */
static unsigned int lzo_set_level(unsigned int level)
{
	return 0;
}
|
||||
|
||||
const struct btrfs_compress_op btrfs_lzo_compress = {
|
||||
.init_workspace_manager = lzo_init_workspace_manager,
|
||||
.cleanup_workspace_manager = lzo_cleanup_workspace_manager,
|
||||
.get_workspace = lzo_get_workspace,
|
||||
.put_workspace = lzo_put_workspace,
|
||||
.alloc_workspace = lzo_alloc_workspace,
|
||||
.free_workspace = lzo_free_workspace,
|
||||
.compress_pages = lzo_compress_pages,
|
||||
|
@ -1546,12 +1546,18 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
|
||||
parent_node = *p;
|
||||
entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
|
||||
node);
|
||||
if (bytenr < entry->bytenr)
|
||||
if (bytenr < entry->bytenr) {
|
||||
p = &(*p)->rb_left;
|
||||
else if (bytenr > entry->bytenr)
|
||||
} else if (bytenr > entry->bytenr) {
|
||||
p = &(*p)->rb_right;
|
||||
else
|
||||
} else {
|
||||
if (record->data_rsv && !entry->data_rsv) {
|
||||
entry->data_rsv = record->data_rsv;
|
||||
entry->data_rsv_refroot =
|
||||
record->data_rsv_refroot;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(&record->node, parent_node, p);
|
||||
@ -1597,7 +1603,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
|
||||
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
|
||||
|| bytenr == 0 || num_bytes == 0)
|
||||
return 0;
|
||||
record = kmalloc(sizeof(*record), gfp_flag);
|
||||
record = kzalloc(sizeof(*record), gfp_flag);
|
||||
if (!record)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -1832,7 +1838,7 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
|
||||
src_path->nodes[cur_level] = eb;
|
||||
|
||||
btrfs_tree_read_lock(eb);
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
btrfs_set_lock_blocking_read(eb);
|
||||
src_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
|
||||
}
|
||||
|
||||
@ -1973,7 +1979,7 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
|
||||
dst_path->slots[cur_level] = 0;
|
||||
|
||||
btrfs_tree_read_lock(eb);
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
btrfs_set_lock_blocking_read(eb);
|
||||
dst_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
|
||||
need_cleanup = true;
|
||||
}
|
||||
@ -2017,86 +2023,30 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inform qgroup to trace subtree swap used in balance.
|
||||
*
|
||||
* Unlike btrfs_qgroup_trace_subtree(), this function will only trace
|
||||
* new tree blocks whose generation is equal to (or larger than) @last_snapshot.
|
||||
*
|
||||
* Will go down the tree block pointed by @dst_eb (pointed by @dst_parent and
|
||||
* @dst_slot), and find any tree blocks whose generation is at @last_snapshot,
|
||||
* and then go down @src_eb (pointed by @src_parent and @src_slot) to find
|
||||
* the counterpart of the tree block, then mark both tree blocks as qgroup dirty,
|
||||
* and skip all tree blocks whose generation is smaller than last_snapshot.
|
||||
*
|
||||
* This would skip tons of tree blocks of original btrfs_qgroup_trace_subtree(),
|
||||
* which could be the cause of very slow balance if the file tree is large.
|
||||
*
|
||||
* @src_parent, @src_slot: pointer to src (file tree) eb.
|
||||
* @dst_parent, @dst_slot: pointer to dst (reloc tree) eb.
|
||||
*/
|
||||
int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_block_group_cache *bg_cache,
|
||||
struct extent_buffer *src_parent, int src_slot,
|
||||
struct extent_buffer *dst_parent, int dst_slot,
|
||||
u64 last_snapshot)
|
||||
static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *src_eb,
|
||||
struct extent_buffer *dst_eb,
|
||||
u64 last_snapshot, bool trace_leaf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_path *dst_path = NULL;
|
||||
struct btrfs_key first_key;
|
||||
struct extent_buffer *src_eb = NULL;
|
||||
struct extent_buffer *dst_eb = NULL;
|
||||
bool trace_leaf = false;
|
||||
u64 child_gen;
|
||||
u64 child_bytenr;
|
||||
int level;
|
||||
int ret;
|
||||
|
||||
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
|
||||
return 0;
|
||||
|
||||
/* Check parameter order */
|
||||
if (btrfs_node_ptr_generation(src_parent, src_slot) >
|
||||
btrfs_node_ptr_generation(dst_parent, dst_slot)) {
|
||||
/* Wrong parameter order */
|
||||
if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) {
|
||||
btrfs_err_rl(fs_info,
|
||||
"%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
|
||||
btrfs_node_ptr_generation(src_parent, src_slot),
|
||||
btrfs_node_ptr_generation(dst_parent, dst_slot));
|
||||
btrfs_header_generation(src_eb),
|
||||
btrfs_header_generation(dst_eb));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/*
|
||||
* Only trace leaf if we're relocating data block groups, this could
|
||||
* reduce tons of data extents tracing for meta/sys bg relocation.
|
||||
*/
|
||||
if (bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
trace_leaf = true;
|
||||
/* Read out real @src_eb, pointed by @src_parent and @src_slot */
|
||||
child_bytenr = btrfs_node_blockptr(src_parent, src_slot);
|
||||
child_gen = btrfs_node_ptr_generation(src_parent, src_slot);
|
||||
btrfs_node_key_to_cpu(src_parent, &first_key, src_slot);
|
||||
|
||||
src_eb = read_tree_block(fs_info, child_bytenr, child_gen,
|
||||
btrfs_header_level(src_parent) - 1, &first_key);
|
||||
if (IS_ERR(src_eb)) {
|
||||
ret = PTR_ERR(src_eb);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Read out real @dst_eb, pointed by @dst_parent and @dst_slot */
|
||||
child_bytenr = btrfs_node_blockptr(dst_parent, dst_slot);
|
||||
child_gen = btrfs_node_ptr_generation(dst_parent, dst_slot);
|
||||
btrfs_node_key_to_cpu(dst_parent, &first_key, dst_slot);
|
||||
|
||||
dst_eb = read_tree_block(fs_info, child_bytenr, child_gen,
|
||||
btrfs_header_level(dst_parent) - 1, &first_key);
|
||||
if (IS_ERR(dst_eb)) {
|
||||
ret = PTR_ERR(dst_eb);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
|
||||
ret = -EINVAL;
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2106,14 +2056,13 @@ int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* For dst_path */
|
||||
extent_buffer_get(dst_eb);
|
||||
dst_path->nodes[level] = dst_eb;
|
||||
dst_path->slots[level] = 0;
|
||||
dst_path->locks[level] = 0;
|
||||
|
||||
/* Do the generation-aware breadth-first search */
|
||||
/* Do the generation aware breadth-first search */
|
||||
ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
|
||||
level, last_snapshot, trace_leaf);
|
||||
if (ret < 0)
|
||||
@ -2121,8 +2070,6 @@ int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
free_extent_buffer(src_eb);
|
||||
free_extent_buffer(dst_eb);
|
||||
btrfs_free_path(dst_path);
|
||||
if (ret < 0)
|
||||
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
|
||||
@ -2207,7 +2154,7 @@ walk_down:
|
||||
path->slots[level] = 0;
|
||||
|
||||
btrfs_tree_read_lock(eb);
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
btrfs_set_lock_blocking_read(eb);
|
||||
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
|
||||
|
||||
ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
|
||||
@ -2576,6 +2523,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Free the reserved data space */
|
||||
btrfs_qgroup_free_refroot(fs_info,
|
||||
record->data_rsv_refroot,
|
||||
record->data_rsv,
|
||||
BTRFS_QGROUP_RSV_DATA);
|
||||
/*
|
||||
* Use SEQ_LAST as time_seq to do special search, which
|
||||
* doesn't lock tree or delayed_refs and search current
|
||||
@ -2842,16 +2794,15 @@ out:
|
||||
/*
|
||||
* Two limits to commit transaction in advance.
|
||||
*
|
||||
* For RATIO, it will be 1/RATIO of the remaining limit
|
||||
* (excluding data and prealloc meta) as threshold.
|
||||
* For RATIO, it will be 1/RATIO of the remaining limit as threshold.
|
||||
* For SIZE, it will be in byte unit as threshold.
|
||||
*/
|
||||
#define QGROUP_PERTRANS_RATIO 32
|
||||
#define QGROUP_PERTRANS_SIZE SZ_32M
|
||||
#define QGROUP_FREE_RATIO 32
|
||||
#define QGROUP_FREE_SIZE SZ_32M
|
||||
static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_qgroup *qg, u64 num_bytes)
|
||||
{
|
||||
u64 limit;
|
||||
u64 free;
|
||||
u64 threshold;
|
||||
|
||||
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
|
||||
@ -2870,20 +2821,21 @@ static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
|
||||
*/
|
||||
if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
|
||||
BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
|
||||
if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
|
||||
limit = qg->max_excl;
|
||||
else
|
||||
limit = qg->max_rfer;
|
||||
threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] -
|
||||
qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) /
|
||||
QGROUP_PERTRANS_RATIO;
|
||||
threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE);
|
||||
if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
|
||||
free = qg->max_excl - qgroup_rsv_total(qg) - qg->excl;
|
||||
threshold = min_t(u64, qg->max_excl / QGROUP_FREE_RATIO,
|
||||
QGROUP_FREE_SIZE);
|
||||
} else {
|
||||
free = qg->max_rfer - qgroup_rsv_total(qg) - qg->rfer;
|
||||
threshold = min_t(u64, qg->max_rfer / QGROUP_FREE_RATIO,
|
||||
QGROUP_FREE_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use transaction_kthread to commit transaction, so we no
|
||||
* longer need to bother nested transaction nor lock context.
|
||||
*/
|
||||
if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold)
|
||||
if (free < threshold)
|
||||
btrfs_commit_transaction_locksafe(fs_info);
|
||||
}
|
||||
|
||||
@ -2959,7 +2911,6 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
|
||||
|
||||
qg = unode_aux_to_qgroup(unode);
|
||||
|
||||
trace_qgroup_update_reserve(fs_info, qg, num_bytes, type);
|
||||
qgroup_rsv_add(fs_info, qg, num_bytes, type);
|
||||
}
|
||||
|
||||
@ -3026,7 +2977,6 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
|
||||
|
||||
qg = unode_aux_to_qgroup(unode);
|
||||
|
||||
trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes, type);
|
||||
qgroup_rsv_release(fs_info, qg, num_bytes, type);
|
||||
|
||||
list_for_each_entry(glist, &qg->groups, next_group) {
|
||||
@ -3783,3 +3733,241 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
|
||||
}
|
||||
extent_changeset_release(&changeset);
|
||||
}
|
||||
|
||||
void btrfs_qgroup_init_swapped_blocks(
|
||||
struct btrfs_qgroup_swapped_blocks *swapped_blocks)
|
||||
{
|
||||
int i;
|
||||
|
||||
spin_lock_init(&swapped_blocks->lock);
|
||||
for (i = 0; i < BTRFS_MAX_LEVEL; i++)
|
||||
swapped_blocks->blocks[i] = RB_ROOT;
|
||||
swapped_blocks->swapped = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete all swapped blocks record of @root.
|
||||
* Every record here means we skipped a full subtree scan for qgroup.
|
||||
*
|
||||
* Gets called when committing one transaction.
|
||||
*/
|
||||
void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_qgroup_swapped_blocks *swapped_blocks;
|
||||
int i;
|
||||
|
||||
swapped_blocks = &root->swapped_blocks;
|
||||
|
||||
spin_lock(&swapped_blocks->lock);
|
||||
if (!swapped_blocks->swapped)
|
||||
goto out;
|
||||
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
|
||||
struct rb_root *cur_root = &swapped_blocks->blocks[i];
|
||||
struct btrfs_qgroup_swapped_block *entry;
|
||||
struct btrfs_qgroup_swapped_block *next;
|
||||
|
||||
rbtree_postorder_for_each_entry_safe(entry, next, cur_root,
|
||||
node)
|
||||
kfree(entry);
|
||||
swapped_blocks->blocks[i] = RB_ROOT;
|
||||
}
|
||||
swapped_blocks->swapped = false;
|
||||
out:
|
||||
spin_unlock(&swapped_blocks->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add subtree roots record into @subvol_root.
|
||||
*
|
||||
* @subvol_root: tree root of the subvolume tree get swapped
|
||||
* @bg: block group under balance
|
||||
* @subvol_parent/slot: pointer to the subtree root in subvolume tree
|
||||
* @reloc_parent/slot: pointer to the subtree root in reloc tree
|
||||
* BOTH POINTERS ARE BEFORE TREE SWAP
|
||||
* @last_snapshot: last snapshot generation of the subvolume tree
|
||||
*/
|
||||
int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *subvol_root,
|
||||
struct btrfs_block_group_cache *bg,
|
||||
struct extent_buffer *subvol_parent, int subvol_slot,
|
||||
struct extent_buffer *reloc_parent, int reloc_slot,
|
||||
u64 last_snapshot)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = subvol_root->fs_info;
|
||||
struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks;
|
||||
struct btrfs_qgroup_swapped_block *block;
|
||||
struct rb_node **cur;
|
||||
struct rb_node *parent = NULL;
|
||||
int level = btrfs_header_level(subvol_parent) - 1;
|
||||
int ret = 0;
|
||||
|
||||
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
|
||||
return 0;
|
||||
|
||||
if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) >
|
||||
btrfs_node_ptr_generation(reloc_parent, reloc_slot)) {
|
||||
btrfs_err_rl(fs_info,
|
||||
"%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu",
|
||||
__func__,
|
||||
btrfs_node_ptr_generation(subvol_parent, subvol_slot),
|
||||
btrfs_node_ptr_generation(reloc_parent, reloc_slot));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
block = kmalloc(sizeof(*block), GFP_NOFS);
|
||||
if (!block) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* @reloc_parent/slot is still before swap, while @block is going to
|
||||
* record the bytenr after swap, so we do the swap here.
|
||||
*/
|
||||
block->subvol_bytenr = btrfs_node_blockptr(reloc_parent, reloc_slot);
|
||||
block->subvol_generation = btrfs_node_ptr_generation(reloc_parent,
|
||||
reloc_slot);
|
||||
block->reloc_bytenr = btrfs_node_blockptr(subvol_parent, subvol_slot);
|
||||
block->reloc_generation = btrfs_node_ptr_generation(subvol_parent,
|
||||
subvol_slot);
|
||||
block->last_snapshot = last_snapshot;
|
||||
block->level = level;
|
||||
if (bg->flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
block->trace_leaf = true;
|
||||
else
|
||||
block->trace_leaf = false;
|
||||
btrfs_node_key_to_cpu(reloc_parent, &block->first_key, reloc_slot);
|
||||
|
||||
/* Insert @block into @blocks */
|
||||
spin_lock(&blocks->lock);
|
||||
cur = &blocks->blocks[level].rb_node;
|
||||
while (*cur) {
|
||||
struct btrfs_qgroup_swapped_block *entry;
|
||||
|
||||
parent = *cur;
|
||||
entry = rb_entry(parent, struct btrfs_qgroup_swapped_block,
|
||||
node);
|
||||
|
||||
if (entry->subvol_bytenr < block->subvol_bytenr) {
|
||||
cur = &(*cur)->rb_left;
|
||||
} else if (entry->subvol_bytenr > block->subvol_bytenr) {
|
||||
cur = &(*cur)->rb_right;
|
||||
} else {
|
||||
if (entry->subvol_generation !=
|
||||
block->subvol_generation ||
|
||||
entry->reloc_bytenr != block->reloc_bytenr ||
|
||||
entry->reloc_generation !=
|
||||
block->reloc_generation) {
|
||||
/*
|
||||
* Duplicated but mismatch entry found.
|
||||
* Shouldn't happen.
|
||||
*
|
||||
* Marking qgroup inconsistent should be enough
|
||||
* for end users.
|
||||
*/
|
||||
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
|
||||
ret = -EEXIST;
|
||||
}
|
||||
kfree(block);
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
rb_link_node(&block->node, parent, cur);
|
||||
rb_insert_color(&block->node, &blocks->blocks[level]);
|
||||
blocks->swapped = true;
|
||||
out_unlock:
|
||||
spin_unlock(&blocks->lock);
|
||||
out:
|
||||
if (ret < 0)
|
||||
fs_info->qgroup_flags |=
|
||||
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the tree block is a subtree root, and if so do the needed
|
||||
* delayed subtree trace for qgroup.
|
||||
*
|
||||
* This is called during btrfs_cow_block().
|
||||
*/
|
||||
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct extent_buffer *subvol_eb)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks;
|
||||
struct btrfs_qgroup_swapped_block *block;
|
||||
struct extent_buffer *reloc_eb = NULL;
|
||||
struct rb_node *node;
|
||||
bool found = false;
|
||||
bool swapped = false;
|
||||
int level = btrfs_header_level(subvol_eb);
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
|
||||
return 0;
|
||||
if (!is_fstree(root->root_key.objectid) || !root->reloc_root)
|
||||
return 0;
|
||||
|
||||
spin_lock(&blocks->lock);
|
||||
if (!blocks->swapped) {
|
||||
spin_unlock(&blocks->lock);
|
||||
return 0;
|
||||
}
|
||||
node = blocks->blocks[level].rb_node;
|
||||
|
||||
while (node) {
|
||||
block = rb_entry(node, struct btrfs_qgroup_swapped_block, node);
|
||||
if (block->subvol_bytenr < subvol_eb->start) {
|
||||
node = node->rb_left;
|
||||
} else if (block->subvol_bytenr > subvol_eb->start) {
|
||||
node = node->rb_right;
|
||||
} else {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
spin_unlock(&blocks->lock);
|
||||
goto out;
|
||||
}
|
||||
/* Found one, remove it from @blocks first and update blocks->swapped */
|
||||
rb_erase(&block->node, &blocks->blocks[level]);
|
||||
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
|
||||
if (RB_EMPTY_ROOT(&blocks->blocks[i])) {
|
||||
swapped = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
blocks->swapped = swapped;
|
||||
spin_unlock(&blocks->lock);
|
||||
|
||||
/* Read out reloc subtree root */
|
||||
reloc_eb = read_tree_block(fs_info, block->reloc_bytenr,
|
||||
block->reloc_generation, block->level,
|
||||
&block->first_key);
|
||||
if (IS_ERR(reloc_eb)) {
|
||||
ret = PTR_ERR(reloc_eb);
|
||||
reloc_eb = NULL;
|
||||
goto free_out;
|
||||
}
|
||||
if (!extent_buffer_uptodate(reloc_eb)) {
|
||||
ret = -EIO;
|
||||
goto free_out;
|
||||
}
|
||||
|
||||
ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb,
|
||||
block->last_snapshot, block->trace_leaf);
|
||||
free_out:
|
||||
kfree(block);
|
||||
free_extent_buffer(reloc_eb);
|
||||
out:
|
||||
if (ret < 0) {
|
||||
btrfs_err_rl(fs_info,
|
||||
"failed to account subtree at bytenr %llu: %d",
|
||||
subvol_eb->start, ret);
|
||||
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -6,6 +6,8 @@
|
||||
#ifndef BTRFS_QGROUP_H
|
||||
#define BTRFS_QGROUP_H
|
||||
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include "ulist.h"
|
||||
#include "delayed-ref.h"
|
||||
|
||||
@ -37,6 +39,66 @@
|
||||
* Normally at qgroup rescan and transaction commit time.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Special performance optimization for balance.
|
||||
*
|
||||
* For balance, we need to swap subtree of subvolume and reloc trees.
|
||||
* In theory, we need to trace all subtree blocks of both subvolume and reloc
|
||||
* trees, since their owner has changed during such swap.
|
||||
*
|
||||
* However since balance has ensured that both subtrees are containing the
|
||||
* same contents and have the same tree structures, such swap won't cause
|
||||
* qgroup number change.
|
||||
*
|
||||
* But there is a race window between subtree swap and transaction commit,
|
||||
* during that window, if we increase/decrease tree level or merge/split tree
|
||||
* blocks, we still need to trace the original subtrees.
|
||||
*
|
||||
* So for balance, we use a delayed subtree tracing, whose workflow is:
|
||||
*
|
||||
* 1) Record the subtree root blocks that get swapped.
|
||||
*
|
||||
* During subtree swap:
|
||||
* O = Old tree blocks
|
||||
* N = New tree blocks
|
||||
*	      reloc tree                     subvolume tree X
*             Root                               Root
*            /    \                             /    \
*          NA     OB                          OA      OB
*        /  |     |  \                      /  |      |  \
*      NC  ND     OE  OF                   OC  OD     OE  OF
|
||||
*
|
||||
* In this case, NA and OA are going to be swapped, record (NA, OA) into
|
||||
* subvolume tree X.
|
||||
*
|
||||
* 2) After subtree swap.
|
||||
*	      reloc tree                     subvolume tree X
*             Root                               Root
*            /    \                             /    \
*          OA     OB                          NA      OB
*        /  |     |  \                      /  |      |  \
*      OC  OD     OE  OF                   NC  ND     OE  OF
|
||||
*
|
||||
* 3a) COW happens for OB
|
||||
* If we are going to COW tree block OB, we check OB's bytenr against
|
||||
* tree X's swapped_blocks structure.
|
||||
* If it doesn't fit any, nothing will happen.
|
||||
*
|
||||
* 3b) COW happens for NA
|
||||
* Check NA's bytenr against tree X's swapped_blocks, and get a hit.
|
||||
* Then we do subtree scan on both subtrees OA and NA.
|
||||
* Resulting in 6 tree blocks to be scanned (OA, OC, OD, NA, NC, ND).
|
||||
*
|
||||
* Then no matter what we do to subvolume tree X, qgroup numbers will
|
||||
* still be correct.
|
||||
* Then NA's record gets removed from X's swapped_blocks.
|
||||
*
|
||||
* 4) Transaction commit
|
||||
* Any record in X's swapped_blocks gets removed, since there is no
|
||||
* modification to the swapped subtrees, no need to trigger heavy qgroup
|
||||
* subtree rescan for them.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Record a dirty extent, and inform qgroup to update quota on it
|
||||
* TODO: Use kmem cache to alloc it.
|
||||
@ -45,9 +107,38 @@ struct btrfs_qgroup_extent_record {
|
||||
struct rb_node node;
|
||||
u64 bytenr;
|
||||
u64 num_bytes;
|
||||
|
||||
/*
|
||||
* For qgroup reserved data space freeing.
|
||||
*
|
||||
* @data_rsv_refroot and @data_rsv will be recorded after
|
||||
* BTRFS_ADD_DELAYED_EXTENT is called.
|
||||
* And will be used to free reserved qgroup space at
|
||||
* transaction commit time.
|
||||
*/
|
||||
u32 data_rsv; /* reserved data space needs to be freed */
|
||||
u64 data_rsv_refroot; /* which root the reserved data belongs to */
|
||||
struct ulist *old_roots;
|
||||
};
|
||||
|
||||
struct btrfs_qgroup_swapped_block {
|
||||
struct rb_node node;
|
||||
|
||||
int level;
|
||||
bool trace_leaf;
|
||||
|
||||
/* bytenr/generation of the tree block in subvolume tree after swap */
|
||||
u64 subvol_bytenr;
|
||||
u64 subvol_generation;
|
||||
|
||||
/* bytenr/generation of the tree block in reloc tree after swap */
|
||||
u64 reloc_bytenr;
|
||||
u64 reloc_generation;
|
||||
|
||||
u64 last_snapshot;
|
||||
struct btrfs_key first_key;
|
||||
};
|
||||
|
||||
/*
|
||||
* Qgroup reservation types:
|
||||
*
|
||||
@ -236,12 +327,6 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
|
||||
int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *root_eb,
|
||||
u64 root_gen, int root_level);
|
||||
|
||||
int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_block_group_cache *bg_cache,
|
||||
struct extent_buffer *src_parent, int src_slot,
|
||||
struct extent_buffer *dst_parent, int dst_slot,
|
||||
u64 last_snapshot);
|
||||
int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
|
||||
u64 num_bytes, struct ulist *old_roots,
|
||||
struct ulist *new_roots);
|
||||
@ -252,15 +337,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
|
||||
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
|
||||
u64 ref_root, u64 num_bytes,
|
||||
enum btrfs_qgroup_rsv_type type);
|
||||
/*
 * Hand @num_bytes of data reservation for @ref_root to
 * btrfs_qgroup_free_refroot() as BTRFS_QGROUP_RSV_DATA.
 *
 * Nothing is done (and no trace event is emitted) unless
 * BTRFS_FS_QUOTA_ENABLED is set in @fs_info->flags.
 */
static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
						 u64 ref_root, u64 num_bytes)
{
	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
		trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root,
						    num_bytes);
		btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
					  BTRFS_QGROUP_RSV_DATA);
	}
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
|
||||
@ -325,4 +401,18 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
|
||||
|
||||
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
|
||||
|
||||
/* btrfs_qgroup_swapped_blocks related functions */
|
||||
void btrfs_qgroup_init_swapped_blocks(
|
||||
struct btrfs_qgroup_swapped_blocks *swapped_blocks);
|
||||
|
||||
void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root);
|
||||
int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *subvol_root,
|
||||
struct btrfs_block_group_cache *bg,
|
||||
struct extent_buffer *subvol_parent, int subvol_slot,
|
||||
struct extent_buffer *reloc_parent, int reloc_slot,
|
||||
u64 last_snapshot);
|
||||
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct extent_buffer *eb);
|
||||
|
||||
#endif
|
||||
|
@ -583,7 +583,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
|
||||
return -EIO;
|
||||
}
|
||||
btrfs_tree_read_lock(eb);
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
btrfs_set_lock_blocking_read(eb);
|
||||
path->nodes[level-1] = eb;
|
||||
path->slots[level-1] = 0;
|
||||
path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
|
||||
@ -987,7 +987,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
|
||||
return -ENOMEM;
|
||||
|
||||
eb = btrfs_read_lock_root_node(fs_info->extent_root);
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
btrfs_set_lock_blocking_read(eb);
|
||||
level = btrfs_header_level(eb);
|
||||
path->nodes[level] = eb;
|
||||
path->slots[level] = 0;
|
||||
|
@ -162,6 +162,8 @@ struct reloc_control {
|
||||
struct mapping_tree reloc_root_tree;
|
||||
/* list of reloc trees */
|
||||
struct list_head reloc_roots;
|
||||
/* list of subvolume trees that get relocated */
|
||||
struct list_head dirty_subvol_roots;
|
||||
/* size of metadata reservation for merging reloc trees */
|
||||
u64 merging_rsv_size;
|
||||
/* size of relocated tree nodes */
|
||||
@ -1467,15 +1469,17 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root_item *root_item;
|
||||
int ret;
|
||||
|
||||
if (!root->reloc_root)
|
||||
if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state) ||
|
||||
!root->reloc_root)
|
||||
goto out;
|
||||
|
||||
reloc_root = root->reloc_root;
|
||||
root_item = &reloc_root->root_item;
|
||||
|
||||
/* root->reloc_root will stay until current relocation finished */
|
||||
if (fs_info->reloc_ctl->merge_reloc_tree &&
|
||||
btrfs_root_refs(root_item) == 0) {
|
||||
root->reloc_root = NULL;
|
||||
set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
|
||||
__del_reloc_root(reloc_root);
|
||||
}
|
||||
|
||||
@ -1773,7 +1777,7 @@ again:
|
||||
btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
|
||||
|
||||
eb = btrfs_lock_root_node(dest);
|
||||
btrfs_set_lock_blocking(eb);
|
||||
btrfs_set_lock_blocking_write(eb);
|
||||
level = btrfs_header_level(eb);
|
||||
|
||||
if (level < lowest_level) {
|
||||
@ -1786,7 +1790,7 @@ again:
|
||||
ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
btrfs_set_lock_blocking(eb);
|
||||
btrfs_set_lock_blocking_write(eb);
|
||||
|
||||
if (next_key) {
|
||||
next_key->objectid = (u64)-1;
|
||||
@ -1802,6 +1806,8 @@ again:
|
||||
BUG_ON(level < lowest_level);
|
||||
|
||||
ret = btrfs_bin_search(parent, &key, level, &slot);
|
||||
if (ret < 0)
|
||||
break;
|
||||
if (ret && slot > 0)
|
||||
slot--;
|
||||
|
||||
@ -1852,7 +1858,7 @@ again:
|
||||
slot, &eb);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
btrfs_set_lock_blocking(eb);
|
||||
btrfs_set_lock_blocking_write(eb);
|
||||
|
||||
btrfs_tree_unlock(parent);
|
||||
free_extent_buffer(parent);
|
||||
@ -1885,15 +1891,18 @@ again:
|
||||
* If not traced, we will leak data numbers
|
||||
* 2) Fs subtree
|
||||
* If not traced, we will double count old data
|
||||
* and tree block numbers, if current trans doesn't free
|
||||
* data reloc tree inode.
|
||||
*
|
||||
* We don't scan the subtree right now, but only record
|
||||
* the swapped tree blocks.
|
||||
* The real subtree rescan is delayed until we have new
|
||||
* CoW on the subtree root node before transaction commit.
|
||||
*/
|
||||
ret = btrfs_qgroup_trace_subtree_swap(trans, rc->block_group,
|
||||
parent, slot, path->nodes[level],
|
||||
path->slots[level], last_snapshot);
|
||||
ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
|
||||
rc->block_group, parent, slot,
|
||||
path->nodes[level], path->slots[level],
|
||||
last_snapshot);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
/*
|
||||
* swap blocks in fs tree and reloc tree.
|
||||
*/
|
||||
@ -2120,6 +2129,58 @@ static int find_next_key(struct btrfs_path *path, int level,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert current subvolume into reloc_control::dirty_subvol_roots
|
||||
*/
|
||||
static void insert_dirty_subvol(struct btrfs_trans_handle *trans,
|
||||
struct reloc_control *rc,
|
||||
struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_root *reloc_root = root->reloc_root;
|
||||
struct btrfs_root_item *reloc_root_item;
|
||||
|
||||
/* @root must be a subvolume tree root with a valid reloc tree */
|
||||
ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
|
||||
ASSERT(reloc_root);
|
||||
|
||||
reloc_root_item = &reloc_root->root_item;
|
||||
memset(&reloc_root_item->drop_progress, 0,
|
||||
sizeof(reloc_root_item->drop_progress));
|
||||
reloc_root_item->drop_level = 0;
|
||||
btrfs_set_root_refs(reloc_root_item, 0);
|
||||
btrfs_update_reloc_root(trans, root);
|
||||
|
||||
if (list_empty(&root->reloc_dirty_list)) {
|
||||
btrfs_grab_fs_root(root);
|
||||
list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
|
||||
}
|
||||
}
|
||||
|
||||
static int clean_dirty_subvols(struct reloc_control *rc)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_root *next;
|
||||
int ret = 0;
|
||||
|
||||
list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
|
||||
reloc_dirty_list) {
|
||||
struct btrfs_root *reloc_root = root->reloc_root;
|
||||
|
||||
clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
|
||||
list_del_init(&root->reloc_dirty_list);
|
||||
root->reloc_root = NULL;
|
||||
if (reloc_root) {
|
||||
int ret2;
|
||||
|
||||
ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1);
|
||||
if (ret2 < 0 && !ret)
|
||||
ret = ret2;
|
||||
}
|
||||
btrfs_put_fs_root(root);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* merge the relocated tree blocks in reloc tree with corresponding
|
||||
* fs tree.
|
||||
@ -2128,7 +2189,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
|
||||
struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
|
||||
LIST_HEAD(inode_list);
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key next_key;
|
||||
struct btrfs_trans_handle *trans = NULL;
|
||||
@ -2259,13 +2319,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
|
||||
if (err == 0) {
|
||||
memset(&root_item->drop_progress, 0,
|
||||
sizeof(root_item->drop_progress));
|
||||
root_item->drop_level = 0;
|
||||
btrfs_set_root_refs(root_item, 0);
|
||||
btrfs_update_reloc_root(trans, root);
|
||||
}
|
||||
if (err == 0)
|
||||
insert_dirty_subvol(trans, rc, root);
|
||||
|
||||
if (trans)
|
||||
btrfs_end_transaction_throttle(trans);
|
||||
@ -2410,14 +2465,6 @@ again:
|
||||
} else {
|
||||
list_del_init(&reloc_root->root_list);
|
||||
}
|
||||
|
||||
ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
|
||||
if (ret < 0) {
|
||||
if (list_empty(&reloc_root->root_list))
|
||||
list_add_tail(&reloc_root->root_list,
|
||||
&reloc_roots);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
@ -2685,6 +2732,10 @@ static int do_relocation(struct btrfs_trans_handle *trans,
|
||||
if (!lowest) {
|
||||
ret = btrfs_bin_search(upper->eb, key,
|
||||
upper->level, &slot);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto next;
|
||||
}
|
||||
BUG_ON(ret);
|
||||
bytenr = btrfs_node_blockptr(upper->eb, slot);
|
||||
if (node->eb->start == bytenr)
|
||||
@ -2720,6 +2771,10 @@ static int do_relocation(struct btrfs_trans_handle *trans,
|
||||
} else {
|
||||
ret = btrfs_bin_search(upper->eb, key, upper->level,
|
||||
&slot);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto next;
|
||||
}
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
@ -2752,7 +2807,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
|
||||
goto next;
|
||||
}
|
||||
btrfs_tree_lock(eb);
|
||||
btrfs_set_lock_blocking(eb);
|
||||
btrfs_set_lock_blocking_write(eb);
|
||||
|
||||
if (!node->eb) {
|
||||
ret = btrfs_cow_block(trans, root, eb, upper->eb,
|
||||
@ -4079,6 +4134,9 @@ restart:
|
||||
goto out_free;
|
||||
}
|
||||
btrfs_commit_transaction(trans);
|
||||
ret = clean_dirty_subvols(rc);
|
||||
if (ret < 0 && !err)
|
||||
err = ret;
|
||||
out_free:
|
||||
btrfs_free_block_rsv(fs_info, rc->block_rsv);
|
||||
btrfs_free_path(path);
|
||||
@ -4173,6 +4231,7 @@ static struct reloc_control *alloc_reloc_control(void)
|
||||
return NULL;
|
||||
|
||||
INIT_LIST_HEAD(&rc->reloc_roots);
|
||||
INIT_LIST_HEAD(&rc->dirty_subvol_roots);
|
||||
backref_cache_init(&rc->backref_cache);
|
||||
mapping_tree_init(&rc->reloc_root_tree);
|
||||
extent_io_tree_init(&rc->processed_blocks, NULL);
|
||||
@ -4468,6 +4527,10 @@ int btrfs_recover_relocation(struct btrfs_root *root)
|
||||
goto out_free;
|
||||
}
|
||||
err = btrfs_commit_transaction(trans);
|
||||
|
||||
ret = clean_dirty_subvols(rc);
|
||||
if (ret < 0 && !err)
|
||||
err = ret;
|
||||
out_free:
|
||||
kfree(rc);
|
||||
out:
|
||||
|
@ -21,12 +21,12 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
|
||||
struct btrfs_root_item *item)
|
||||
{
|
||||
uuid_le uuid;
|
||||
int len;
|
||||
u32 len;
|
||||
int need_reset = 0;
|
||||
|
||||
len = btrfs_item_size_nr(eb, slot);
|
||||
read_extent_buffer(eb, item, btrfs_item_ptr_offset(eb, slot),
|
||||
min_t(int, len, (int)sizeof(*item)));
|
||||
min_t(u32, len, sizeof(*item)));
|
||||
if (len < sizeof(*item))
|
||||
need_reset = 1;
|
||||
if (!need_reset && btrfs_root_generation(item)
|
||||
|
@ -584,6 +584,7 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
|
||||
sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
|
||||
sctx->curr = -1;
|
||||
sctx->fs_info = fs_info;
|
||||
INIT_LIST_HEAD(&sctx->csum_list);
|
||||
for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
|
||||
struct scrub_bio *sbio;
|
||||
|
||||
@ -608,7 +609,6 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
|
||||
atomic_set(&sctx->workers_pending, 0);
|
||||
atomic_set(&sctx->cancel_req, 0);
|
||||
sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
|
||||
INIT_LIST_HEAD(&sctx->csum_list);
|
||||
|
||||
spin_lock_init(&sctx->list_lock);
|
||||
spin_lock_init(&sctx->stat_lock);
|
||||
@ -3741,25 +3741,33 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
|
||||
unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
|
||||
int max_active = fs_info->thread_pool_size;
|
||||
|
||||
if (fs_info->scrub_workers_refcnt == 0) {
|
||||
lockdep_assert_held(&fs_info->scrub_lock);
|
||||
|
||||
if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
|
||||
ASSERT(fs_info->scrub_workers == NULL);
|
||||
fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
|
||||
flags, is_dev_replace ? 1 : max_active, 4);
|
||||
if (!fs_info->scrub_workers)
|
||||
goto fail_scrub_workers;
|
||||
|
||||
ASSERT(fs_info->scrub_wr_completion_workers == NULL);
|
||||
fs_info->scrub_wr_completion_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
|
||||
max_active, 2);
|
||||
if (!fs_info->scrub_wr_completion_workers)
|
||||
goto fail_scrub_wr_completion_workers;
|
||||
|
||||
ASSERT(fs_info->scrub_parity_workers == NULL);
|
||||
fs_info->scrub_parity_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
|
||||
max_active, 2);
|
||||
if (!fs_info->scrub_parity_workers)
|
||||
goto fail_scrub_parity_workers;
|
||||
|
||||
refcount_set(&fs_info->scrub_workers_refcnt, 1);
|
||||
} else {
|
||||
refcount_inc(&fs_info->scrub_workers_refcnt);
|
||||
}
|
||||
++fs_info->scrub_workers_refcnt;
|
||||
return 0;
|
||||
|
||||
fail_scrub_parity_workers:
|
||||
@ -3770,16 +3778,6 @@ fail_scrub_workers:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (--fs_info->scrub_workers_refcnt == 0) {
|
||||
btrfs_destroy_workqueue(fs_info->scrub_workers);
|
||||
btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
|
||||
btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
|
||||
}
|
||||
WARN_ON(fs_info->scrub_workers_refcnt < 0);
|
||||
}
|
||||
|
||||
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
u64 end, struct btrfs_scrub_progress *progress,
|
||||
int readonly, int is_dev_replace)
|
||||
@ -3788,6 +3786,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
int ret;
|
||||
struct btrfs_device *dev;
|
||||
unsigned int nofs_flag;
|
||||
struct btrfs_workqueue *scrub_workers = NULL;
|
||||
struct btrfs_workqueue *scrub_wr_comp = NULL;
|
||||
struct btrfs_workqueue *scrub_parity = NULL;
|
||||
|
||||
if (btrfs_fs_closing(fs_info))
|
||||
return -EINVAL;
|
||||
@ -3835,7 +3836,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
return PTR_ERR(sctx);
|
||||
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
dev = btrfs_find_device(fs_info, devid, NULL, NULL);
|
||||
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
|
||||
if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
|
||||
!is_dev_replace)) {
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
@ -3903,6 +3904,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
*/
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
if (!is_dev_replace) {
|
||||
btrfs_info(fs_info, "scrub: started on devid %llu", devid);
|
||||
/*
|
||||
* by holding device list mutex, we can
|
||||
* kick off writing super in log tree sync.
|
||||
@ -3925,11 +3927,26 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
if (progress)
|
||||
memcpy(progress, &sctx->stat, sizeof(*progress));
|
||||
|
||||
if (!is_dev_replace)
|
||||
btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
|
||||
ret ? "not finished" : "finished", devid, ret);
|
||||
|
||||
mutex_lock(&fs_info->scrub_lock);
|
||||
dev->scrub_ctx = NULL;
|
||||
scrub_workers_put(fs_info);
|
||||
if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) {
|
||||
scrub_workers = fs_info->scrub_workers;
|
||||
scrub_wr_comp = fs_info->scrub_wr_completion_workers;
|
||||
scrub_parity = fs_info->scrub_parity_workers;
|
||||
|
||||
fs_info->scrub_workers = NULL;
|
||||
fs_info->scrub_wr_completion_workers = NULL;
|
||||
fs_info->scrub_parity_workers = NULL;
|
||||
}
|
||||
mutex_unlock(&fs_info->scrub_lock);
|
||||
|
||||
btrfs_destroy_workqueue(scrub_workers);
|
||||
btrfs_destroy_workqueue(scrub_wr_comp);
|
||||
btrfs_destroy_workqueue(scrub_parity);
|
||||
scrub_put_ctx(sctx);
|
||||
|
||||
return ret;
|
||||
@ -4012,7 +4029,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
|
||||
struct scrub_ctx *sctx = NULL;
|
||||
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
dev = btrfs_find_device(fs_info, devid, NULL, NULL);
|
||||
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
|
||||
if (dev)
|
||||
sctx = dev->scrub_ctx;
|
||||
if (sctx)
|
||||
|
@ -529,7 +529,9 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
if (token != Opt_compress &&
|
||||
token != Opt_compress_force)
|
||||
info->compress_level =
|
||||
btrfs_compress_str2level(args[0].from);
|
||||
btrfs_compress_str2level(
|
||||
BTRFS_COMPRESS_ZLIB,
|
||||
args[0].from + 4);
|
||||
btrfs_set_opt(info->mount_opt, COMPRESS);
|
||||
btrfs_clear_opt(info->mount_opt, NODATACOW);
|
||||
btrfs_clear_opt(info->mount_opt, NODATASUM);
|
||||
@ -542,9 +544,13 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
btrfs_clear_opt(info->mount_opt, NODATASUM);
|
||||
btrfs_set_fs_incompat(info, COMPRESS_LZO);
|
||||
no_compress = 0;
|
||||
} else if (strcmp(args[0].from, "zstd") == 0) {
|
||||
} else if (strncmp(args[0].from, "zstd", 4) == 0) {
|
||||
compress_type = "zstd";
|
||||
info->compress_type = BTRFS_COMPRESS_ZSTD;
|
||||
info->compress_level =
|
||||
btrfs_compress_str2level(
|
||||
BTRFS_COMPRESS_ZSTD,
|
||||
args[0].from + 4);
|
||||
btrfs_set_opt(info->mount_opt, COMPRESS);
|
||||
btrfs_clear_opt(info->mount_opt, NODATACOW);
|
||||
btrfs_clear_opt(info->mount_opt, NODATASUM);
|
||||
@ -2190,6 +2196,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
|
||||
ret = PTR_ERR_OR_ZERO(device);
|
||||
mutex_unlock(&uuid_mutex);
|
||||
break;
|
||||
case BTRFS_IOC_FORGET_DEV:
|
||||
ret = btrfs_forget_devices(vol->name);
|
||||
break;
|
||||
case BTRFS_IOC_DEVICES_READY:
|
||||
mutex_lock(&uuid_mutex);
|
||||
device = btrfs_scan_one_device(vol->name, FMODE_READ,
|
||||
|
@ -122,6 +122,7 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans)
|
||||
if (is_fstree(root->root_key.objectid))
|
||||
btrfs_unpin_free_ino(root);
|
||||
clear_btree_io_tree(&root->dirty_log_pages);
|
||||
btrfs_qgroup_clean_swapped_blocks(root);
|
||||
}
|
||||
|
||||
/* We can free old roots now. */
|
||||
@ -845,8 +846,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||
btrfs_trans_release_metadata(trans);
|
||||
trans->block_rsv = NULL;
|
||||
|
||||
if (!list_empty(&trans->new_bgs))
|
||||
btrfs_create_pending_block_groups(trans);
|
||||
btrfs_create_pending_block_groups(trans);
|
||||
|
||||
btrfs_trans_release_chunk_metadata(trans);
|
||||
|
||||
@ -1532,7 +1532,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
btrfs_set_lock_blocking(old);
|
||||
btrfs_set_lock_blocking_write(old);
|
||||
|
||||
ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
|
||||
/* clean up in any case */
|
||||
@ -1943,8 +1943,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
cur_trans->delayed_refs.flushing = 1;
|
||||
smp_wmb();
|
||||
|
||||
if (!list_empty(&trans->new_bgs))
|
||||
btrfs_create_pending_block_groups(trans);
|
||||
btrfs_create_pending_block_groups(trans);
|
||||
|
||||
ret = btrfs_run_delayed_refs(trans, 0);
|
||||
if (ret) {
|
||||
|
@ -52,7 +52,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
|
||||
u32 nritems;
|
||||
|
||||
root_node = btrfs_lock_root_node(root);
|
||||
btrfs_set_lock_blocking(root_node);
|
||||
btrfs_set_lock_blocking_write(root_node);
|
||||
nritems = btrfs_header_nritems(root_node);
|
||||
root->defrag_max.objectid = 0;
|
||||
/* from above we know this is not a leaf */
|
||||
|
@ -27,6 +27,7 @@
|
||||
#define LOG_INODE_ALL 0
|
||||
#define LOG_INODE_EXISTS 1
|
||||
#define LOG_OTHER_INODE 2
|
||||
#define LOG_OTHER_INODE_ALL 3
|
||||
|
||||
/*
|
||||
* directory trouble cases
|
||||
@ -1330,6 +1331,67 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Add the directory entry @name (length @namelen) in @dir for @inode,
 * first removing any existing entry of the same name that belongs to
 * another inode.
 *
 * If @dir already has an entry called @name, the inode it points to is
 * read, unlinked from @dir and the delayed items are run before the new
 * link is added with btrfs_add_link() at index @ref_index.
 *
 * Returns 0 on success; -ENOMEM if no path could be allocated, -ENOENT if
 * the inode owning the colliding entry could not be read, or the error
 * returned by the lookup, unlink, delayed-item or add-link step.
 */
static int add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct inode *dir, struct inode *inode, const char *name,
		    int namelen, u64 ref_index)
{
	struct btrfs_dir_item *existing;
	struct btrfs_key location;
	struct btrfs_path *path;
	struct inode *victim = NULL;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	existing = btrfs_lookup_dir_item(NULL, root, path,
					 btrfs_ino(BTRFS_I(dir)),
					 name, namelen, 0);
	if (IS_ERR(existing)) {
		ret = PTR_ERR(existing);
		goto out;
	}

	if (!existing) {
		/* No collision, the name can be added right away. */
		btrfs_release_path(path);
	} else {
		/*
		 * Our inode's dentry collides with the dentry of another
		 * inode which is in the log but not yet processed since it
		 * has a higher inode number.  So delete that other dentry.
		 */
		btrfs_dir_item_key_to_cpu(path->nodes[0], existing, &location);
		btrfs_release_path(path);

		victim = read_one_inode(root, location.objectid);
		if (!victim) {
			ret = -ENOENT;
			goto out;
		}

		ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
					 BTRFS_I(victim), name, namelen);
		if (ret)
			goto out;

		/*
		 * If we dropped the link count to 0, bump it so that later
		 * the iput() on the inode will not free it.  We will fixup
		 * the link count later.
		 */
		if (victim->i_nlink == 0)
			inc_nlink(victim);

		ret = btrfs_run_delayed_items(trans);
		if (ret)
			goto out;
	}

	ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
			     name, namelen, 0, ref_index);
out:
	/* victim is still NULL when no colliding inode was read */
	iput(victim);
	btrfs_free_path(path);

	return ret;
}
|
||||
|
||||
/*
|
||||
* replay one inode back reference item found in the log tree.
|
||||
* eb, slot and key refer to the buffer and key found in the log tree.
|
||||
@ -1466,9 +1528,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
|
||||
goto out;
|
||||
|
||||
/* insert our name */
|
||||
ret = btrfs_add_link(trans, BTRFS_I(dir),
|
||||
BTRFS_I(inode),
|
||||
name, namelen, 0, ref_index);
|
||||
ret = add_link(trans, root, dir, inode, name, namelen,
|
||||
ref_index);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -2663,7 +2724,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (trans) {
|
||||
btrfs_tree_lock(next);
|
||||
btrfs_set_lock_blocking(next);
|
||||
btrfs_set_lock_blocking_write(next);
|
||||
clean_tree_block(fs_info, next);
|
||||
btrfs_wait_tree_block_writeback(next);
|
||||
btrfs_tree_unlock(next);
|
||||
@ -2747,7 +2808,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (trans) {
|
||||
btrfs_tree_lock(next);
|
||||
btrfs_set_lock_blocking(next);
|
||||
btrfs_set_lock_blocking_write(next);
|
||||
clean_tree_block(fs_info, next);
|
||||
btrfs_wait_tree_block_writeback(next);
|
||||
btrfs_tree_unlock(next);
|
||||
@ -2829,7 +2890,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (trans) {
|
||||
btrfs_tree_lock(next);
|
||||
btrfs_set_lock_blocking(next);
|
||||
btrfs_set_lock_blocking_write(next);
|
||||
clean_tree_block(fs_info, next);
|
||||
btrfs_wait_tree_block_writeback(next);
|
||||
btrfs_tree_unlock(next);
|
||||
@ -3706,6 +3767,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
|
||||
found_key.type = 0;
|
||||
ret = btrfs_bin_search(path->nodes[0], &found_key, 0,
|
||||
&start_slot);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
ret = btrfs_del_items(trans, log, path, start_slot,
|
||||
path->slots[0] - start_slot + 1);
|
||||
@ -4717,7 +4780,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
|
||||
const int slot,
|
||||
const struct btrfs_key *key,
|
||||
struct btrfs_inode *inode,
|
||||
u64 *other_ino)
|
||||
u64 *other_ino, u64 *other_parent)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_path *search_path;
|
||||
@ -4780,8 +4843,13 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
|
||||
btrfs_dir_item_key_to_cpu(search_path->nodes[0],
|
||||
di, &di_key);
|
||||
if (di_key.type == BTRFS_INODE_ITEM_KEY) {
|
||||
ret = 1;
|
||||
*other_ino = di_key.objectid;
|
||||
if (di_key.objectid != key->objectid) {
|
||||
ret = 1;
|
||||
*other_ino = di_key.objectid;
|
||||
*other_parent = parent;
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
} else {
|
||||
ret = -EAGAIN;
|
||||
}
|
||||
@ -4801,6 +4869,144 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct btrfs_ino_list {
|
||||
u64 ino;
|
||||
u64 parent;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
/*
 * Log inode @ino, which owns a directory entry that conflicts with an inode
 * being logged, and then every further inode found to conflict with it.
 *
 * The inodes are kept on a local FIFO list.  Each one is logged with
 * LOG_OTHER_INODE, then its INODE_REF/INODE_EXTREF items are checked with
 * btrfs_check_ref_name_override(); every new conflict found is appended to
 * the list.  If an inode can no longer be looked up (-ENOENT), the directory
 * recorded as its parent is logged with LOG_OTHER_INODE_ALL instead.
 *
 * @path is released at the start of every iteration but is not released
 * before returning.
 *
 * Returns 0 on success or a negative errno.  After the first failure the
 * remaining list entries are only freed, not processed.
 */
static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  struct btrfs_log_ctx *ctx,
				  u64 ino, u64 parent)
{
	struct btrfs_ino_list *elem;
	LIST_HEAD(pending);
	int ret = 0;

	elem = kmalloc(sizeof(*elem), GFP_NOFS);
	if (!elem)
		return -ENOMEM;
	elem->ino = ino;
	elem->parent = parent;
	list_add_tail(&elem->list, &pending);

	while (!list_empty(&pending)) {
		struct btrfs_fs_info *fs_info = root->fs_info;
		struct btrfs_key key;
		struct inode *inode;

		elem = list_first_entry(&pending, struct btrfs_ino_list, list);
		ino = elem->ino;
		parent = elem->parent;
		list_del(&elem->list);
		kfree(elem);

		/* After a failure keep looping only to free queued entries. */
		if (ret)
			continue;

		btrfs_release_path(path);

		key.objectid = ino;
		key.type = BTRFS_INODE_ITEM_KEY;
		key.offset = 0;
		inode = btrfs_iget(fs_info->sb, &key, root, NULL);
		if (IS_ERR(inode)) {
			ret = PTR_ERR(inode);
			if (ret != -ENOENT)
				continue;

			/*
			 * If the other inode that had a conflicting dir entry
			 * was deleted in the current transaction, we need to
			 * log its parent directory.
			 */
			key.objectid = parent;
			inode = btrfs_iget(fs_info->sb, &key, root, NULL);
			if (IS_ERR(inode)) {
				ret = PTR_ERR(inode);
				continue;
			}
			ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
					      LOG_OTHER_INODE_ALL,
					      0, LLONG_MAX, ctx);
			iput(inode);
			continue;
		}

		/*
		 * We are safe logging the other inode without acquiring its
		 * lock as long as we log with the LOG_INODE_EXISTS mode. We
		 * are safe against concurrent renames of the other inode as
		 * well because during a rename we pin the log and update the
		 * log with the new name before we unpin it.
		 */
		ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
				      LOG_OTHER_INODE, 0, LLONG_MAX, ctx);
		if (ret) {
			iput(inode);
			continue;
		}

		/* Walk every name of this inode looking for more conflicts. */
		key.objectid = ino;
		key.type = BTRFS_INODE_REF_KEY;
		key.offset = 0;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0) {
			iput(inode);
			continue;
		}

		for (;;) {
			struct extent_buffer *leaf = path->nodes[0];
			int slot = path->slots[0];
			u64 other_ino = 0;
			u64 other_parent = 0;

			if (slot >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				/* > 0 means no more leaves, not an error */
				if (ret > 0)
					ret = 0;
				break;
			}

			btrfs_item_key_to_cpu(leaf, &key, slot);
			if (key.objectid != ino ||
			    (key.type != BTRFS_INODE_REF_KEY &&
			     key.type != BTRFS_INODE_EXTREF_KEY)) {
				/* Past the last ref item of this inode. */
				ret = 0;
				break;
			}

			ret = btrfs_check_ref_name_override(leaf, slot, &key,
							    BTRFS_I(inode),
							    &other_ino,
							    &other_parent);
			if (ret < 0)
				break;
			if (ret > 0) {
				/* Another conflicting inode, queue it. */
				elem = kmalloc(sizeof(*elem), GFP_NOFS);
				if (!elem) {
					ret = -ENOMEM;
					break;
				}
				elem->ino = other_ino;
				elem->parent = other_parent;
				list_add_tail(&elem->list, &pending);
				ret = 0;
			}
			path->slots[0]++;
		}
		iput(inode);
	}

	return ret;
}
|
||||
|
||||
/* log a single inode in the tree log.
|
||||
* At least one parent directory for this inode must exist in the tree
|
||||
* or be logged already.
|
||||
@ -4840,6 +5046,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
u64 logged_isize = 0;
|
||||
bool need_log_inode_item = true;
|
||||
bool xattrs_logged = false;
|
||||
bool recursive_logging = false;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
@ -4885,8 +5092,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (inode_only == LOG_OTHER_INODE) {
|
||||
inode_only = LOG_INODE_EXISTS;
|
||||
if (inode_only == LOG_OTHER_INODE || inode_only == LOG_OTHER_INODE_ALL) {
|
||||
recursive_logging = true;
|
||||
if (inode_only == LOG_OTHER_INODE)
|
||||
inode_only = LOG_INODE_EXISTS;
|
||||
else
|
||||
inode_only = LOG_INODE_ALL;
|
||||
mutex_lock_nested(&inode->log_mutex, SINGLE_DEPTH_NESTING);
|
||||
} else {
|
||||
mutex_lock(&inode->log_mutex);
|
||||
@ -4981,20 +5192,19 @@ again:
|
||||
|
||||
if ((min_key.type == BTRFS_INODE_REF_KEY ||
|
||||
min_key.type == BTRFS_INODE_EXTREF_KEY) &&
|
||||
inode->generation == trans->transid) {
|
||||
inode->generation == trans->transid &&
|
||||
!recursive_logging) {
|
||||
u64 other_ino = 0;
|
||||
u64 other_parent = 0;
|
||||
|
||||
ret = btrfs_check_ref_name_override(path->nodes[0],
|
||||
path->slots[0], &min_key, inode,
|
||||
&other_ino);
|
||||
&other_ino, &other_parent);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
} else if (ret > 0 && ctx &&
|
||||
other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
|
||||
struct btrfs_key inode_key;
|
||||
struct inode *other_inode;
|
||||
|
||||
if (ins_nr > 0) {
|
||||
ins_nr++;
|
||||
} else {
|
||||
@ -5010,43 +5220,13 @@ again:
|
||||
goto out_unlock;
|
||||
}
|
||||
ins_nr = 0;
|
||||
btrfs_release_path(path);
|
||||
inode_key.objectid = other_ino;
|
||||
inode_key.type = BTRFS_INODE_ITEM_KEY;
|
||||
inode_key.offset = 0;
|
||||
other_inode = btrfs_iget(fs_info->sb,
|
||||
&inode_key, root,
|
||||
NULL);
|
||||
/*
|
||||
* If the other inode that had a conflicting dir
|
||||
* entry was deleted in the current transaction,
|
||||
* we don't need to do more work nor fallback to
|
||||
* a transaction commit.
|
||||
*/
|
||||
if (other_inode == ERR_PTR(-ENOENT)) {
|
||||
goto next_key;
|
||||
} else if (IS_ERR(other_inode)) {
|
||||
err = PTR_ERR(other_inode);
|
||||
goto out_unlock;
|
||||
}
|
||||
/*
|
||||
* We are safe logging the other inode without
|
||||
* acquiring its i_mutex as long as we log with
|
||||
* the LOG_INODE_EXISTS mode. We're safe against
|
||||
* concurrent renames of the other inode as well
|
||||
* because during a rename we pin the log and
|
||||
* update the log with the new name before we
|
||||
* unpin it.
|
||||
*/
|
||||
err = btrfs_log_inode(trans, root,
|
||||
BTRFS_I(other_inode),
|
||||
LOG_OTHER_INODE, 0, LLONG_MAX,
|
||||
ctx);
|
||||
iput(other_inode);
|
||||
|
||||
err = log_conflicting_inodes(trans, root, path,
|
||||
ctx, other_ino, other_parent);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
else
|
||||
goto next_key;
|
||||
btrfs_release_path(path);
|
||||
goto next_key;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -415,27 +415,6 @@ static struct btrfs_device *__alloc_device(void)
|
||||
return dev;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a device specified by @devid or @uuid in the list of @fs_devices, or
|
||||
* return NULL.
|
||||
*
|
||||
* If devid and uuid are both specified, the match must be exact, otherwise
|
||||
* only devid is used.
|
||||
*/
|
||||
static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
|
||||
u64 devid, const u8 *uuid)
|
||||
{
|
||||
struct btrfs_device *dev;
|
||||
|
||||
list_for_each_entry(dev, &fs_devices->devices, dev_list) {
|
||||
if (dev->devid == devid &&
|
||||
(!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
|
||||
return dev;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static noinline struct btrfs_fs_devices *find_fsid(
|
||||
const u8 *fsid, const u8 *metadata_fsid)
|
||||
{
|
||||
@ -734,6 +713,17 @@ static void pending_bios_fn(struct btrfs_work *work)
|
||||
run_scheduled_bios(device);
|
||||
}
|
||||
|
||||
static bool device_path_matched(const char *path, struct btrfs_device *device)
|
||||
{
|
||||
int found;
|
||||
|
||||
rcu_read_lock();
|
||||
found = strcmp(rcu_str_deref(device->name), path);
|
||||
rcu_read_unlock();
|
||||
|
||||
return found == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search and remove all stale (devices which are not mounted) devices.
|
||||
* When both inputs are NULL, it will search and release all stale devices.
|
||||
@ -741,52 +731,57 @@ static void pending_bios_fn(struct btrfs_work *work)
|
||||
* matching this path only.
|
||||
* skip_dev: Optional. Will skip this device when searching for the stale
|
||||
* devices.
|
||||
* Return: 0 for success or if @path is NULL.
|
||||
* -EBUSY if @path is a mounted device.
|
||||
* -ENOENT if @path does not match any device in the list.
|
||||
*/
|
||||
static void btrfs_free_stale_devices(const char *path,
|
||||
static int btrfs_free_stale_devices(const char *path,
|
||||
struct btrfs_device *skip_device)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
|
||||
struct btrfs_device *device, *tmp_device;
|
||||
int ret = 0;
|
||||
|
||||
if (path)
|
||||
ret = -ENOENT;
|
||||
|
||||
list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
if (fs_devices->opened) {
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
continue;
|
||||
}
|
||||
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
list_for_each_entry_safe(device, tmp_device,
|
||||
&fs_devices->devices, dev_list) {
|
||||
int not_found = 0;
|
||||
|
||||
if (skip_device && skip_device == device)
|
||||
continue;
|
||||
if (path && !device->name)
|
||||
continue;
|
||||
|
||||
rcu_read_lock();
|
||||
if (path)
|
||||
not_found = strcmp(rcu_str_deref(device->name),
|
||||
path);
|
||||
rcu_read_unlock();
|
||||
if (not_found)
|
||||
if (path && !device_path_matched(path, device))
|
||||
continue;
|
||||
if (fs_devices->opened) {
|
||||
/* for an already deleted device return 0 */
|
||||
if (path && ret != 0)
|
||||
ret = -EBUSY;
|
||||
break;
|
||||
}
|
||||
|
||||
/* delete the stale device */
|
||||
fs_devices->num_devices--;
|
||||
list_del(&device->dev_list);
|
||||
btrfs_free_device(device);
|
||||
|
||||
ret = 0;
|
||||
if (fs_devices->num_devices == 0)
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
|
||||
if (fs_devices->num_devices == 0) {
|
||||
btrfs_sysfs_remove_fsid(fs_devices);
|
||||
list_del(&fs_devices->fs_list);
|
||||
free_fs_devices(fs_devices);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
|
||||
@ -968,8 +963,8 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||
device = NULL;
|
||||
} else {
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
device = find_device(fs_devices, devid,
|
||||
disk_super->dev_item.uuid);
|
||||
device = btrfs_find_device(fs_devices, devid,
|
||||
disk_super->dev_item.uuid, NULL, false);
|
||||
|
||||
/*
|
||||
* If this disk has been pulled into an fs devices created by
|
||||
@ -1134,7 +1129,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
|
||||
mutex_lock(&orig->device_list_mutex);
|
||||
fs_devices->total_devices = orig->total_devices;
|
||||
|
||||
/* We have held the volume lock, it is safe to get the devices. */
|
||||
list_for_each_entry(orig_dev, &orig->devices, dev_list) {
|
||||
struct rcu_string *name;
|
||||
|
||||
@ -1451,6 +1445,17 @@ static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_forget_devices(const char *path)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&uuid_mutex);
|
||||
ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
|
||||
mutex_unlock(&uuid_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Look for a btrfs signature on a device. This may be called out of the mount path
|
||||
* and we are not allowed to call set_blocksize during the scan. The superblock
|
||||
@ -2385,11 +2390,11 @@ static struct btrfs_device *btrfs_find_device_by_path(
|
||||
devid = btrfs_stack_device_id(&disk_super->dev_item);
|
||||
dev_uuid = disk_super->dev_item.uuid;
|
||||
if (btrfs_fs_incompat(fs_info, METADATA_UUID))
|
||||
device = btrfs_find_device(fs_info, devid, dev_uuid,
|
||||
disk_super->metadata_uuid);
|
||||
device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
|
||||
disk_super->metadata_uuid, true);
|
||||
else
|
||||
device = btrfs_find_device(fs_info, devid,
|
||||
dev_uuid, disk_super->fsid);
|
||||
device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
|
||||
disk_super->fsid, true);
|
||||
|
||||
brelse(bh);
|
||||
if (!device)
|
||||
@ -2398,50 +2403,38 @@ static struct btrfs_device *btrfs_find_device_by_path(
|
||||
return device;
|
||||
}
|
||||
|
||||
static struct btrfs_device *btrfs_find_device_missing_or_by_path(
|
||||
struct btrfs_fs_info *fs_info, const char *device_path)
|
||||
{
|
||||
struct btrfs_device *device = NULL;
|
||||
if (strcmp(device_path, "missing") == 0) {
|
||||
struct list_head *devices;
|
||||
struct btrfs_device *tmp;
|
||||
|
||||
devices = &fs_info->fs_devices->devices;
|
||||
list_for_each_entry(tmp, devices, dev_list) {
|
||||
if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
|
||||
&tmp->dev_state) && !tmp->bdev) {
|
||||
device = tmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!device)
|
||||
return ERR_PTR(-ENOENT);
|
||||
} else {
|
||||
device = btrfs_find_device_by_path(fs_info, device_path);
|
||||
}
|
||||
|
||||
return device;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup a device given by device id, or the path if the id is 0.
|
||||
*/
|
||||
struct btrfs_device *btrfs_find_device_by_devspec(
|
||||
struct btrfs_fs_info *fs_info, u64 devid, const char *devpath)
|
||||
struct btrfs_fs_info *fs_info, u64 devid,
|
||||
const char *device_path)
|
||||
{
|
||||
struct btrfs_device *device;
|
||||
|
||||
if (devid) {
|
||||
device = btrfs_find_device(fs_info, devid, NULL, NULL);
|
||||
device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
|
||||
NULL, true);
|
||||
if (!device)
|
||||
return ERR_PTR(-ENOENT);
|
||||
} else {
|
||||
if (!devpath || !devpath[0])
|
||||
return ERR_PTR(-EINVAL);
|
||||
device = btrfs_find_device_missing_or_by_path(fs_info, devpath);
|
||||
return device;
|
||||
}
|
||||
return device;
|
||||
|
||||
if (!device_path || !device_path[0])
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (strcmp(device_path, "missing") == 0) {
|
||||
/* Find first missing device */
|
||||
list_for_each_entry(device, &fs_info->fs_devices->devices,
|
||||
dev_list) {
|
||||
if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
|
||||
&device->dev_state) && !device->bdev)
|
||||
return device;
|
||||
}
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
|
||||
return btrfs_find_device_by_path(fs_info, device_path);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2563,7 +2556,8 @@ next_slot:
|
||||
BTRFS_UUID_SIZE);
|
||||
read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
|
||||
BTRFS_FSID_SIZE);
|
||||
device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
|
||||
device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
|
||||
fs_uuid, true);
|
||||
BUG_ON(!device); /* Logic error */
|
||||
|
||||
if (device->fs_devices->seeding) {
|
||||
@ -6616,21 +6610,36 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
|
||||
u8 *uuid, u8 *fsid)
|
||||
/*
|
||||
* Find a device specified by @devid or @uuid in the list of @fs_devices, or
|
||||
* return NULL.
|
||||
*
|
||||
* If devid and uuid are both specified, the match must be exact, otherwise
|
||||
* only devid is used.
|
||||
*
|
||||
* If @seed is true, traverse through the seed devices.
|
||||
*/
|
||||
struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
|
||||
u64 devid, u8 *uuid, u8 *fsid,
|
||||
bool seed)
|
||||
{
|
||||
struct btrfs_device *device;
|
||||
struct btrfs_fs_devices *cur_devices;
|
||||
|
||||
cur_devices = fs_info->fs_devices;
|
||||
while (cur_devices) {
|
||||
while (fs_devices) {
|
||||
if (!fsid ||
|
||||
!memcmp(cur_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
|
||||
device = find_device(cur_devices, devid, uuid);
|
||||
if (device)
|
||||
return device;
|
||||
!memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
|
||||
list_for_each_entry(device, &fs_devices->devices,
|
||||
dev_list) {
|
||||
if (device->devid == devid &&
|
||||
(!uuid || memcmp(device->uuid, uuid,
|
||||
BTRFS_UUID_SIZE) == 0))
|
||||
return device;
|
||||
}
|
||||
}
|
||||
cur_devices = cur_devices->seed;
|
||||
if (seed)
|
||||
fs_devices = fs_devices->seed;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@ -6782,10 +6791,10 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
|
||||
(type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
|
||||
(type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
|
||||
(type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
|
||||
(type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
|
||||
(type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
|
||||
(type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
|
||||
((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
|
||||
num_stripes != 1)) {
|
||||
btrfs_err(fs_info,
|
||||
@ -6875,8 +6884,8 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
|
||||
read_extent_buffer(leaf, uuid, (unsigned long)
|
||||
btrfs_stripe_dev_uuid_nr(chunk, i),
|
||||
BTRFS_UUID_SIZE);
|
||||
map->stripes[i].dev = btrfs_find_device(fs_info, devid,
|
||||
uuid, NULL);
|
||||
map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
|
||||
devid, uuid, NULL, true);
|
||||
if (!map->stripes[i].dev &&
|
||||
!btrfs_test_opt(fs_info, DEGRADED)) {
|
||||
free_extent_map(em);
|
||||
@ -7015,7 +7024,8 @@ static int read_one_dev(struct btrfs_fs_info *fs_info,
|
||||
return PTR_ERR(fs_devices);
|
||||
}
|
||||
|
||||
device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
|
||||
device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
|
||||
fs_uuid, true);
|
||||
if (!device) {
|
||||
if (!btrfs_test_opt(fs_info, DEGRADED)) {
|
||||
btrfs_report_missing_device(fs_info, devid,
|
||||
@ -7605,7 +7615,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
|
||||
int i;
|
||||
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
dev = btrfs_find_device(fs_info, stats->devid, NULL, NULL);
|
||||
dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL,
|
||||
true);
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
|
||||
if (!dev) {
|
||||
@ -7819,7 +7830,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
/* Make sure no dev extent is beyond device boundary */
|
||||
dev = btrfs_find_device(fs_info, devid, NULL, NULL);
|
||||
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
|
||||
if (!dev) {
|
||||
btrfs_err(fs_info, "failed to find devid %llu", devid);
|
||||
ret = -EUCLEAN;
|
||||
@ -7828,7 +7839,8 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
|
||||
|
||||
/* It's possible this device is a dummy for seed device */
|
||||
if (dev->disk_total_bytes == 0) {
|
||||
dev = find_device(fs_info->fs_devices->seed, devid, NULL);
|
||||
dev = btrfs_find_device(fs_info->fs_devices->seed, devid, NULL,
|
||||
NULL, false);
|
||||
if (!dev) {
|
||||
btrfs_err(fs_info, "failed to find seed devid %llu",
|
||||
devid);
|
||||
|
@ -416,6 +416,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
|
||||
fmode_t flags, void *holder);
|
||||
struct btrfs_device *btrfs_scan_one_device(const char *path,
|
||||
fmode_t flags, void *holder);
|
||||
int btrfs_forget_devices(const char *path);
|
||||
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
|
||||
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
|
||||
void btrfs_assign_next_active_device(struct btrfs_device *device,
|
||||
@ -433,8 +434,8 @@ void __exit btrfs_cleanup_fs_uuids(void);
|
||||
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
|
||||
int btrfs_grow_device(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_device *device, u64 new_size);
|
||||
struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
|
||||
u8 *uuid, u8 *fsid);
|
||||
struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
|
||||
u64 devid, u8 *uuid, u8 *fsid, bool seed);
|
||||
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
|
||||
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
|
||||
int btrfs_balance(struct btrfs_fs_info *fs_info,
|
||||
|
@ -27,6 +27,33 @@ struct workspace {
|
||||
int level;
|
||||
};
|
||||
|
||||
static struct workspace_manager wsm;
|
||||
|
||||
static void zlib_init_workspace_manager(void)
|
||||
{
|
||||
btrfs_init_workspace_manager(&wsm, &btrfs_zlib_compress);
|
||||
}
|
||||
|
||||
static void zlib_cleanup_workspace_manager(void)
|
||||
{
|
||||
btrfs_cleanup_workspace_manager(&wsm);
|
||||
}
|
||||
|
||||
static struct list_head *zlib_get_workspace(unsigned int level)
|
||||
{
|
||||
struct list_head *ws = btrfs_get_workspace(&wsm, level);
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
|
||||
workspace->level = level;
|
||||
|
||||
return ws;
|
||||
}
|
||||
|
||||
static void zlib_put_workspace(struct list_head *ws)
|
||||
{
|
||||
btrfs_put_workspace(&wsm, ws);
|
||||
}
|
||||
|
||||
static void zlib_free_workspace(struct list_head *ws)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
@ -36,7 +63,7 @@ static void zlib_free_workspace(struct list_head *ws)
|
||||
kfree(workspace);
|
||||
}
|
||||
|
||||
static struct list_head *zlib_alloc_workspace(void)
|
||||
static struct list_head *zlib_alloc_workspace(unsigned int level)
|
||||
{
|
||||
struct workspace *workspace;
|
||||
int workspacesize;
|
||||
@ -48,6 +75,7 @@ static struct list_head *zlib_alloc_workspace(void)
|
||||
workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
|
||||
zlib_inflate_workspacesize());
|
||||
workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL);
|
||||
workspace->level = level;
|
||||
workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
if (!workspace->strm.workspace || !workspace->buf)
|
||||
goto fail;
|
||||
@ -390,18 +418,19 @@ next:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void zlib_set_level(struct list_head *ws, unsigned int type)
|
||||
static unsigned int zlib_set_level(unsigned int level)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
unsigned level = (type & 0xF0) >> 4;
|
||||
if (!level)
|
||||
return BTRFS_ZLIB_DEFAULT_LEVEL;
|
||||
|
||||
if (level > 9)
|
||||
level = 9;
|
||||
|
||||
workspace->level = level > 0 ? level : 3;
|
||||
return min_t(unsigned int, level, 9);
|
||||
}
|
||||
|
||||
const struct btrfs_compress_op btrfs_zlib_compress = {
|
||||
.init_workspace_manager = zlib_init_workspace_manager,
|
||||
.cleanup_workspace_manager = zlib_cleanup_workspace_manager,
|
||||
.get_workspace = zlib_get_workspace,
|
||||
.put_workspace = zlib_put_workspace,
|
||||
.alloc_workspace = zlib_alloc_workspace,
|
||||
.free_workspace = zlib_free_workspace,
|
||||
.compress_pages = zlib_compress_pages,
|
||||
|
316
fs/btrfs/zstd.c
316
fs/btrfs/zstd.c
@ -6,25 +6,31 @@
|
||||
*/
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/zstd.h>
|
||||
#include "compression.h"
|
||||
#include "ctree.h"
|
||||
|
||||
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
|
||||
#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
|
||||
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
|
||||
#define ZSTD_BTRFS_MAX_LEVEL 15
|
||||
/* 307s to avoid pathologically clashing with transaction commit */
|
||||
#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
|
||||
|
||||
static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len)
|
||||
static ZSTD_parameters zstd_get_btrfs_parameters(unsigned int level,
|
||||
size_t src_len)
|
||||
{
|
||||
ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL,
|
||||
src_len, 0);
|
||||
ZSTD_parameters params = ZSTD_getParams(level, src_len, 0);
|
||||
|
||||
if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
|
||||
params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
|
||||
@ -36,11 +42,290 @@ struct workspace {
|
||||
void *mem;
|
||||
size_t size;
|
||||
char *buf;
|
||||
unsigned int level;
|
||||
unsigned int req_level;
|
||||
unsigned long last_used; /* jiffies */
|
||||
struct list_head list;
|
||||
struct list_head lru_list;
|
||||
ZSTD_inBuffer in_buf;
|
||||
ZSTD_outBuffer out_buf;
|
||||
};
|
||||
|
||||
/*
|
||||
* Zstd Workspace Management
|
||||
*
|
||||
* Zstd workspaces have different memory requirements depending on the level.
|
||||
* The zstd workspaces are managed by having individual lists for each level
|
||||
* and a global lru. Forward progress is maintained by protecting a max level
|
||||
* workspace.
|
||||
*
|
||||
* Getting a workspace is done by using the bitmap to identify the levels that
|
||||
* have available workspaces and scans up. This lets us recycle higher level
|
||||
* workspaces because of the monotonic memory guarantee. A workspace's
|
||||
* last_used is only updated if it is being used by the corresponding memory
|
||||
* level. Putting a workspace involves adding it back to the appropriate places
|
||||
* and adding it back to the lru if necessary.
|
||||
*
|
||||
* A timer is used to reclaim workspaces if they have not been used for
|
||||
* ZSTD_BTRFS_RECLAIM_JIFFIES. This helps keep only active workspaces around.
|
||||
* The upper bound is provided by the workqueue limit which is 2 (percpu limit).
|
||||
*/
|
||||
|
||||
struct zstd_workspace_manager {
|
||||
const struct btrfs_compress_op *ops;
|
||||
spinlock_t lock;
|
||||
struct list_head lru_list;
|
||||
struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
|
||||
unsigned long active_map;
|
||||
wait_queue_head_t wait;
|
||||
struct timer_list timer;
|
||||
};
|
||||
|
||||
static struct zstd_workspace_manager wsm;
|
||||
|
||||
static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
|
||||
|
||||
static inline struct workspace *list_to_workspace(struct list_head *list)
|
||||
{
|
||||
return container_of(list, struct workspace, list);
|
||||
}
|
||||
|
||||
/*
|
||||
* zstd_reclaim_timer_fn - reclaim timer
|
||||
* @t: timer
|
||||
*
|
||||
* This scans the lru_list and attempts to reclaim any workspace that hasn't
|
||||
* been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
|
||||
*/
|
||||
static void zstd_reclaim_timer_fn(struct timer_list *timer)
|
||||
{
|
||||
unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
|
||||
struct list_head *pos, *next;
|
||||
|
||||
spin_lock(&wsm.lock);
|
||||
|
||||
if (list_empty(&wsm.lru_list)) {
|
||||
spin_unlock(&wsm.lock);
|
||||
return;
|
||||
}
|
||||
|
||||
list_for_each_prev_safe(pos, next, &wsm.lru_list) {
|
||||
struct workspace *victim = container_of(pos, struct workspace,
|
||||
lru_list);
|
||||
unsigned int level;
|
||||
|
||||
if (time_after(victim->last_used, reclaim_threshold))
|
||||
break;
|
||||
|
||||
/* workspace is in use */
|
||||
if (victim->req_level)
|
||||
continue;
|
||||
|
||||
level = victim->level;
|
||||
list_del(&victim->lru_list);
|
||||
list_del(&victim->list);
|
||||
wsm.ops->free_workspace(&victim->list);
|
||||
|
||||
if (list_empty(&wsm.idle_ws[level - 1]))
|
||||
clear_bit(level - 1, &wsm.active_map);
|
||||
|
||||
}
|
||||
|
||||
if (!list_empty(&wsm.lru_list))
|
||||
mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
|
||||
|
||||
spin_unlock(&wsm.lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* zstd_calc_ws_mem_sizes - calculate monotonic memory bounds
|
||||
*
|
||||
* It is possible based on the level configurations that a higher level
|
||||
* workspace uses less memory than a lower level workspace. In order to reuse
|
||||
* workspaces, this must be made a monotonic relationship. This precomputes
|
||||
* the required memory for each level and enforces the monotonicity between
|
||||
* level and memory required.
|
||||
*/
|
||||
static void zstd_calc_ws_mem_sizes(void)
|
||||
{
|
||||
size_t max_size = 0;
|
||||
unsigned int level;
|
||||
|
||||
for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
|
||||
ZSTD_parameters params =
|
||||
zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
|
||||
size_t level_size =
|
||||
max_t(size_t,
|
||||
ZSTD_CStreamWorkspaceBound(params.cParams),
|
||||
ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
|
||||
|
||||
max_size = max_t(size_t, max_size, level_size);
|
||||
zstd_ws_mem_sizes[level - 1] = max_size;
|
||||
}
|
||||
}
|
||||
|
||||
static void zstd_init_workspace_manager(void)
|
||||
{
|
||||
struct list_head *ws;
|
||||
int i;
|
||||
|
||||
zstd_calc_ws_mem_sizes();
|
||||
|
||||
wsm.ops = &btrfs_zstd_compress;
|
||||
spin_lock_init(&wsm.lock);
|
||||
init_waitqueue_head(&wsm.wait);
|
||||
timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0);
|
||||
|
||||
INIT_LIST_HEAD(&wsm.lru_list);
|
||||
for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
|
||||
INIT_LIST_HEAD(&wsm.idle_ws[i]);
|
||||
|
||||
ws = wsm.ops->alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
|
||||
if (IS_ERR(ws)) {
|
||||
pr_warn(
|
||||
"BTRFS: cannot preallocate zstd compression workspace\n");
|
||||
} else {
|
||||
set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
|
||||
list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
static void zstd_cleanup_workspace_manager(void)
|
||||
{
|
||||
struct workspace *workspace;
|
||||
int i;
|
||||
|
||||
del_timer(&wsm.timer);
|
||||
|
||||
for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
|
||||
while (!list_empty(&wsm.idle_ws[i])) {
|
||||
workspace = container_of(wsm.idle_ws[i].next,
|
||||
struct workspace, list);
|
||||
list_del(&workspace->list);
|
||||
list_del(&workspace->lru_list);
|
||||
wsm.ops->free_workspace(&workspace->list);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* zstd_find_workspace - find workspace
|
||||
* @level: compression level
|
||||
*
|
||||
* This iterates over the set bits in the active_map beginning at the requested
|
||||
* compression level. This lets us utilize already allocated workspaces before
|
||||
* allocating a new one. If the workspace is of a larger size, it is used, but
|
||||
* the place in the lru_list and last_used times are not updated. This is to
|
||||
* offer the opportunity to reclaim the workspace in favor of allocating an
|
||||
* appropriately sized one in the future.
|
||||
*/
|
||||
static struct list_head *zstd_find_workspace(unsigned int level)
|
||||
{
|
||||
struct list_head *ws;
|
||||
struct workspace *workspace;
|
||||
int i = level - 1;
|
||||
|
||||
spin_lock(&wsm.lock);
|
||||
for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
|
||||
if (!list_empty(&wsm.idle_ws[i])) {
|
||||
ws = wsm.idle_ws[i].next;
|
||||
workspace = list_to_workspace(ws);
|
||||
list_del_init(ws);
|
||||
/* keep its place if it's a lower level using this */
|
||||
workspace->req_level = level;
|
||||
if (level == workspace->level)
|
||||
list_del(&workspace->lru_list);
|
||||
if (list_empty(&wsm.idle_ws[i]))
|
||||
clear_bit(i, &wsm.active_map);
|
||||
spin_unlock(&wsm.lock);
|
||||
return ws;
|
||||
}
|
||||
}
|
||||
spin_unlock(&wsm.lock);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* zstd_get_workspace - zstd's get_workspace
|
||||
* @level: compression level
|
||||
*
|
||||
* If @level is 0, then any compression level can be used. Therefore, we begin
|
||||
* scanning from 1. We first scan through possible workspaces and then after
|
||||
* attempt to allocate a new workspace. If we fail to allocate one due to
|
||||
* memory pressure, go to sleep waiting for the max level workspace to free up.
|
||||
*/
|
||||
static struct list_head *zstd_get_workspace(unsigned int level)
|
||||
{
|
||||
struct list_head *ws;
|
||||
unsigned int nofs_flag;
|
||||
|
||||
/* level == 0 means we can use any workspace */
|
||||
if (!level)
|
||||
level = 1;
|
||||
|
||||
again:
|
||||
ws = zstd_find_workspace(level);
|
||||
if (ws)
|
||||
return ws;
|
||||
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
ws = wsm.ops->alloc_workspace(level);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
|
||||
if (IS_ERR(ws)) {
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE);
|
||||
schedule();
|
||||
finish_wait(&wsm.wait, &wait);
|
||||
|
||||
goto again;
|
||||
}
|
||||
|
||||
return ws;
|
||||
}
|
||||
|
||||
/*
|
||||
* zstd_put_workspace - zstd put_workspace
|
||||
* @ws: list_head for the workspace
|
||||
*
|
||||
* When putting back a workspace, we only need to update the LRU if we are of
|
||||
* the requested compression level. Here is where we continue to protect the
|
||||
* max level workspace or update last_used accordingly. If the reclaim timer
|
||||
* isn't set, it is also set here. Only the max level workspace tries and wakes
|
||||
* up waiting workspaces.
|
||||
*/
|
||||
static void zstd_put_workspace(struct list_head *ws)
|
||||
{
|
||||
struct workspace *workspace = list_to_workspace(ws);
|
||||
|
||||
spin_lock(&wsm.lock);
|
||||
|
||||
/* A node is only taken off the lru if we are the corresponding level */
|
||||
if (workspace->req_level == workspace->level) {
|
||||
/* Hide a max level workspace from reclaim */
|
||||
if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
|
||||
INIT_LIST_HEAD(&workspace->lru_list);
|
||||
} else {
|
||||
workspace->last_used = jiffies;
|
||||
list_add(&workspace->lru_list, &wsm.lru_list);
|
||||
if (!timer_pending(&wsm.timer))
|
||||
mod_timer(&wsm.timer,
|
||||
jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
|
||||
}
|
||||
}
|
||||
|
||||
set_bit(workspace->level - 1, &wsm.active_map);
|
||||
list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
|
||||
workspace->req_level = 0;
|
||||
|
||||
spin_unlock(&wsm.lock);
|
||||
|
||||
if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
|
||||
cond_wake_up(&wsm.wait);
|
||||
}
|
||||
|
||||
static void zstd_free_workspace(struct list_head *ws)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
@ -50,25 +335,25 @@ static void zstd_free_workspace(struct list_head *ws)
|
||||
kfree(workspace);
|
||||
}
|
||||
|
||||
static struct list_head *zstd_alloc_workspace(void)
|
||||
static struct list_head *zstd_alloc_workspace(unsigned int level)
|
||||
{
|
||||
ZSTD_parameters params =
|
||||
zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT);
|
||||
struct workspace *workspace;
|
||||
|
||||
workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
|
||||
if (!workspace)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
workspace->size = max_t(size_t,
|
||||
ZSTD_CStreamWorkspaceBound(params.cParams),
|
||||
ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
|
||||
workspace->size = zstd_ws_mem_sizes[level - 1];
|
||||
workspace->level = level;
|
||||
workspace->req_level = level;
|
||||
workspace->last_used = jiffies;
|
||||
workspace->mem = kvmalloc(workspace->size, GFP_KERNEL);
|
||||
workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
if (!workspace->mem || !workspace->buf)
|
||||
goto fail;
|
||||
|
||||
INIT_LIST_HEAD(&workspace->list);
|
||||
INIT_LIST_HEAD(&workspace->lru_list);
|
||||
|
||||
return &workspace->list;
|
||||
fail:
|
||||
@ -95,7 +380,8 @@ static int zstd_compress_pages(struct list_head *ws,
|
||||
unsigned long len = *total_out;
|
||||
const unsigned long nr_dest_pages = *out_pages;
|
||||
unsigned long max_out = nr_dest_pages * PAGE_SIZE;
|
||||
ZSTD_parameters params = zstd_get_btrfs_parameters(len);
|
||||
ZSTD_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
|
||||
len);
|
||||
|
||||
*out_pages = 0;
|
||||
*total_out = 0;
|
||||
@ -419,11 +705,19 @@ finish:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void zstd_set_level(struct list_head *ws, unsigned int type)
|
||||
static unsigned int zstd_set_level(unsigned int level)
|
||||
{
|
||||
if (!level)
|
||||
return ZSTD_BTRFS_DEFAULT_LEVEL;
|
||||
|
||||
return min_t(unsigned int, level, ZSTD_BTRFS_MAX_LEVEL);
|
||||
}
|
||||
|
||||
const struct btrfs_compress_op btrfs_zstd_compress = {
|
||||
.init_workspace_manager = zstd_init_workspace_manager,
|
||||
.cleanup_workspace_manager = zstd_cleanup_workspace_manager,
|
||||
.get_workspace = zstd_get_workspace,
|
||||
.put_workspace = zstd_put_workspace,
|
||||
.alloc_workspace = zstd_alloc_workspace,
|
||||
.free_workspace = zstd_free_workspace,
|
||||
.compress_pages = zstd_compress_pages,
|
||||
|
@ -1051,6 +1051,7 @@ TRACE_EVENT(btrfs_trigger_flush,
|
||||
{ FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR"}, \
|
||||
{ FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS"}, \
|
||||
{ ALLOC_CHUNK, "ALLOC_CHUNK"}, \
|
||||
{ ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE"}, \
|
||||
{ COMMIT_TRANS, "COMMIT_TRANS"})
|
||||
|
||||
TRACE_EVENT(btrfs_flush_space,
|
||||
@ -1512,35 +1513,6 @@ DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_release_data,
|
||||
TP_ARGS(inode, start, len, reserved, op)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref,
|
||||
|
||||
TP_PROTO(const struct btrfs_fs_info *fs_info,
|
||||
u64 ref_root, u64 reserved),
|
||||
|
||||
TP_ARGS(fs_info, ref_root, reserved),
|
||||
|
||||
TP_STRUCT__entry_btrfs(
|
||||
__field( u64, ref_root )
|
||||
__field( u64, reserved )
|
||||
),
|
||||
|
||||
TP_fast_assign_btrfs(fs_info,
|
||||
__entry->ref_root = ref_root;
|
||||
__entry->reserved = reserved;
|
||||
),
|
||||
|
||||
TP_printk_btrfs("root=%llu reserved=%llu op=free",
|
||||
__entry->ref_root, __entry->reserved)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs__qgroup_delayed_ref, btrfs_qgroup_free_delayed_ref,
|
||||
|
||||
TP_PROTO(const struct btrfs_fs_info *fs_info,
|
||||
u64 ref_root, u64 reserved),
|
||||
|
||||
TP_ARGS(fs_info, ref_root, reserved)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
|
||||
TP_PROTO(const struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_qgroup_extent_record *rec),
|
||||
|
@ -837,6 +837,8 @@ enum btrfs_err_code {
|
||||
struct btrfs_ioctl_vol_args)
|
||||
#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
|
||||
struct btrfs_ioctl_vol_args)
|
||||
#define BTRFS_IOC_FORGET_DEV _IOW(BTRFS_IOCTL_MAGIC, 5, \
|
||||
struct btrfs_ioctl_vol_args)
|
||||
/* trans start and trans end are dangerous, and only for
|
||||
* use by applications that know how to avoid the
|
||||
* resulting deadlocks
|
||||
|
Loading…
Reference in New Issue
Block a user