mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-27 06:04:23 +08:00
bcachefs: BTREE_ITER_WITH_JOURNAL
This adds a new btree iterator flag, BTREE_ITER_WITH_JOURNAL, that is automatically enabled when initializing a btree iterator before journal replay has completed - it overlays the contents of the journal with the btree. This lets us delete bch2_btree_and_journal_walk() and just use the normal btree iterator interface instead - which also lets us delete a significant amount of duplicated code. Note that BTREE_ITER_WITH_JOURNAL is still unoptimized in this patch - we're redoing the binary search over keys in the journal every time we call bch2_btree_iter_peek(). Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
f28620c108
commit
5222a4607c
@ -340,15 +340,23 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
#undef x
|
||||
}
|
||||
|
||||
static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k)
|
||||
int bch2_alloc_read(struct bch_fs *c)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bch_dev *ca;
|
||||
struct bucket *g;
|
||||
struct bkey_alloc_unpacked u;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
down_read(&c->gc_lock);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
if (!bkey_is_alloc(k.k))
|
||||
return 0;
|
||||
continue;
|
||||
|
||||
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
||||
g = bucket(ca, k.k->p.offset);
|
||||
@ -366,20 +374,12 @@ static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k)
|
||||
g->io_time[WRITE] = u.write_time;
|
||||
g->oldest_gen = u.oldest_gen;
|
||||
g->gen_valid = 1;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_alloc_read(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
down_read(&c->gc_lock);
|
||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_alloc, bch2_alloc_read_fn);
|
||||
up_read(&c->gc_lock);
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (ret) {
|
||||
bch_err(c, "error reading alloc info: %i", ret);
|
||||
return ret;
|
||||
|
@ -860,7 +860,6 @@ mempool_t bio_bounce_pages;
|
||||
u64 reflink_hint;
|
||||
reflink_gc_table reflink_gc_table;
|
||||
size_t reflink_gc_nr;
|
||||
size_t reflink_gc_idx;
|
||||
|
||||
/* VFS IO PATH - fs-io.c */
|
||||
struct bio_set writepage_bioset;
|
||||
|
@ -1342,59 +1342,6 @@ static int bch2_gc_start(struct bch_fs *c,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_done_initial_fn(struct btree_trans *trans,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct reflink_gc *r;
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
char buf[200];
|
||||
int ret = 0;
|
||||
|
||||
if (!refcount)
|
||||
return 0;
|
||||
|
||||
r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
|
||||
if (!r)
|
||||
return -ENOMEM;
|
||||
|
||||
if (!r ||
|
||||
r->offset != k.k->p.offset ||
|
||||
r->size != k.k->size) {
|
||||
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
|
||||
"reflink key has wrong refcount:\n"
|
||||
" %s\n"
|
||||
" should be %u",
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
|
||||
r->refcount)) {
|
||||
struct bkey_i *new;
|
||||
|
||||
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||
if (!new) {
|
||||
ret = -ENOMEM;
|
||||
goto fsck_err;
|
||||
}
|
||||
|
||||
bkey_reassemble(new, k);
|
||||
|
||||
if (!r->refcount) {
|
||||
new->k.type = KEY_TYPE_deleted;
|
||||
new->k.size = 0;
|
||||
} else {
|
||||
*bkey_refcount(new) = cpu_to_le64(r->refcount);
|
||||
}
|
||||
|
||||
ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
|
||||
kfree(new);
|
||||
}
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
bool metadata_only)
|
||||
{
|
||||
@ -1411,14 +1358,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
if (initial) {
|
||||
c->reflink_gc_idx = 0;
|
||||
|
||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
|
||||
bch2_gc_reflink_done_initial_fn);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
@ -1426,7 +1365,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
if (!refcount)
|
||||
continue;
|
||||
|
||||
r = genradix_ptr(&c->reflink_gc_table, idx);
|
||||
r = genradix_ptr(&c->reflink_gc_table, idx++);
|
||||
if (!r ||
|
||||
r->offset != k.k->p.offset ||
|
||||
r->size != k.k->size) {
|
||||
@ -1456,7 +1395,9 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
else
|
||||
*bkey_refcount(new) = cpu_to_le64(r->refcount);
|
||||
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||
ret = initial
|
||||
? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, new)
|
||||
: __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||
__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
|
||||
kfree(new);
|
||||
|
||||
@ -1466,33 +1407,40 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||
}
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
out:
|
||||
c->reflink_gc_nr = 0;
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_stripes_done_initial_fn(struct btree_trans *trans,
|
||||
struct bkey_s_c k)
|
||||
static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
|
||||
bool metadata_only)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct gc_stripe *m;
|
||||
const struct bch_stripe *s;
|
||||
char buf[200];
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
if (metadata_only)
|
||||
return 0;
|
||||
|
||||
s = bkey_s_c_to_stripe(k).v;
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
continue;
|
||||
|
||||
s = bkey_s_c_to_stripe(k).v;
|
||||
m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
|
||||
|
||||
for (i = 0; i < s->nr_blocks; i++)
|
||||
if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
|
||||
goto inconsistent;
|
||||
return 0;
|
||||
continue;
|
||||
inconsistent:
|
||||
if (fsck_err_on(true, c,
|
||||
"stripe has wrong block sector count %u:\n"
|
||||
@ -1505,7 +1453,7 @@ inconsistent:
|
||||
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||
if (!new) {
|
||||
ret = -ENOMEM;
|
||||
goto fsck_err;
|
||||
break;
|
||||
}
|
||||
|
||||
bkey_reassemble(&new->k_i, k);
|
||||
@ -1513,57 +1461,20 @@ inconsistent:
|
||||
for (i = 0; i < new->v.nr_blocks; i++)
|
||||
stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
|
||||
|
||||
ret = bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i);
|
||||
ret = initial
|
||||
? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i)
|
||||
: __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||
__bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i));
|
||||
kfree(new);
|
||||
}
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
|
||||
bool metadata_only)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
int ret = 0;
|
||||
|
||||
if (metadata_only)
|
||||
return 0;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
if (initial) {
|
||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
|
||||
bch2_gc_stripes_done_initial_fn);
|
||||
} else {
|
||||
BUG();
|
||||
}
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_start_initial_fn(struct btree_trans *trans,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
|
||||
struct bch_fs *c = trans->c;
|
||||
struct reflink_gc *r;
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
|
||||
if (!refcount)
|
||||
return 0;
|
||||
|
||||
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
|
||||
GFP_KERNEL);
|
||||
if (!r)
|
||||
return -ENOMEM;
|
||||
|
||||
r->offset = k.k->p.offset;
|
||||
r->size = k.k->size;
|
||||
r->refcount = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
||||
bool metadata_only)
|
||||
{
|
||||
@ -1579,12 +1490,6 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
c->reflink_gc_nr = 0;
|
||||
|
||||
if (initial) {
|
||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
|
||||
bch2_gc_reflink_start_initial_fn);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
@ -1604,7 +1509,7 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
||||
r->refcount = 0;
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
out:
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
return ret;
|
||||
}
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "journal.h"
|
||||
#include "recovery.h"
|
||||
#include "replicas.h"
|
||||
#include "subvolume.h"
|
||||
#include "trace.h"
|
||||
@ -1064,6 +1065,7 @@ static inline bool btree_path_advance_to_pos(struct btree_path *path,
|
||||
static void btree_path_verify_new_node(struct btree_trans *trans,
|
||||
struct btree_path *path, struct btree *b)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path_level *l;
|
||||
unsigned plevel;
|
||||
bool parent_locked;
|
||||
@ -1072,6 +1074,9 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
|
||||
if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
|
||||
return;
|
||||
|
||||
if (trans->journal_replay_not_finished)
|
||||
return;
|
||||
|
||||
plevel = b->c.level + 1;
|
||||
if (!btree_path_node(path, plevel))
|
||||
return;
|
||||
@ -1092,7 +1097,7 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
|
||||
char buf4[100];
|
||||
struct bkey uk = bkey_unpack_key(b, k);
|
||||
|
||||
bch2_dump_btree_node(trans->c, l->b);
|
||||
bch2_dump_btree_node(c, l->b);
|
||||
bch2_bpos_to_text(&PBUF(buf1), path->pos);
|
||||
bch2_bkey_to_text(&PBUF(buf2), &uk);
|
||||
bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
|
||||
@ -1283,6 +1288,41 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
|
||||
struct btree_and_journal_iter *jiter)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_buf tmp;
|
||||
unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
|
||||
? (path->level > 1 ? 0 : 2)
|
||||
: (path->level > 1 ? 1 : 16);
|
||||
bool was_locked = btree_node_locked(path, path->level);
|
||||
int ret = 0;
|
||||
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
while (nr && !ret) {
|
||||
if (!bch2_btree_node_relock(trans, path, path->level))
|
||||
break;
|
||||
|
||||
bch2_btree_and_journal_iter_advance(jiter);
|
||||
k = bch2_btree_and_journal_iter_peek(jiter);
|
||||
if (!k.k)
|
||||
break;
|
||||
|
||||
bch2_bkey_buf_reassemble(&tmp, c, k);
|
||||
ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
|
||||
path->level - 1);
|
||||
}
|
||||
|
||||
if (!was_locked)
|
||||
btree_node_unlock(path, path->level);
|
||||
|
||||
bch2_bkey_buf_exit(&tmp, c);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned plevel, struct btree *b)
|
||||
@ -1305,6 +1345,30 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
|
||||
btree_node_unlock(path, plevel);
|
||||
}
|
||||
|
||||
static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned flags,
|
||||
struct bkey_buf *out)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_path_level *l = path_l(path);
|
||||
struct btree_and_journal_iter jiter;
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
|
||||
|
||||
k = bch2_btree_and_journal_iter_peek(&jiter);
|
||||
|
||||
bch2_bkey_buf_reassemble(out, c, k);
|
||||
|
||||
if (flags & BTREE_ITER_PREFETCH)
|
||||
ret = btree_path_prefetch_j(trans, path, &jiter);
|
||||
|
||||
bch2_btree_and_journal_iter_exit(&jiter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __always_inline int btree_path_down(struct btree_trans *trans,
|
||||
struct btree_path *path,
|
||||
unsigned flags,
|
||||
@ -1321,9 +1385,22 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
|
||||
EBUG_ON(!btree_node_locked(path, path->level));
|
||||
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
if (unlikely(trans->journal_replay_not_finished)) {
|
||||
ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
|
||||
if (ret)
|
||||
goto err;
|
||||
} else {
|
||||
bch2_bkey_buf_unpack(&tmp, c, l->b,
|
||||
bch2_btree_node_iter_peek(&l->iter, l->b));
|
||||
|
||||
if (flags & BTREE_ITER_PREFETCH) {
|
||||
ret = btree_path_prefetch(trans, path);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
|
||||
ret = PTR_ERR_OR_ZERO(b);
|
||||
if (unlikely(ret))
|
||||
@ -1332,13 +1409,11 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
|
||||
mark_btree_node_locked(path, level, lock_type);
|
||||
btree_path_level_init(trans, path, b);
|
||||
|
||||
if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
|
||||
if (likely(!trans->journal_replay_not_finished &&
|
||||
tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
|
||||
unlikely(b != btree_node_mem_ptr(tmp.k)))
|
||||
btree_node_mem_ptr_set(trans, path, level + 1, b);
|
||||
|
||||
if (flags & BTREE_ITER_PREFETCH)
|
||||
ret = btree_path_prefetch(trans, path);
|
||||
|
||||
if (btree_node_read_locked(path, level + 1))
|
||||
btree_node_unlock(path, level + 1);
|
||||
path->level = level;
|
||||
@ -2113,6 +2188,55 @@ struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct bkey_i *__btree_trans_peek_journal(struct btree_trans *trans,
|
||||
struct btree_path *path)
|
||||
{
|
||||
struct journal_keys *keys = &trans->c->journal_keys;
|
||||
size_t idx = bch2_journal_key_search(keys, path->btree_id,
|
||||
path->level, path->pos);
|
||||
|
||||
while (idx < keys->nr && keys->d[idx].overwritten)
|
||||
idx++;
|
||||
|
||||
return (idx < keys->nr &&
|
||||
keys->d[idx].btree_id == path->btree_id &&
|
||||
keys->d[idx].level == path->level)
|
||||
? keys->d[idx].k
|
||||
: NULL;
|
||||
}
|
||||
|
||||
static noinline
|
||||
struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
struct bkey_i *k = __btree_trans_peek_journal(trans, iter->path);
|
||||
|
||||
if (k && !bpos_cmp(k->k.p, iter->pos)) {
|
||||
iter->k = k->k;
|
||||
return bkey_i_to_s_c(k);
|
||||
} else {
|
||||
return bkey_s_c_null;
|
||||
}
|
||||
}
|
||||
|
||||
static noinline
|
||||
struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_i *next_journal =
|
||||
__btree_trans_peek_journal(trans, iter->path);
|
||||
|
||||
if (next_journal &&
|
||||
bpos_cmp(next_journal->k.p,
|
||||
k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
|
||||
iter->k = next_journal->k;
|
||||
k = bkey_i_to_s_c(next_journal);
|
||||
}
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch2_btree_iter_peek: returns first key greater than or equal to iterator's
|
||||
* current position
|
||||
@ -2141,16 +2265,12 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
goto out;
|
||||
}
|
||||
|
||||
next_update = btree_trans_peek_updates(iter);
|
||||
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
|
||||
|
||||
/* * In the btree, deleted keys sort before non deleted: */
|
||||
if (k.k && bkey_deleted(k.k) &&
|
||||
(!next_update ||
|
||||
bpos_cmp(k.k->p, next_update->k.p) <= 0)) {
|
||||
search_key = k.k->p;
|
||||
continue;
|
||||
}
|
||||
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
|
||||
k = btree_trans_peek_journal(trans, iter, k);
|
||||
|
||||
next_update = btree_trans_peek_updates(iter);
|
||||
|
||||
if (next_update &&
|
||||
bpos_cmp(next_update->k.p,
|
||||
@ -2159,6 +2279,20 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
||||
k = bkey_i_to_s_c(next_update);
|
||||
}
|
||||
|
||||
if (k.k && bkey_deleted(k.k)) {
|
||||
/*
|
||||
* If we've got a whiteout, and it's after the search
|
||||
* key, advance the search key to the whiteout instead
|
||||
* of just after the whiteout - it might be a btree
|
||||
* whiteout, with a real key at the same position, since
|
||||
* in the btree deleted keys sort before non deleted.
|
||||
*/
|
||||
search_key = bpos_cmp(search_key, k.k->p)
|
||||
? k.k->p
|
||||
: bpos_successor(k.k->p);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (likely(k.k)) {
|
||||
/*
|
||||
* We can never have a key in a leaf node at POS_MAX, so
|
||||
@ -2249,6 +2383,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
|
||||
|
||||
EBUG_ON(iter->path->cached || iter->path->level);
|
||||
EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
|
||||
|
||||
if (iter->flags & BTREE_ITER_WITH_JOURNAL)
|
||||
return bkey_s_c_err(-EIO);
|
||||
|
||||
bch2_btree_iter_verify(iter);
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
|
||||
@ -2395,23 +2533,18 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
|
||||
struct bkey_i *next_update;
|
||||
|
||||
next_update = btree_trans_peek_updates(iter);
|
||||
if (next_update &&
|
||||
if ((next_update = btree_trans_peek_updates(iter)) &&
|
||||
!bpos_cmp(next_update->k.p, iter->pos)) {
|
||||
iter->k = next_update->k;
|
||||
k = bkey_i_to_s_c(next_update);
|
||||
} else {
|
||||
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!k.k ||
|
||||
((iter->flags & BTREE_ITER_ALL_SNAPSHOTS)
|
||||
? bpos_cmp(iter->pos, k.k->p)
|
||||
: bkey_cmp(iter->pos, k.k->p))) {
|
||||
bkey_init(&iter->k);
|
||||
iter->k.p = iter->pos;
|
||||
k = (struct bkey_s_c) { &iter->k, NULL };
|
||||
}
|
||||
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
|
||||
(k = btree_trans_peek_slot_journal(trans, iter)).k)
|
||||
goto out;
|
||||
|
||||
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
|
||||
} else {
|
||||
struct bpos next;
|
||||
|
||||
@ -2455,7 +2588,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
||||
k = (struct bkey_s_c) { &iter->k, NULL };
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
iter->path->should_be_locked = true;
|
||||
|
||||
bch2_btree_iter_verify_entry_exit(iter);
|
||||
@ -2635,6 +2768,9 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
|
||||
btree_type_has_snapshots(btree_id))
|
||||
flags |= BTREE_ITER_FILTER_SNAPSHOTS;
|
||||
|
||||
if (trans->journal_replay_not_finished)
|
||||
flags |= BTREE_ITER_WITH_JOURNAL;
|
||||
|
||||
iter->trans = trans;
|
||||
iter->path = NULL;
|
||||
iter->btree_id = btree_id;
|
||||
@ -2801,6 +2937,8 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
|
||||
memset(trans, 0, sizeof(*trans));
|
||||
trans->c = c;
|
||||
trans->ip = _RET_IP_;
|
||||
trans->journal_replay_not_finished =
|
||||
!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
|
||||
|
||||
bch2_trans_alloc_paths(trans, c);
|
||||
|
||||
|
@ -207,10 +207,11 @@ struct btree_node_iter {
|
||||
#define BTREE_ITER_CACHED_NOFILL (1 << 8)
|
||||
#define BTREE_ITER_CACHED_NOCREATE (1 << 9)
|
||||
#define BTREE_ITER_WITH_UPDATES (1 << 10)
|
||||
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
|
||||
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
|
||||
#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 13)
|
||||
#define BTREE_ITER_NOPRESERVE (1 << 14)
|
||||
#define BTREE_ITER_WITH_JOURNAL (1 << 11)
|
||||
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
|
||||
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 13)
|
||||
#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 14)
|
||||
#define BTREE_ITER_NOPRESERVE (1 << 15)
|
||||
|
||||
enum btree_path_uptodate {
|
||||
BTREE_ITER_UPTODATE = 0,
|
||||
@ -381,6 +382,7 @@ struct btree_trans {
|
||||
bool restarted:1;
|
||||
bool paths_sorted:1;
|
||||
bool journal_transaction_names:1;
|
||||
bool journal_replay_not_finished:1;
|
||||
/*
|
||||
* For when bch2_trans_update notices we'll be splitting a compressed
|
||||
* extent:
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "journal.h"
|
||||
#include "journal_reclaim.h"
|
||||
#include "keylist.h"
|
||||
#include "recovery.h"
|
||||
#include "replicas.h"
|
||||
#include "super-io.h"
|
||||
#include "trace.h"
|
||||
@ -1146,6 +1147,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
|
||||
BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
|
||||
!btree_ptr_sectors_written(insert));
|
||||
|
||||
if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
|
||||
bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
|
||||
|
||||
invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?:
|
||||
bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert));
|
||||
if (invalid) {
|
||||
|
@ -711,7 +711,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
|
||||
|
||||
ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
|
||||
|
||||
if (!ret && unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
|
||||
if (!ret && unlikely(trans->journal_replay_not_finished))
|
||||
bch2_drop_overwrites_from_journal(trans);
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
|
@ -1558,20 +1558,26 @@ void bch2_stripes_heap_start(struct bch_fs *c)
|
||||
bch2_stripes_heap_insert(c, m, iter.pos);
|
||||
}
|
||||
|
||||
static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k)
|
||||
int bch2_stripes_read(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
const struct bch_stripe *s;
|
||||
struct bch_fs *c = trans->c;
|
||||
struct stripe *m;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k, ret) {
|
||||
if (k.k->type != KEY_TYPE_stripe)
|
||||
return 0;
|
||||
continue;
|
||||
|
||||
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
|
||||
if (ret)
|
||||
return ret;
|
||||
break;
|
||||
|
||||
s = bkey_s_c_to_stripe(k).v;
|
||||
|
||||
@ -1589,19 +1595,11 @@ static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k)
|
||||
spin_lock(&c->ec_stripes_heap_lock);
|
||||
bch2_stripes_heap_update(c, m, k.k->p.offset);
|
||||
spin_unlock(&c->ec_stripes_heap_lock);
|
||||
}
|
||||
bch2_trans_iter_exit(&trans, &iter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_stripes_read(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans trans;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
|
||||
bch2_stripes_read_fn);
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
if (ret)
|
||||
bch_err(c, "error reading stripes: %i", ret);
|
||||
|
||||
|
@ -59,21 +59,19 @@ static void zero_out_btree_mem_ptr(struct journal_keys *keys)
|
||||
static int __journal_key_cmp(enum btree_id l_btree_id,
|
||||
unsigned l_level,
|
||||
struct bpos l_pos,
|
||||
struct journal_key *r)
|
||||
const struct journal_key *r)
|
||||
{
|
||||
return (cmp_int(l_btree_id, r->btree_id) ?:
|
||||
cmp_int(l_level, r->level) ?:
|
||||
bpos_cmp(l_pos, r->k->k.p));
|
||||
}
|
||||
|
||||
static int journal_key_cmp(struct journal_key *l, struct journal_key *r)
|
||||
static int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
|
||||
{
|
||||
return (cmp_int(l->btree_id, r->btree_id) ?:
|
||||
cmp_int(l->level, r->level) ?:
|
||||
bpos_cmp(l->k->k.p, r->k->k.p));
|
||||
return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
|
||||
}
|
||||
|
||||
static size_t journal_key_search(struct journal_keys *journal_keys,
|
||||
size_t bch2_journal_key_search(struct journal_keys *journal_keys,
|
||||
enum btree_id id, unsigned level,
|
||||
struct bpos pos)
|
||||
{
|
||||
@ -125,7 +123,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
|
||||
};
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
struct journal_iter *iter;
|
||||
unsigned idx = journal_key_search(keys, id, level, k->k.p);
|
||||
size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
|
||||
|
||||
BUG_ON(test_bit(BCH_FS_RW, &c->flags));
|
||||
|
||||
@ -164,6 +162,11 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Can only be used from the recovery thread while we're still RO - can't be
|
||||
* used once we've got RW, as journal_keys is at that point used by multiple
|
||||
* threads:
|
||||
*/
|
||||
int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
|
||||
unsigned level, struct bkey_i *k)
|
||||
{
|
||||
@ -196,7 +199,7 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
|
||||
unsigned level, struct bpos pos)
|
||||
{
|
||||
struct journal_keys *keys = &c->journal_keys;
|
||||
size_t idx = journal_key_search(keys, btree, level, pos);
|
||||
size_t idx = bch2_journal_key_search(keys, btree, level, pos);
|
||||
|
||||
if (idx < keys->nr &&
|
||||
keys->d[idx].btree_id == btree &&
|
||||
@ -207,15 +210,18 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
|
||||
|
||||
static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
|
||||
{
|
||||
struct journal_key *k = iter->idx - iter->keys->nr
|
||||
? iter->keys->d + iter->idx : NULL;
|
||||
struct journal_key *k = iter->keys->d + iter->idx;
|
||||
|
||||
if (k &&
|
||||
while (k < iter->keys->d + iter->keys->nr &&
|
||||
k->btree_id == iter->btree_id &&
|
||||
k->level == iter->level)
|
||||
k->level == iter->level) {
|
||||
if (!k->overwritten)
|
||||
return k->k;
|
||||
|
||||
iter->idx = iter->keys->nr;
|
||||
iter->idx++;
|
||||
k = iter->keys->d + iter->idx;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -238,8 +244,7 @@ static void bch2_journal_iter_init(struct bch_fs *c,
|
||||
iter->btree_id = id;
|
||||
iter->level = level;
|
||||
iter->keys = &c->journal_keys;
|
||||
iter->idx = journal_key_search(&c->journal_keys, id, level, pos);
|
||||
list_add(&iter->list, &c->journal_iters);
|
||||
iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
|
||||
}
|
||||
|
||||
static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
|
||||
@ -325,106 +330,33 @@ void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
|
||||
bch2_journal_iter_exit(&iter->journal);
|
||||
}
|
||||
|
||||
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
|
||||
void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
|
||||
struct bch_fs *c,
|
||||
struct btree *b)
|
||||
struct btree *b,
|
||||
struct btree_node_iter node_iter,
|
||||
struct bpos pos)
|
||||
{
|
||||
memset(iter, 0, sizeof(*iter));
|
||||
|
||||
iter->b = b;
|
||||
bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
|
||||
bch2_journal_iter_init(c, &iter->journal,
|
||||
b->c.btree_id, b->c.level, b->data->min_key);
|
||||
iter->node_iter = node_iter;
|
||||
bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos);
|
||||
INIT_LIST_HEAD(&iter->journal.list);
|
||||
}
|
||||
|
||||
/* Walk btree, overlaying keys from the journal: */
|
||||
|
||||
static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
|
||||
struct btree_and_journal_iter iter)
|
||||
/*
|
||||
* this version is used by btree_gc before filesystem has gone RW and
|
||||
* multithreaded, so uses the journal_iters list:
|
||||
*/
|
||||
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
|
||||
struct bch_fs *c,
|
||||
struct btree *b)
|
||||
{
|
||||
unsigned i = 0, nr = b->c.level > 1 ? 2 : 16;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_buf tmp;
|
||||
struct btree_node_iter node_iter;
|
||||
|
||||
BUG_ON(!b->c.level);
|
||||
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
while (i < nr &&
|
||||
(k = bch2_btree_and_journal_iter_peek(&iter)).k) {
|
||||
bch2_bkey_buf_reassemble(&tmp, c, k);
|
||||
|
||||
bch2_btree_node_prefetch(c, NULL, NULL, tmp.k,
|
||||
b->c.btree_id, b->c.level - 1);
|
||||
|
||||
bch2_btree_and_journal_iter_advance(&iter);
|
||||
i++;
|
||||
}
|
||||
|
||||
bch2_bkey_buf_exit(&tmp, c);
|
||||
}
|
||||
|
||||
static int bch2_btree_and_journal_walk_recurse(struct btree_trans *trans, struct btree *b,
|
||||
enum btree_id btree_id,
|
||||
btree_walk_key_fn key_fn)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_and_journal_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_buf tmp;
|
||||
struct btree *child;
|
||||
int ret = 0;
|
||||
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
|
||||
|
||||
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
|
||||
if (b->c.level) {
|
||||
bch2_bkey_buf_reassemble(&tmp, c, k);
|
||||
|
||||
child = bch2_btree_node_get_noiter(c, tmp.k,
|
||||
b->c.btree_id, b->c.level - 1,
|
||||
false);
|
||||
|
||||
ret = PTR_ERR_OR_ZERO(child);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
btree_and_journal_iter_prefetch(c, b, iter);
|
||||
|
||||
ret = bch2_btree_and_journal_walk_recurse(trans, child,
|
||||
btree_id, key_fn);
|
||||
six_unlock_read(&child->c.lock);
|
||||
} else {
|
||||
ret = key_fn(trans, k);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
bch2_btree_and_journal_iter_advance(&iter);
|
||||
}
|
||||
|
||||
bch2_btree_and_journal_iter_exit(&iter);
|
||||
bch2_bkey_buf_exit(&tmp, c);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_btree_and_journal_walk(struct btree_trans *trans, enum btree_id btree_id,
|
||||
btree_walk_key_fn key_fn)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree *b = c->btree_roots[btree_id].b;
|
||||
int ret = 0;
|
||||
|
||||
if (btree_node_fake(b))
|
||||
return 0;
|
||||
|
||||
six_lock_read(&b->c.lock, NULL, NULL);
|
||||
ret = bch2_btree_and_journal_walk_recurse(trans, b, btree_id, key_fn);
|
||||
six_unlock_read(&b->c.lock);
|
||||
|
||||
return ret;
|
||||
bch2_btree_node_iter_init_from_start(&node_iter, b);
|
||||
__bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key);
|
||||
list_add(&iter->journal.list, &c->journal_iters);
|
||||
}
|
||||
|
||||
/* sort and dedup all keys in the journal: */
|
||||
@ -449,9 +381,7 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
|
||||
const struct journal_key *l = _l;
|
||||
const struct journal_key *r = _r;
|
||||
|
||||
return cmp_int(l->btree_id, r->btree_id) ?:
|
||||
cmp_int(l->level, r->level) ?:
|
||||
bpos_cmp(l->k->k.p, r->k->k.p) ?:
|
||||
return journal_key_cmp(l, r) ?:
|
||||
cmp_int(l->journal_seq, r->journal_seq) ?:
|
||||
cmp_int(l->journal_offset, r->journal_offset);
|
||||
}
|
||||
|
@ -31,6 +31,9 @@ struct btree_and_journal_iter {
|
||||
} last;
|
||||
};
|
||||
|
||||
size_t bch2_journal_key_search(struct journal_keys *, enum btree_id,
|
||||
unsigned, struct bpos);
|
||||
|
||||
int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
|
||||
unsigned, struct bkey_i *);
|
||||
int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
|
||||
@ -45,14 +48,13 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
|
||||
struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
|
||||
|
||||
void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
|
||||
void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
|
||||
struct bch_fs *, struct btree *,
|
||||
struct btree_node_iter, struct bpos);
|
||||
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
|
||||
struct bch_fs *,
|
||||
struct btree *);
|
||||
|
||||
typedef int (*btree_walk_key_fn)(struct btree_trans *, struct bkey_s_c);
|
||||
|
||||
int bch2_btree_and_journal_walk(struct btree_trans *, enum btree_id, btree_walk_key_fn);
|
||||
|
||||
void bch2_journal_keys_free(struct journal_keys *);
|
||||
void bch2_journal_entries_free(struct list_head *);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user