mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-24 12:44:11 +08:00
bcachefs: btree gc refactoring
prep work for erasure coding Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
abce30b79b
commit
2252aa271c
@ -19,17 +19,6 @@ static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
|
||||
return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
|
||||
}
|
||||
|
||||
static inline bool btree_type_has_ptrs(enum bkey_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BKEY_TYPE_BTREE:
|
||||
case BKEY_TYPE_EXTENTS:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
struct bch_fs;
|
||||
struct btree;
|
||||
struct bkey;
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "journal.h"
|
||||
#include "journal_io.h"
|
||||
#include "keylist.h"
|
||||
#include "move.h"
|
||||
#include "replicas.h"
|
||||
@ -32,6 +33,23 @@
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/sched/task.h>
|
||||
|
||||
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
|
||||
{
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&c->gc_pos_lock);
|
||||
c->gc_pos = new_pos;
|
||||
write_seqcount_end(&c->gc_pos_lock);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
|
||||
{
|
||||
BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
|
||||
__gc_pos_set(c, new_pos);
|
||||
}
|
||||
|
||||
/* range_checks - for validating min/max pos of each btree node: */
|
||||
|
||||
struct range_checks {
|
||||
struct range_level {
|
||||
struct bpos min;
|
||||
@ -91,6 +109,19 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
}
|
||||
|
||||
/* marking of btree keys/nodes: */
|
||||
|
||||
static bool bkey_type_needs_gc(enum bkey_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BKEY_TYPE_BTREE:
|
||||
case BKEY_TYPE_EXTENTS:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
const struct bch_extent_ptr *ptr;
|
||||
@ -113,38 +144,7 @@ u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
|
||||
return max_stale;
|
||||
}
|
||||
|
||||
/*
|
||||
* For runtime mark and sweep:
|
||||
*/
|
||||
static u8 bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
|
||||
struct bkey_s_c k, unsigned flags)
|
||||
{
|
||||
struct gc_pos pos = { 0 };
|
||||
u8 ret = 0;
|
||||
|
||||
switch (type) {
|
||||
case BKEY_TYPE_BTREE:
|
||||
bch2_mark_key(c, k, c->opts.btree_node_size,
|
||||
BCH_DATA_BTREE, pos, NULL,
|
||||
0, flags|
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
|
||||
BCH_BUCKET_MARK_GC_LOCK_HELD);
|
||||
break;
|
||||
case BKEY_TYPE_EXTENTS:
|
||||
bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
|
||||
0, flags|
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
|
||||
BCH_BUCKET_MARK_GC_LOCK_HELD);
|
||||
ret = bch2_btree_key_recalc_oldest_gen(c, k);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
|
||||
static int bch2_btree_mark_ptrs_initial(struct bch_fs *c, enum bkey_type type,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
enum bch_data_type data_type = type == BKEY_TYPE_BTREE
|
||||
@ -199,54 +199,90 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
|
||||
}
|
||||
}
|
||||
|
||||
atomic64_set(&c->key_version,
|
||||
max_t(u64, k.k->version.lo,
|
||||
atomic64_read(&c->key_version)));
|
||||
|
||||
bch2_gc_mark_key(c, type, k, BCH_BUCKET_MARK_NOATOMIC);
|
||||
if (k.k->version.lo > atomic64_read(&c->key_version))
|
||||
atomic64_set(&c->key_version, k.k->version.lo);
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
|
||||
/*
|
||||
* For runtime mark and sweep:
|
||||
*/
|
||||
static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
|
||||
struct bkey_s_c k, bool initial)
|
||||
{
|
||||
struct gc_pos pos = { 0 };
|
||||
unsigned flags = initial ? BCH_BUCKET_MARK_NOATOMIC : 0;
|
||||
int ret = 0;
|
||||
|
||||
switch (type) {
|
||||
case BKEY_TYPE_BTREE:
|
||||
if (initial) {
|
||||
ret = bch2_btree_mark_ptrs_initial(c, type, k);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
bch2_mark_key(c, k, c->opts.btree_node_size,
|
||||
BCH_DATA_BTREE, pos, NULL,
|
||||
0, flags|
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
|
||||
BCH_BUCKET_MARK_GC_LOCK_HELD);
|
||||
break;
|
||||
case BKEY_TYPE_EXTENTS:
|
||||
if (initial) {
|
||||
ret = bch2_btree_mark_ptrs_initial(c, type, k);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
|
||||
0, flags|
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
|
||||
BCH_BUCKET_MARK_GC_LOCK_HELD);
|
||||
ret = bch2_btree_key_recalc_oldest_gen(c, k);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
|
||||
bool initial)
|
||||
{
|
||||
enum bkey_type type = btree_node_type(b);
|
||||
struct btree_node_iter iter;
|
||||
struct bkey unpacked;
|
||||
struct bkey_s_c k;
|
||||
u8 stale = 0;
|
||||
int ret;
|
||||
|
||||
if (!bkey_type_needs_gc(type))
|
||||
return 0;
|
||||
|
||||
if (btree_node_has_ptrs(b))
|
||||
for_each_btree_node_key_unpack(b, k, &iter,
|
||||
&unpacked) {
|
||||
bch2_bkey_debugcheck(c, b, k);
|
||||
stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
|
||||
|
||||
ret = bch2_gc_mark_key(c, type, k, initial);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
stale = max_t(u8, stale, ret);
|
||||
}
|
||||
|
||||
return stale;
|
||||
}
|
||||
|
||||
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
|
||||
{
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&c->gc_pos_lock);
|
||||
c->gc_pos = new_pos;
|
||||
write_seqcount_end(&c->gc_pos_lock);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
|
||||
{
|
||||
BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
|
||||
__gc_pos_set(c, new_pos);
|
||||
}
|
||||
|
||||
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
|
||||
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
|
||||
bool initial)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct btree *b;
|
||||
struct range_checks r;
|
||||
unsigned depth = btree_id == BTREE_ID_EXTENTS ? 0 : 1;
|
||||
unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1;
|
||||
unsigned max_stale;
|
||||
int ret = 0;
|
||||
|
||||
@ -257,8 +293,11 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
|
||||
|
||||
/*
|
||||
* if expensive_debug_checks is on, run range_checks on all leaf nodes:
|
||||
*
|
||||
* and on startup, we have to read every btree node (XXX: only if it was
|
||||
* an unclean shutdown)
|
||||
*/
|
||||
if (expensive_debug_checks(c))
|
||||
if (initial || expensive_debug_checks(c))
|
||||
depth = 0;
|
||||
|
||||
btree_node_range_checks_init(&r, depth);
|
||||
@ -269,10 +308,11 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
|
||||
|
||||
bch2_verify_btree_nr_keys(b);
|
||||
|
||||
max_stale = btree_gc_mark_node(c, b);
|
||||
max_stale = btree_gc_mark_node(c, b, initial);
|
||||
|
||||
gc_pos_set(c, gc_pos_btree_node(b));
|
||||
|
||||
if (!initial) {
|
||||
if (max_stale > 64)
|
||||
bch2_btree_node_rewrite(c, &iter,
|
||||
b->data->keys.seq,
|
||||
@ -285,6 +325,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
|
||||
b->data->keys.seq,
|
||||
BTREE_INSERT_NOWAIT|
|
||||
BTREE_INSERT_GC_LOCK_HELD);
|
||||
}
|
||||
|
||||
bch2_btree_iter_cond_resched(&iter);
|
||||
}
|
||||
@ -296,13 +337,47 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
|
||||
|
||||
b = c->btree_roots[btree_id].b;
|
||||
if (!btree_node_fake(b))
|
||||
bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
|
||||
bch2_gc_mark_key(c, BKEY_TYPE_BTREE,
|
||||
bkey_i_to_s_c(&b->key), initial);
|
||||
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
|
||||
|
||||
mutex_unlock(&c->btree_root_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
|
||||
bool initial)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < BTREE_ID_NR; i++) {
|
||||
enum bkey_type type = bkey_type(0, i);
|
||||
|
||||
int ret = bch2_gc_btree(c, i, initial);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (journal && bkey_type_needs_gc(type)) {
|
||||
struct bkey_i *k, *n;
|
||||
struct jset_entry *j;
|
||||
struct journal_replay *r;
|
||||
int ret;
|
||||
|
||||
list_for_each_entry(r, journal, list)
|
||||
for_each_jset_key(k, n, j, &r->j) {
|
||||
if (type == bkey_type(j->level, j->btree_id)) {
|
||||
ret = bch2_gc_mark_key(c, type,
|
||||
bkey_i_to_s_c(k), initial);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
|
||||
u64 start, u64 end,
|
||||
enum bch_data_type type,
|
||||
@ -525,6 +600,7 @@ void bch2_gc(struct bch_fs *c)
|
||||
struct bch_dev *ca;
|
||||
u64 start_time = local_clock();
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Walk _all_ references to buckets, and recompute them:
|
||||
@ -560,15 +636,12 @@ void bch2_gc(struct bch_fs *c)
|
||||
|
||||
bch2_mark_superblocks(c);
|
||||
|
||||
/* Walk btree: */
|
||||
for (i = 0; i < BTREE_ID_NR; i++) {
|
||||
int ret = bch2_gc_btree(c, i);
|
||||
ret = bch2_gc_btrees(c, NULL, false);
|
||||
if (ret) {
|
||||
bch_err(c, "btree gc failed: %d", ret);
|
||||
set_bit(BCH_FS_GC_FAILURE, &c->flags);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_mark_pending_btree_node_frees(c);
|
||||
bch2_mark_allocator_buckets(c);
|
||||
@ -1009,58 +1082,9 @@ int bch2_gc_thread_start(struct bch_fs *c)
|
||||
|
||||
/* Initial GC computes bucket marks during startup */
|
||||
|
||||
static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct btree *b;
|
||||
struct range_checks r;
|
||||
int ret = 0;
|
||||
|
||||
btree_node_range_checks_init(&r, 0);
|
||||
|
||||
gc_pos_set(c, gc_pos_btree(id, POS_MIN, 0));
|
||||
|
||||
if (!c->btree_roots[id].b)
|
||||
return 0;
|
||||
|
||||
b = c->btree_roots[id].b;
|
||||
if (!btree_node_fake(b))
|
||||
ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
|
||||
bkey_i_to_s_c(&b->key));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* We have to hit every btree node before starting journal replay, in
|
||||
* order for the journal seq blacklist machinery to work:
|
||||
*/
|
||||
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
|
||||
btree_node_range_checks(c, b, &r);
|
||||
|
||||
if (btree_node_has_ptrs(b)) {
|
||||
struct btree_node_iter node_iter;
|
||||
struct bkey unpacked;
|
||||
struct bkey_s_c k;
|
||||
|
||||
for_each_btree_node_key_unpack(b, k, &node_iter,
|
||||
&unpacked) {
|
||||
ret = bch2_btree_mark_key_initial(c,
|
||||
btree_node_type(b), k);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
bch2_btree_iter_cond_resched(&iter);
|
||||
}
|
||||
err:
|
||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||
}
|
||||
|
||||
int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
|
||||
{
|
||||
unsigned iter = 0;
|
||||
enum btree_id id;
|
||||
int ret = 0;
|
||||
|
||||
down_write(&c->gc_lock);
|
||||
@ -1069,13 +1093,7 @@ again:
|
||||
|
||||
bch2_mark_superblocks(c);
|
||||
|
||||
for (id = 0; id < BTREE_ID_NR; id++) {
|
||||
ret = bch2_initial_gc_btree(c, id);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_journal_mark(c, journal);
|
||||
ret = bch2_gc_btrees(c, journal, true);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
|
@ -12,8 +12,6 @@ void bch2_gc_thread_stop(struct bch_fs *);
|
||||
int bch2_gc_thread_start(struct bch_fs *);
|
||||
int bch2_initial_gc(struct bch_fs *, struct list_head *);
|
||||
u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c);
|
||||
int bch2_btree_mark_key_initial(struct bch_fs *, enum bkey_type,
|
||||
struct bkey_s_c);
|
||||
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
|
||||
|
||||
/*
|
||||
|
@ -415,11 +415,6 @@ static inline const struct bkey_ops *btree_node_ops(struct btree *b)
|
||||
return &bch2_bkey_ops[btree_node_type(b)];
|
||||
}
|
||||
|
||||
static inline bool btree_node_has_ptrs(struct btree *b)
|
||||
{
|
||||
return btree_type_has_ptrs(btree_node_type(b));
|
||||
}
|
||||
|
||||
static inline bool btree_node_is_extents(struct btree *b)
|
||||
{
|
||||
return btree_node_type(b) == BKEY_TYPE_EXTENTS;
|
||||
|
@ -355,10 +355,6 @@ static inline bool journal_flushes_device(struct bch_dev *ca)
|
||||
return true;
|
||||
}
|
||||
|
||||
int bch2_journal_mark(struct bch_fs *, struct list_head *);
|
||||
void bch2_journal_entries_free(struct list_head *);
|
||||
int bch2_journal_replay(struct bch_fs *, struct list_head *);
|
||||
|
||||
static inline void bch2_journal_set_replay_done(struct journal *j)
|
||||
{
|
||||
BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
|
||||
|
@ -852,28 +852,6 @@ fsck_err:
|
||||
|
||||
/* journal replay: */
|
||||
|
||||
int bch2_journal_mark(struct bch_fs *c, struct list_head *list)
|
||||
{
|
||||
struct bkey_i *k, *n;
|
||||
struct jset_entry *j;
|
||||
struct journal_replay *r;
|
||||
int ret;
|
||||
|
||||
list_for_each_entry(r, list, list)
|
||||
for_each_jset_key(k, n, j, &r->j) {
|
||||
enum bkey_type type = bkey_type(j->level, j->btree_id);
|
||||
struct bkey_s_c k_s_c = bkey_i_to_s_c(k);
|
||||
|
||||
if (btree_type_has_ptrs(type)) {
|
||||
ret = bch2_btree_mark_key_initial(c, type, k_s_c);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
|
||||
{
|
||||
struct journal *j = &c->journal;
|
||||
|
@ -37,6 +37,8 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
|
||||
|
||||
int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
|
||||
int bch2_journal_read(struct bch_fs *, struct list_head *);
|
||||
void bch2_journal_entries_free(struct list_head *);
|
||||
int bch2_journal_replay(struct bch_fs *, struct list_head *);
|
||||
|
||||
int bch2_journal_entry_sectors(struct journal *);
|
||||
void bch2_journal_write(struct closure *);
|
||||
|
Loading…
Reference in New Issue
Block a user