mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-27 00:04:47 +08:00
bcachefs: bch2_trans_mark_update()
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
c43a6ef9a0
commit
932aa83745
@ -141,8 +141,8 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_alloc_pack(struct bkey_i_alloc *dst,
|
||||
const struct bkey_alloc_unpacked src)
|
||||
void bch2_alloc_pack(struct bkey_i_alloc *dst,
|
||||
const struct bkey_alloc_unpacked src)
|
||||
{
|
||||
unsigned idx = 0;
|
||||
void *d = dst->v.data;
|
||||
@ -962,7 +962,6 @@ retry:
|
||||
|
||||
invalidating_cached_data = m.cached_sectors != 0;
|
||||
|
||||
//BUG_ON(u.dirty_sectors);
|
||||
u.data_type = 0;
|
||||
u.dirty_sectors = 0;
|
||||
u.cached_sectors = 0;
|
||||
@ -974,6 +973,7 @@ retry:
|
||||
* we have to trust the in memory bucket @m, not the version in the
|
||||
* btree:
|
||||
*/
|
||||
//BUG_ON(u.dirty_sectors);
|
||||
u.gen = m.gen + 1;
|
||||
|
||||
a = bkey_alloc_init(&alloc_key.k);
|
||||
|
@ -14,6 +14,8 @@ struct bkey_alloc_unpacked {
|
||||
};
|
||||
|
||||
struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *);
|
||||
void bch2_alloc_pack(struct bkey_i_alloc *,
|
||||
const struct bkey_alloc_unpacked);
|
||||
|
||||
#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
|
||||
|
||||
|
@ -1300,6 +1300,7 @@ enum bch_sb_features {
|
||||
|
||||
enum bch_sb_compat {
|
||||
BCH_COMPAT_FEAT_ALLOC_INFO = 0,
|
||||
BCH_COMPAT_FEAT_ALLOC_METADATA = 1,
|
||||
};
|
||||
|
||||
/* options: */
|
||||
|
@ -1005,7 +1005,7 @@ retry_all:
|
||||
goto retry_all;
|
||||
}
|
||||
|
||||
ret = btree_trans_has_multiple_iters(trans) ? -EINTR : 0;
|
||||
ret = hweight64(trans->iters_live) > 1 ? -EINTR : 0;
|
||||
out:
|
||||
bch2_btree_cache_cannibalize_unlock(c);
|
||||
return ret;
|
||||
@ -1103,8 +1103,6 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
if (unlikely(ret))
|
||||
ret = __btree_iter_traverse_all(iter->trans, iter, ret);
|
||||
|
||||
BUG_ON(ret == -EINTR && !btree_trans_has_multiple_iters(iter->trans));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <linux/rhashtable.h>
|
||||
|
||||
#include "bkey_methods.h"
|
||||
#include "buckets_types.h"
|
||||
#include "journal_types.h"
|
||||
#include "six.h"
|
||||
|
||||
@ -264,6 +265,7 @@ struct btree_insert_entry {
|
||||
};
|
||||
|
||||
bool deferred;
|
||||
bool triggered;
|
||||
};
|
||||
|
||||
#define BTREE_ITER_MAX 64
|
||||
@ -302,6 +304,8 @@ struct btree_trans {
|
||||
|
||||
struct btree_iter iters_onstack[2];
|
||||
struct btree_insert_entry updates_onstack[6];
|
||||
|
||||
struct replicas_delta_list fs_usage_deltas;
|
||||
};
|
||||
|
||||
#define BTREE_FLAG(flag) \
|
||||
|
@ -43,8 +43,11 @@ enum {
|
||||
__BTREE_INSERT_USE_ALLOC_RESERVE,
|
||||
__BTREE_INSERT_JOURNAL_REPLAY,
|
||||
__BTREE_INSERT_JOURNAL_RESERVED,
|
||||
__BTREE_INSERT_NOMARK_INSERT,
|
||||
__BTREE_INSERT_NOMARK_OVERWRITES,
|
||||
__BTREE_INSERT_NOMARK,
|
||||
__BTREE_INSERT_MARK_INMEM,
|
||||
__BTREE_INSERT_NO_CLEAR_REPLICAS,
|
||||
__BTREE_INSERT_NOWAIT,
|
||||
__BTREE_INSERT_GC_LOCK_HELD,
|
||||
__BCH_HASH_SET_MUST_CREATE,
|
||||
@ -77,12 +80,20 @@ enum {
|
||||
|
||||
#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
|
||||
|
||||
/* Don't mark new key, just overwrites: */
|
||||
#define BTREE_INSERT_NOMARK_INSERT (1 << __BTREE_INSERT_NOMARK_INSERT)
|
||||
|
||||
/* Don't mark overwrites, just new key: */
|
||||
#define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
|
||||
|
||||
/* Don't call bch2_mark_key: */
|
||||
/* Don't mark at all (neither the new key nor overwrites): */
|
||||
#define BTREE_INSERT_NOMARK (1 << __BTREE_INSERT_NOMARK)
|
||||
|
||||
/* Don't mark transactionally: */
|
||||
#define BTREE_INSERT_MARK_INMEM (1 << __BTREE_INSERT_MARK_INMEM)
|
||||
|
||||
#define BTREE_INSERT_NO_CLEAR_REPLICAS (1 << __BTREE_INSERT_NO_CLEAR_REPLICAS)
|
||||
|
||||
/* Don't block on allocation failure (for new btree nodes): */
|
||||
#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT)
|
||||
#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD)
|
||||
|
@ -526,6 +526,22 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
|
||||
btree_insert_key_deferred(trans, insert);
|
||||
}
|
||||
|
||||
static inline bool update_triggers_transactional(struct btree_trans *trans,
|
||||
struct btree_insert_entry *i)
|
||||
{
|
||||
return likely(!(trans->flags & BTREE_INSERT_MARK_INMEM)) &&
|
||||
(i->iter->btree_id == BTREE_ID_EXTENTS ||
|
||||
i->iter->btree_id == BTREE_ID_INODES);
|
||||
}
|
||||
|
||||
static inline bool update_has_triggers(struct btree_trans *trans,
|
||||
struct btree_insert_entry *i)
|
||||
{
|
||||
return likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
|
||||
!i->deferred &&
|
||||
btree_node_type_needs_gc(i->iter->btree_id);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get journal reservation, take write locks, and attempt to do btree update(s):
|
||||
*/
|
||||
@ -538,29 +554,25 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
|
||||
struct btree_iter *linked;
|
||||
int ret;
|
||||
|
||||
if (likely(!(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS))) {
|
||||
memset(&trans->fs_usage_deltas.fs_usage, 0,
|
||||
sizeof(trans->fs_usage_deltas.fs_usage));
|
||||
trans->fs_usage_deltas.top = trans->fs_usage_deltas.d;
|
||||
}
|
||||
|
||||
trans_for_each_update_iter(trans, i)
|
||||
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
|
||||
|
||||
btree_trans_lock_write(c, trans);
|
||||
|
||||
if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
|
||||
trans_for_each_update_iter(trans, i) {
|
||||
if (i->deferred ||
|
||||
!btree_node_type_needs_gc(i->iter->btree_id))
|
||||
continue;
|
||||
|
||||
if (!fs_usage) {
|
||||
percpu_down_read(&c->mark_lock);
|
||||
fs_usage = bch2_fs_usage_scratch_get(c);
|
||||
}
|
||||
|
||||
if (!bch2_bkey_replicas_marked_locked(c,
|
||||
bkey_i_to_s_c(i->k), true)) {
|
||||
ret = BTREE_INSERT_NEED_MARK_REPLICAS;
|
||||
goto out;
|
||||
}
|
||||
trans_for_each_update_iter(trans, i)
|
||||
if (update_has_triggers(trans, i) &&
|
||||
update_triggers_transactional(trans, i)) {
|
||||
ret = bch2_trans_mark_update(trans, i,
|
||||
&trans->fs_usage_deltas);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
btree_trans_lock_write(c, trans);
|
||||
|
||||
if (race_fault()) {
|
||||
ret = -EINTR;
|
||||
@ -578,6 +590,23 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
trans_for_each_update_iter(trans, i) {
|
||||
if (i->deferred ||
|
||||
!btree_node_type_needs_gc(i->iter->btree_id))
|
||||
continue;
|
||||
|
||||
if (!fs_usage) {
|
||||
percpu_down_read(&c->mark_lock);
|
||||
fs_usage = bch2_fs_usage_scratch_get(c);
|
||||
}
|
||||
|
||||
if (!bch2_bkey_replicas_marked_locked(c,
|
||||
bkey_i_to_s_c(i->k), true)) {
|
||||
ret = BTREE_INSERT_NEED_MARK_REPLICAS;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't get journal reservation until after we know insert will
|
||||
* succeed:
|
||||
@ -606,20 +635,24 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
|
||||
linked->flags |= BTREE_ITER_NOUNLOCK;
|
||||
}
|
||||
|
||||
if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
|
||||
trans_for_each_update_iter(trans, i)
|
||||
trans_for_each_update_iter(trans, i)
|
||||
if (update_has_triggers(trans, i) &&
|
||||
!update_triggers_transactional(trans, i))
|
||||
bch2_mark_update(trans, i, &fs_usage->u, 0);
|
||||
if (fs_usage)
|
||||
bch2_trans_fs_usage_apply(trans, fs_usage);
|
||||
|
||||
if (unlikely(c->gc_pos.phase)) {
|
||||
trans_for_each_update_iter(trans, i)
|
||||
if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
|
||||
bch2_mark_update(trans, i, NULL,
|
||||
BCH_BUCKET_MARK_GC);
|
||||
}
|
||||
if (fs_usage) {
|
||||
bch2_replicas_delta_list_apply(c, &fs_usage->u,
|
||||
&trans->fs_usage_deltas);
|
||||
bch2_trans_fs_usage_apply(trans, fs_usage);
|
||||
}
|
||||
|
||||
if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
|
||||
unlikely(c->gc_pos.phase))
|
||||
trans_for_each_update_iter(trans, i)
|
||||
if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
|
||||
bch2_mark_update(trans, i, NULL,
|
||||
BCH_BUCKET_MARK_GC);
|
||||
|
||||
trans_for_each_update(trans, i)
|
||||
do_btree_insert_one(trans, i);
|
||||
out:
|
||||
@ -646,6 +679,19 @@ int bch2_trans_commit_error(struct btree_trans *trans,
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
unsigned flags = trans->flags;
|
||||
struct btree_insert_entry *src, *dst;
|
||||
|
||||
src = dst = trans->updates;
|
||||
|
||||
while (src < trans->updates + trans->nr_updates) {
|
||||
if (!src->triggered) {
|
||||
*dst = *src;
|
||||
dst++;
|
||||
}
|
||||
src++;
|
||||
}
|
||||
|
||||
trans->nr_updates = dst - trans->updates;
|
||||
|
||||
/*
|
||||
* BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
|
||||
@ -808,6 +854,7 @@ int bch2_trans_commit(struct btree_trans *trans,
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_insert_entry *i;
|
||||
unsigned orig_mem_top = trans->mem_top;
|
||||
int ret = 0;
|
||||
|
||||
if (!trans->nr_updates)
|
||||
@ -885,8 +932,16 @@ out_noupdates:
|
||||
return ret;
|
||||
err:
|
||||
ret = bch2_trans_commit_error(trans, i, ret);
|
||||
if (!ret)
|
||||
|
||||
/* can't loop if it was passed in and we changed it: */
|
||||
if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret)
|
||||
ret = -EINTR;
|
||||
|
||||
if (!ret) {
|
||||
/* free memory used by triggers, they'll be reexecuted: */
|
||||
trans->mem_top = orig_mem_top;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
@ -969,6 +1024,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c);
|
||||
bch2_trans_preload_iters(&trans);
|
||||
|
||||
iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
|
||||
|
||||
@ -1014,5 +1070,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
|
||||
}
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
BUG_ON(ret == -EINTR);
|
||||
return ret;
|
||||
}
|
||||
|
@ -653,19 +653,16 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
|
||||
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
||||
g = __bucket(ca, k.k->p.offset, gc);
|
||||
|
||||
/*
|
||||
* this should currently only be getting called from the bucket
|
||||
* invalidate path:
|
||||
*/
|
||||
BUG_ON(u.dirty_sectors);
|
||||
BUG_ON(u.cached_sectors);
|
||||
BUG_ON(!g->mark.owned_by_allocator);
|
||||
|
||||
old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
|
||||
m.gen = u.gen;
|
||||
m.data_type = u.data_type;
|
||||
m.dirty_sectors = u.dirty_sectors;
|
||||
m.cached_sectors = u.cached_sectors;
|
||||
|
||||
if (!(flags & BCH_BUCKET_MARK_GC)) {
|
||||
m.journal_seq_valid = 1;
|
||||
m.journal_seq = journal_seq;
|
||||
}
|
||||
}));
|
||||
|
||||
g->io_time[READ] = u.read_time;
|
||||
@ -673,6 +670,11 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
|
||||
g->oldest_gen = u.oldest_gen;
|
||||
g->gen_valid = 1;
|
||||
|
||||
/*
|
||||
* need to know if we're getting called from the invalidate path or
|
||||
* not:
|
||||
*/
|
||||
|
||||
if (old.cached_sectors) {
|
||||
update_cached_sectors(c, fs_usage, ca->dev_idx,
|
||||
-old.cached_sectors);
|
||||
@ -762,11 +764,34 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Checking against gc's position has to be done here, inside the cmpxchg()
|
||||
* loop, to avoid racing with the start of gc clearing all the marks - GC does
|
||||
* that with the gc pos seqlock held.
|
||||
*/
|
||||
static void bucket_set_stripe(struct bch_fs *c,
|
||||
const struct bch_stripe *v,
|
||||
bool enabled,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq,
|
||||
bool gc)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < v->nr_blocks; i++) {
|
||||
const struct bch_extent_ptr *ptr = v->ptrs + i;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
struct bucket *g = PTR_BUCKET(ca, ptr, gc);
|
||||
struct bucket_mark new, old;
|
||||
|
||||
BUG_ON(ptr_stale(ca, ptr));
|
||||
|
||||
old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
|
||||
new.dirty = true;
|
||||
new.stripe = enabled;
|
||||
if (journal_seq) {
|
||||
new.journal_seq_valid = 1;
|
||||
new.journal_seq = journal_seq;
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
static bool bch2_mark_pointer(struct bch_fs *c,
|
||||
struct extent_ptr_decoded p,
|
||||
s64 sectors, enum bch_data_type data_type,
|
||||
@ -776,8 +801,7 @@ static bool bch2_mark_pointer(struct bch_fs *c,
|
||||
{
|
||||
struct bucket_mark old, new;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
|
||||
size_t b = PTR_BUCKET_NR(ca, &p.ptr);
|
||||
struct bucket *g = __bucket(ca, b, gc);
|
||||
struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc);
|
||||
bool overflow;
|
||||
u64 v;
|
||||
|
||||
@ -946,35 +970,6 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bucket_set_stripe(struct bch_fs *c,
|
||||
const struct bch_stripe *v,
|
||||
bool enabled,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
u64 journal_seq,
|
||||
bool gc)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < v->nr_blocks; i++) {
|
||||
const struct bch_extent_ptr *ptr = v->ptrs + i;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
size_t b = PTR_BUCKET_NR(ca, ptr);
|
||||
struct bucket *g = __bucket(ca, b, gc);
|
||||
struct bucket_mark new, old;
|
||||
|
||||
BUG_ON(ptr_stale(ca, ptr));
|
||||
|
||||
old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
|
||||
new.dirty = true;
|
||||
new.stripe = enabled;
|
||||
if (journal_seq) {
|
||||
new.journal_seq_valid = 1;
|
||||
new.journal_seq = journal_seq;
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
|
||||
bool inserting,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
@ -1006,14 +1001,7 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
|
||||
m->nr_blocks = s.v->nr_blocks;
|
||||
m->nr_redundant = s.v->nr_redundant;
|
||||
|
||||
memset(&m->r, 0, sizeof(m->r));
|
||||
|
||||
m->r.e.data_type = BCH_DATA_USER;
|
||||
m->r.e.nr_devs = s.v->nr_blocks;
|
||||
m->r.e.nr_required = s.v->nr_blocks - s.v->nr_redundant;
|
||||
|
||||
for (i = 0; i < s.v->nr_blocks; i++)
|
||||
m->r.e.devs[i] = s.v->ptrs[i].dev;
|
||||
bch2_bkey_to_replicas(&m->r.e, k);
|
||||
|
||||
/*
|
||||
* XXX: account for stripes somehow here
|
||||
@ -1180,10 +1168,11 @@ int bch2_mark_update(struct btree_trans *trans,
|
||||
if (!btree_node_type_needs_gc(iter->btree_id))
|
||||
return 0;
|
||||
|
||||
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
|
||||
bpos_min(insert->k->k.p, b->key.k.p).offset -
|
||||
bkey_start_offset(&insert->k->k),
|
||||
fs_usage, trans->journal_res.seq, flags);
|
||||
if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
|
||||
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
|
||||
bpos_min(insert->k->k.p, b->key.k.p).offset -
|
||||
bkey_start_offset(&insert->k->k),
|
||||
fs_usage, trans->journal_res.seq, flags);
|
||||
|
||||
if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
|
||||
return 0;
|
||||
@ -1262,6 +1251,391 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
|
||||
}
|
||||
}
|
||||
|
||||
/* trans_mark: */
|
||||
|
||||
static inline void update_replicas_list(struct replicas_delta_list *d,
|
||||
struct bch_replicas_entry *r,
|
||||
s64 sectors)
|
||||
{
|
||||
d->top->delta = sectors;
|
||||
memcpy(&d->top->r, r, replicas_entry_bytes(r));
|
||||
|
||||
d->top = (void *) d->top + replicas_entry_bytes(r) + 8;
|
||||
|
||||
BUG_ON((void *) d->top > (void *) d->d + sizeof(d->pad));
|
||||
}
|
||||
|
||||
/*
 * Queue a change of @sectors cached sectors on device @dev: builds the cached
 * replicas entry for @dev with bch2_replicas_entry_cached() and appends it to
 * the delta list @d.
 */
static inline void update_cached_sectors_list(struct replicas_delta_list *d,
					      unsigned dev, s64 sectors)
{
	struct bch_replicas_padded cached;

	bch2_replicas_entry_cached(&cached.e, dev);
	update_replicas_list(d, &cached.e, sectors);
}
|
||||
|
||||
void bch2_replicas_delta_list_apply(struct bch_fs *c,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
struct replicas_delta_list *r)
|
||||
{
|
||||
struct replicas_delta *d = r->d;
|
||||
|
||||
acc_u64s((u64 *) fs_usage,
|
||||
(u64 *) &r->fs_usage, sizeof(*fs_usage) / sizeof(u64));
|
||||
|
||||
while (d != r->top) {
|
||||
BUG_ON((void *) d > (void *) r->top);
|
||||
|
||||
update_replicas(c, fs_usage, &d->r, d->delta);
|
||||
|
||||
d = (void *) d + replicas_entry_bytes(&d->r) + 8;
|
||||
}
|
||||
}
|
||||
|
||||
static int trans_get_key(struct btree_trans *trans,
|
||||
enum btree_id btree_id, struct bpos pos,
|
||||
struct btree_insert_entry **insert,
|
||||
struct btree_iter **iter,
|
||||
struct bkey_s_c *k)
|
||||
{
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
*insert = NULL;
|
||||
|
||||
for (i = 0; i < trans->nr_updates; i++)
|
||||
if (!trans->updates[i].deferred &&
|
||||
trans->updates[i].iter->btree_id == btree_id &&
|
||||
!bkey_cmp(pos, trans->updates[i].iter->pos)) {
|
||||
*insert = &trans->updates[i];
|
||||
*iter = (*insert)->iter;
|
||||
*k = bkey_i_to_s_c((*insert)->k);
|
||||
return 0;
|
||||
}
|
||||
|
||||
*iter = __bch2_trans_get_iter(trans, btree_id, pos,
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, 0);
|
||||
if (IS_ERR(*iter))
|
||||
return PTR_ERR(*iter);
|
||||
|
||||
*k = bch2_btree_iter_peek_slot(*iter);
|
||||
ret = bkey_err(*k);
|
||||
if (ret)
|
||||
bch2_trans_iter_put(trans, *iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int trans_update_key(struct btree_trans *trans,
|
||||
struct btree_insert_entry **insert,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k,
|
||||
unsigned extra_u64s)
|
||||
{
|
||||
struct bkey_i *new_k;
|
||||
|
||||
if (*insert)
|
||||
return 0;
|
||||
|
||||
new_k = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
|
||||
extra_u64s * sizeof(u64));
|
||||
if (IS_ERR(new_k))
|
||||
return PTR_ERR(new_k);
|
||||
|
||||
*insert = bch2_trans_update(trans, ((struct btree_insert_entry) {
|
||||
.iter = iter,
|
||||
.k = new_k,
|
||||
.triggered = true,
|
||||
}));
|
||||
|
||||
bkey_reassemble((*insert)->k, k);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_trans_mark_pointer(struct btree_trans *trans,
|
||||
struct extent_ptr_decoded p,
|
||||
s64 sectors, enum bch_data_type data_type,
|
||||
struct replicas_delta_list *d)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
|
||||
struct btree_insert_entry *insert;
|
||||
struct btree_iter *iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_alloc_unpacked u;
|
||||
struct bkey_i_alloc *a;
|
||||
bool overflow;
|
||||
int ret;
|
||||
|
||||
ret = trans_get_key(trans, BTREE_ID_ALLOC,
|
||||
POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)),
|
||||
&insert, &iter, &k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (k.k->type != KEY_TYPE_alloc) {
|
||||
bch_err_ratelimited(c, "pointer to nonexistent bucket %u:%zu",
|
||||
p.ptr.dev,
|
||||
PTR_BUCKET_NR(ca, &p.ptr));
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
|
||||
|
||||
if (gen_after(u.gen, p.ptr.gen)) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!p.ptr.cached)
|
||||
overflow = checked_add(u.dirty_sectors, sectors);
|
||||
else
|
||||
overflow = checked_add(u.cached_sectors, sectors);
|
||||
|
||||
u.data_type = u.dirty_sectors || u.cached_sectors
|
||||
? data_type : 0;
|
||||
|
||||
bch2_fs_inconsistent_on(overflow, c,
|
||||
"bucket sector count overflow: %u + %lli > U16_MAX",
|
||||
!p.ptr.cached
|
||||
? u.dirty_sectors
|
||||
: u.cached_sectors, sectors);
|
||||
|
||||
ret = trans_update_key(trans, &insert, iter, k, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
a = bkey_alloc_init(insert->k);
|
||||
a->k.p = iter->pos;
|
||||
bch2_alloc_pack(a, u);
|
||||
out:
|
||||
bch2_trans_iter_put(trans, iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
|
||||
struct bch_extent_stripe_ptr p,
|
||||
s64 sectors, enum bch_data_type data_type,
|
||||
struct replicas_delta_list *d)
|
||||
{
|
||||
struct bch_replicas_padded r;
|
||||
struct btree_insert_entry *insert;
|
||||
struct btree_iter *iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_s_stripe s;
|
||||
unsigned nr_data;
|
||||
s64 parity_sectors;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(!sectors);
|
||||
|
||||
ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx),
|
||||
&insert, &iter, &k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (k.k->type != KEY_TYPE_stripe) {
|
||||
bch_err_ratelimited(trans->c,
|
||||
"pointer to nonexistent stripe %llu",
|
||||
(u64) p.idx);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = trans_update_key(trans, &insert, iter, k, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
s = bkey_i_to_s_stripe(insert->k);
|
||||
|
||||
nr_data = s.v->nr_blocks - s.v->nr_redundant;
|
||||
|
||||
parity_sectors = DIV_ROUND_UP(abs(sectors) * s.v->nr_redundant, nr_data);
|
||||
|
||||
if (sectors < 0)
|
||||
parity_sectors = -parity_sectors;
|
||||
|
||||
stripe_blockcount_set(s.v, p.block,
|
||||
stripe_blockcount_get(s.v, p.block) +
|
||||
sectors + parity_sectors);
|
||||
|
||||
bch2_bkey_to_replicas(&r.e, s.s_c);
|
||||
|
||||
update_replicas_list(d, &r.e, sectors);
|
||||
out:
|
||||
bch2_trans_iter_put(trans, iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_trans_mark_extent(struct btree_trans *trans,
|
||||
struct bkey_s_c k,
|
||||
s64 sectors, enum bch_data_type data_type,
|
||||
struct replicas_delta_list *d)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
struct bch_replicas_padded r;
|
||||
s64 dirty_sectors = 0;
|
||||
bool stale;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
r.e.data_type = data_type;
|
||||
r.e.nr_devs = 0;
|
||||
r.e.nr_required = 1;
|
||||
|
||||
BUG_ON(!sectors);
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
s64 disk_sectors = data_type == BCH_DATA_BTREE
|
||||
? sectors
|
||||
: ptr_disk_sectors_delta(p, sectors);
|
||||
|
||||
ret = bch2_trans_mark_pointer(trans, p, disk_sectors,
|
||||
data_type, d);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
stale = ret > 0;
|
||||
|
||||
if (p.ptr.cached) {
|
||||
if (disk_sectors && !stale)
|
||||
update_cached_sectors_list(d, p.ptr.dev,
|
||||
disk_sectors);
|
||||
} else if (!p.ec_nr) {
|
||||
dirty_sectors += disk_sectors;
|
||||
r.e.devs[r.e.nr_devs++] = p.ptr.dev;
|
||||
} else {
|
||||
for (i = 0; i < p.ec_nr; i++) {
|
||||
ret = bch2_trans_mark_stripe_ptr(trans, p.ec[i],
|
||||
disk_sectors, data_type, d);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
r.e.nr_required = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (dirty_sectors)
|
||||
update_replicas_list(d, &r.e, dirty_sectors);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_trans_mark_key(struct btree_trans *trans,
|
||||
struct bkey_s_c k,
|
||||
bool inserting, s64 sectors,
|
||||
struct replicas_delta_list *d)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_btree_ptr:
|
||||
return bch2_trans_mark_extent(trans, k, inserting
|
||||
? c->opts.btree_node_size
|
||||
: -c->opts.btree_node_size,
|
||||
BCH_DATA_BTREE, d);
|
||||
case KEY_TYPE_extent:
|
||||
return bch2_trans_mark_extent(trans, k,
|
||||
sectors, BCH_DATA_USER, d);
|
||||
case KEY_TYPE_inode:
|
||||
if (inserting)
|
||||
d->fs_usage.nr_inodes++;
|
||||
else
|
||||
d->fs_usage.nr_inodes--;
|
||||
return 0;
|
||||
case KEY_TYPE_reservation: {
|
||||
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
|
||||
|
||||
sectors *= replicas;
|
||||
replicas = clamp_t(unsigned, replicas, 1,
|
||||
ARRAY_SIZE(d->fs_usage.persistent_reserved));
|
||||
|
||||
d->fs_usage.reserved += sectors;
|
||||
d->fs_usage.persistent_reserved[replicas - 1] += sectors;
|
||||
return 0;
|
||||
}
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int bch2_trans_mark_update(struct btree_trans *trans,
|
||||
struct btree_insert_entry *insert,
|
||||
struct replicas_delta_list *d)
|
||||
{
|
||||
struct btree_iter *iter = insert->iter;
|
||||
struct btree *b = iter->l[0].b;
|
||||
struct btree_node_iter node_iter = iter->l[0].iter;
|
||||
struct bkey_packed *_k;
|
||||
int ret;
|
||||
|
||||
if (!btree_node_type_needs_gc(iter->btree_id))
|
||||
return 0;
|
||||
|
||||
ret = bch2_trans_mark_key(trans,
|
||||
bkey_i_to_s_c(insert->k), true,
|
||||
bpos_min(insert->k->k.p, b->key.k.p).offset -
|
||||
bkey_start_offset(&insert->k->k), d);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
|
||||
KEY_TYPE_discard))) {
|
||||
struct bkey unpacked;
|
||||
struct bkey_s_c k;
|
||||
s64 sectors = 0;
|
||||
|
||||
k = bkey_disassemble(b, _k, &unpacked);
|
||||
|
||||
if (btree_node_is_extents(b)
|
||||
? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
|
||||
: bkey_cmp(insert->k->k.p, k.k->p))
|
||||
break;
|
||||
|
||||
if (btree_node_is_extents(b)) {
|
||||
switch (bch2_extent_overlap(&insert->k->k, k.k)) {
|
||||
case BCH_EXTENT_OVERLAP_ALL:
|
||||
sectors = -((s64) k.k->size);
|
||||
break;
|
||||
case BCH_EXTENT_OVERLAP_BACK:
|
||||
sectors = bkey_start_offset(&insert->k->k) -
|
||||
k.k->p.offset;
|
||||
break;
|
||||
case BCH_EXTENT_OVERLAP_FRONT:
|
||||
sectors = bkey_start_offset(k.k) -
|
||||
insert->k->k.p.offset;
|
||||
break;
|
||||
case BCH_EXTENT_OVERLAP_MIDDLE:
|
||||
sectors = k.k->p.offset - insert->k->k.p.offset;
|
||||
BUG_ON(sectors <= 0);
|
||||
|
||||
ret = bch2_trans_mark_key(trans, k, true,
|
||||
sectors, d);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
sectors = bkey_start_offset(&insert->k->k) -
|
||||
k.k->p.offset;
|
||||
break;
|
||||
}
|
||||
|
||||
BUG_ON(sectors >= 0);
|
||||
}
|
||||
|
||||
ret = bch2_trans_mark_key(trans, k, false, sectors, d);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_btree_node_iter_advance(&node_iter, b);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Disk reservations: */
|
||||
|
||||
#define SECTORS_CACHE 1024
|
||||
|
@ -100,7 +100,7 @@ static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca,
|
||||
struct bucket_mark m;
|
||||
|
||||
rcu_read_lock();
|
||||
m = READ_ONCE(bucket(ca, PTR_BUCKET_NR(ca, ptr))->mark);
|
||||
m = READ_ONCE(PTR_BUCKET(ca, ptr, 0)->mark);
|
||||
rcu_read_unlock();
|
||||
|
||||
return m;
|
||||
@ -266,6 +266,15 @@ int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *,
|
||||
struct bch_fs_usage *, unsigned);
|
||||
int bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
|
||||
struct bch_fs_usage *, unsigned);
|
||||
|
||||
void bch2_replicas_delta_list_apply(struct bch_fs *,
|
||||
struct bch_fs_usage *,
|
||||
struct replicas_delta_list *);
|
||||
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
|
||||
bool, s64, struct replicas_delta_list *);
|
||||
int bch2_trans_mark_update(struct btree_trans *,
|
||||
struct btree_insert_entry *,
|
||||
struct replicas_delta_list *);
|
||||
void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage_online *);
|
||||
|
||||
/* disk reservations: */
|
||||
|
@ -93,6 +93,19 @@ struct bch_fs_usage_short {
|
||||
u64 nr_inodes;
|
||||
};
|
||||
|
||||
/*
 * One record in a replicas_delta_list: a signed change in sector count
 * (@delta) to be applied to the usage of the replicas entry @r.
 *
 * The struct is packed and records are laid out back to back, each one
 * being followed immediately by the next; the code that walks the list
 * steps from one record to the next by 8 bytes (the delta) plus
 * replicas_entry_bytes() of the entry.
 */
struct replicas_delta {
|
||||
s64 delta;
|
||||
struct bch_replicas_entry r;
|
||||
} __packed;
|
||||
|
||||
/*
 * Usage changes accumulated by transactional triggers, to be applied later
 * with bch2_replicas_delta_list_apply().
 *
 * @fs_usage holds changes to the fixed counters (inode count, reserved
 * sectors and so on). Changes to per replicas entry usage are kept as a
 * list of variable length replicas_delta records beginning at @d; @top
 * points just past the last record, and the list is empty when @top == @d.
 *
 * @d is zero length and is followed directly by @pad: the code that appends
 * records treats the sizeof(pad) bytes starting at @d as the space available
 * for them, and it is a BUG to need more.
 */
struct replicas_delta_list {
|
||||
struct bch_fs_usage fs_usage;
|
||||
|
||||
struct replicas_delta *top;
|
||||
struct replicas_delta d[0];
|
||||
u8 pad[256];
|
||||
};
|
||||
|
||||
/*
|
||||
* A reservation for space on disk:
|
||||
*/
|
||||
|
@ -539,14 +539,17 @@ static int ec_stripe_mem_alloc(struct bch_fs *c,
|
||||
struct btree_iter *iter)
|
||||
{
|
||||
size_t idx = iter->pos.offset;
|
||||
int ret = 0;
|
||||
|
||||
if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN))
|
||||
return 0;
|
||||
return ret;
|
||||
|
||||
bch2_btree_trans_unlock(iter->trans);
|
||||
ret = -EINTR;
|
||||
|
||||
if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL))
|
||||
return -EINTR;
|
||||
return ret;
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -692,23 +695,22 @@ retry:
|
||||
|
||||
if (!ret)
|
||||
ret = -ENOSPC;
|
||||
goto out;
|
||||
goto err;
|
||||
found_slot:
|
||||
ret = ec_stripe_mem_alloc(c, iter);
|
||||
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
if (ret)
|
||||
return ret;
|
||||
goto err;
|
||||
|
||||
stripe->k.p = iter->pos;
|
||||
|
||||
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &stripe->k_i));
|
||||
|
||||
ret = bch2_trans_commit(&trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE);
|
||||
out:
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL);
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
return ret;
|
||||
@ -745,6 +747,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
|
||||
int ret = 0, dev, idx;
|
||||
|
||||
bch2_trans_init(&trans, c);
|
||||
bch2_trans_preload_iters(&trans);
|
||||
|
||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
|
||||
bkey_start_pos(pos),
|
||||
|
@ -903,15 +903,54 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
|
||||
bch2_btree_iter_verify(iter, l->b);
|
||||
}
|
||||
|
||||
static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
unsigned ret = 0;
|
||||
|
||||
bkey_extent_entry_for_each(ptrs, entry) {
|
||||
switch (__extent_entry_type(entry)) {
|
||||
case BCH_EXTENT_ENTRY_ptr:
|
||||
case BCH_EXTENT_ENTRY_stripe_ptr:
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct bpos
|
||||
bch2_extent_atomic_end(struct bkey_i *k, struct btree_iter *iter)
|
||||
bch2_extent_atomic_end(struct bkey_i *insert, struct btree_iter *iter)
|
||||
{
|
||||
struct btree *b = iter->l[0].b;
|
||||
struct btree_node_iter node_iter = iter->l[0].iter;
|
||||
struct bkey_packed *_k;
|
||||
unsigned nr_alloc_ptrs =
|
||||
bch2_bkey_nr_alloc_ptrs(bkey_i_to_s_c(insert));
|
||||
|
||||
BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
|
||||
BUG_ON(bkey_cmp(bkey_start_pos(&k->k), b->data->min_key) < 0);
|
||||
BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);
|
||||
|
||||
return bpos_min(k->k.p, b->key.k.p);
|
||||
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
|
||||
KEY_TYPE_discard))) {
|
||||
struct bkey unpacked;
|
||||
struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
|
||||
|
||||
if (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0)
|
||||
break;
|
||||
|
||||
nr_alloc_ptrs += bch2_bkey_nr_alloc_ptrs(k);
|
||||
|
||||
if (nr_alloc_ptrs > 20) {
|
||||
BUG_ON(bkey_cmp(k.k->p, bkey_start_pos(&insert->k)) <= 0);
|
||||
return bpos_min(insert->k.p, k.k->p);
|
||||
}
|
||||
|
||||
bch2_btree_node_iter_advance(&node_iter, b);
|
||||
}
|
||||
|
||||
return bpos_min(insert->k.p, b->key.k.p);
|
||||
}
|
||||
|
||||
void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
|
||||
|
@ -43,6 +43,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c);
|
||||
bch2_trans_preload_iters(&trans);
|
||||
|
||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
|
||||
POS_MIN, BTREE_ITER_PREFETCH);
|
||||
@ -96,6 +97,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
break;
|
||||
}
|
||||
|
||||
BUG_ON(ret == -EINTR);
|
||||
|
||||
bch2_trans_exit(&trans);
|
||||
|
||||
bch2_replicas_gc_end(c, ret);
|
||||
|
@ -62,6 +62,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
|
||||
int ret = 0;
|
||||
|
||||
bch2_trans_init(&trans, c);
|
||||
bch2_trans_preload_iters(&trans);
|
||||
|
||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
|
||||
bkey_start_pos(&bch2_keylist_front(keys)->k),
|
||||
@ -184,6 +185,7 @@ nomatch:
|
||||
}
|
||||
out:
|
||||
bch2_trans_exit(&trans);
|
||||
BUG_ON(ret == -EINTR);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -212,11 +212,6 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
|
||||
bch2_disk_reservation_init(c, 0);
|
||||
struct bkey_i *split;
|
||||
bool split_compressed = false;
|
||||
unsigned flags = BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_JOURNAL_REPLAY|
|
||||
BTREE_INSERT_NOMARK;
|
||||
int ret;
|
||||
|
||||
bch2_trans_init(&trans, c);
|
||||
@ -252,9 +247,6 @@ retry:
|
||||
BCH_DISK_RESERVATION_NOFAIL);
|
||||
BUG_ON(ret);
|
||||
|
||||
flags &= ~BTREE_INSERT_JOURNAL_REPLAY;
|
||||
flags &= ~BTREE_INSERT_NOMARK;
|
||||
flags |= BTREE_INSERT_NOMARK_OVERWRITES;
|
||||
split_compressed = true;
|
||||
}
|
||||
|
||||
@ -266,24 +258,31 @@ retry:
|
||||
bch2_btree_iter_set_pos(iter, split->k.p);
|
||||
} while (bkey_cmp(iter->pos, k->k.p) < 0);
|
||||
|
||||
ret = bch2_trans_commit(&trans, &disk_res, NULL, flags);
|
||||
if (split_compressed) {
|
||||
memset(&trans.fs_usage_deltas.fs_usage, 0,
|
||||
sizeof(trans.fs_usage_deltas.fs_usage));
|
||||
trans.fs_usage_deltas.top = trans.fs_usage_deltas.d;
|
||||
|
||||
ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k), false,
|
||||
-((s64) k->k.size),
|
||||
&trans.fs_usage_deltas) ?:
|
||||
bch2_trans_commit(&trans, &disk_res, NULL,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOMARK_OVERWRITES|
|
||||
BTREE_INSERT_NO_CLEAR_REPLICAS);
|
||||
} else {
|
||||
ret = bch2_trans_commit(&trans, &disk_res, NULL,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_JOURNAL_REPLAY|
|
||||
BTREE_INSERT_NOMARK);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (split_compressed) {
|
||||
/*
|
||||
* This isn't strictly correct - we should only be relying on
|
||||
* the btree node lock for synchronization with gc when we've
|
||||
* got a write lock held.
|
||||
*
|
||||
* but - there are other correctness issues if btree gc were to
|
||||
* run before journal replay finishes
|
||||
*/
|
||||
BUG_ON(c->gc_pos.phase);
|
||||
|
||||
bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
|
||||
NULL, 0, 0);
|
||||
}
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
@ -527,7 +526,7 @@ static int verify_superblock_clean(struct bch_fs *c,
|
||||
struct bch_sb_field_clean *clean = *cleanp;
|
||||
int ret = 0;
|
||||
|
||||
if (!clean || !j)
|
||||
if (!c->sb.clean || !j)
|
||||
return 0;
|
||||
|
||||
if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
|
||||
@ -653,6 +652,7 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
u64 journal_seq;
|
||||
LIST_HEAD(journal_entries);
|
||||
struct journal_keys journal_keys = { NULL };
|
||||
bool wrote = false, write_sb = false;
|
||||
int ret;
|
||||
|
||||
if (c->sb.clean)
|
||||
@ -677,8 +677,12 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
fsck_err_on(c->sb.clean && !journal_empty(&journal_entries), c,
|
||||
"filesystem marked clean but journal not empty");
|
||||
if (mustfix_fsck_err_on(c->sb.clean && !journal_empty(&journal_entries), c,
|
||||
"filesystem marked clean but journal not empty")) {
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
|
||||
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
|
||||
c->sb.clean = false;
|
||||
}
|
||||
|
||||
if (!c->sb.clean && list_empty(&journal_entries)) {
|
||||
bch_err(c, "no journal entries found");
|
||||
@ -736,12 +740,15 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch_verbose(c, "starting alloc read");
|
||||
err = "error reading allocation information";
|
||||
ret = bch2_alloc_read(c, &journal_keys);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "alloc read done");
|
||||
|
||||
bch_verbose(c, "starting stripes_read");
|
||||
err = "error reading stripes";
|
||||
ret = bch2_stripes_read(c, &journal_keys);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -749,11 +756,26 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
|
||||
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
|
||||
|
||||
if ((c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) &&
|
||||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA))) {
|
||||
/*
|
||||
* interior btree node updates aren't consistent with the
|
||||
* journal; after an unclean shutdown we have to walk all
|
||||
* pointers to metadata:
|
||||
*/
|
||||
bch_verbose(c, "starting metadata mark and sweep:");
|
||||
err = "error in mark and sweep";
|
||||
ret = bch2_gc(c, NULL, true, true);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "mark and sweep done");
|
||||
}
|
||||
|
||||
if (c->opts.fsck ||
|
||||
!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
|
||||
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
|
||||
bch_verbose(c, "starting mark and sweep:");
|
||||
err = "error in recovery";
|
||||
err = "error in mark and sweep";
|
||||
ret = bch2_gc(c, &journal_keys, true, false);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -780,6 +802,16 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
goto err;
|
||||
bch_verbose(c, "journal replay done");
|
||||
|
||||
bch_verbose(c, "writing allocation info:");
|
||||
err = "error writing out alloc info";
|
||||
ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?:
|
||||
bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote);
|
||||
if (ret) {
|
||||
bch_err(c, "error writing alloc info");
|
||||
goto err;
|
||||
}
|
||||
bch_verbose(c, "alloc write done");
|
||||
|
||||
if (c->opts.norecovery)
|
||||
goto out;
|
||||
|
||||
@ -802,13 +834,23 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
c->disk_sb.sb->version_min =
|
||||
le16_to_cpu(bcachefs_metadata_version_min);
|
||||
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
|
||||
write_sb = true;
|
||||
}
|
||||
|
||||
if (!test_bit(BCH_FS_ERROR, &c->flags)) {
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
|
||||
write_sb = true;
|
||||
}
|
||||
|
||||
if (c->opts.fsck &&
|
||||
!test_bit(BCH_FS_ERROR, &c->flags)) {
|
||||
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
|
||||
SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
|
||||
write_sb = true;
|
||||
}
|
||||
|
||||
if (write_sb)
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
if (c->journal_seq_blacklist_table &&
|
||||
@ -821,7 +863,7 @@ out:
|
||||
return ret;
|
||||
err:
|
||||
fsck_err:
|
||||
pr_err("Error in recovery: %s (%i)", err, ret);
|
||||
bch_err(c, "Error in recovery: %s (%i)", err, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -102,8 +102,8 @@ static void stripe_to_replicas(struct bkey_s_c k,
|
||||
r->devs[r->nr_devs++] = ptr->dev;
|
||||
}
|
||||
|
||||
static void bkey_to_replicas(struct bch_replicas_entry *e,
|
||||
struct bkey_s_c k)
|
||||
void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
e->nr_devs = 0;
|
||||
|
||||
@ -439,7 +439,7 @@ bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
|
||||
return false;
|
||||
}
|
||||
|
||||
bkey_to_replicas(&search.e, k);
|
||||
bch2_bkey_to_replicas(&search.e, k);
|
||||
|
||||
return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
|
||||
}
|
||||
@ -472,7 +472,7 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
|
||||
return ret;
|
||||
}
|
||||
|
||||
bkey_to_replicas(&search.e, k);
|
||||
bch2_bkey_to_replicas(&search.e, k);
|
||||
|
||||
return bch2_mark_replicas(c, &search.e);
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ int bch2_mark_replicas(struct bch_fs *,
|
||||
|
||||
bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
|
||||
struct bkey_s_c, bool);
|
||||
void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c);
|
||||
bool bch2_bkey_replicas_marked(struct bch_fs *,
|
||||
struct bkey_s_c, bool);
|
||||
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
|
||||
|
@ -946,7 +946,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
|
||||
c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
|
||||
c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA);
|
||||
ret = bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
@ -1063,6 +1063,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
|
||||
SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
|
||||
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
|
||||
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA;
|
||||
|
||||
u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user