mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-13 14:24:11 +08:00
bcachefs: Really don't hold btree locks while btree IOs are in flight
This is something we've attempted to stick to for quite some time, as it helps guarantee filesystem latency - but there's a few remaining paths that this patch fixes. This is also necessary for an upcoming patch to update btree pointers after every btree write - since the btree write completion path will now be doing btree operations. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
e3a67bdb6e
commit
19d5432445
@ -187,6 +187,17 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&bc->lock);
|
||||
wait_on_io:
|
||||
if (b->flags & ((1U << BTREE_NODE_dirty)|
|
||||
(1U << BTREE_NODE_read_in_flight)|
|
||||
(1U << BTREE_NODE_write_in_flight))) {
|
||||
if (!flush)
|
||||
return -ENOMEM;
|
||||
|
||||
/* XXX: waiting on IO with btree cache lock held */
|
||||
bch2_btree_node_wait_on_read(b);
|
||||
bch2_btree_node_wait_on_write(b);
|
||||
}
|
||||
|
||||
if (!six_trylock_intent(&b->c.lock))
|
||||
return -ENOMEM;
|
||||
@ -194,25 +205,26 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
|
||||
if (!six_trylock_write(&b->c.lock))
|
||||
goto out_unlock_intent;
|
||||
|
||||
/* recheck under lock */
|
||||
if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
|
||||
(1U << BTREE_NODE_write_in_flight))) {
|
||||
if (!flush)
|
||||
goto out_unlock;
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
goto wait_on_io;
|
||||
}
|
||||
|
||||
if (btree_node_noevict(b))
|
||||
goto out_unlock;
|
||||
|
||||
if (!btree_node_may_write(b))
|
||||
goto out_unlock;
|
||||
|
||||
if (btree_node_dirty(b) &&
|
||||
test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
|
||||
goto out_unlock;
|
||||
|
||||
if (btree_node_dirty(b) ||
|
||||
btree_node_write_in_flight(b) ||
|
||||
btree_node_read_in_flight(b)) {
|
||||
if (!flush)
|
||||
if (btree_node_dirty(b)) {
|
||||
if (!flush ||
|
||||
test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
|
||||
goto out_unlock;
|
||||
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
|
||||
/*
|
||||
* Using the underscore version because we don't want to compact
|
||||
* bsets after the write, since this node is about to be evicted
|
||||
@ -224,8 +236,9 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
|
||||
else
|
||||
__bch2_btree_node_write(c, b);
|
||||
|
||||
/* wait for any in flight btree write */
|
||||
btree_node_wait_on_io(b);
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
goto wait_on_io;
|
||||
}
|
||||
out:
|
||||
if (b->hash_val && !ret)
|
||||
@ -581,6 +594,7 @@ got_node:
|
||||
}
|
||||
|
||||
BUG_ON(btree_node_hashed(b));
|
||||
BUG_ON(btree_node_dirty(b));
|
||||
BUG_ON(btree_node_write_in_flight(b));
|
||||
out:
|
||||
b->flags = 0;
|
||||
@ -634,6 +648,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
|
||||
{
|
||||
struct btree_cache *bc = &c->btree_cache;
|
||||
struct btree *b;
|
||||
u32 seq;
|
||||
|
||||
BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
|
||||
/*
|
||||
@ -663,31 +678,31 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
set_btree_node_read_in_flight(b);
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
seq = b->c.lock.state.seq;
|
||||
six_unlock_intent(&b->c.lock);
|
||||
|
||||
/* Unlock before doing IO: */
|
||||
if (iter && sync)
|
||||
bch2_trans_unlock(iter->trans);
|
||||
|
||||
bch2_btree_node_read(c, b, sync);
|
||||
|
||||
six_unlock_write(&b->c.lock);
|
||||
|
||||
if (!sync) {
|
||||
six_unlock_intent(&b->c.lock);
|
||||
if (!sync)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: this will probably always fail because btree_iter_relock()
|
||||
* currently fails for iterators that aren't pointed at a valid btree
|
||||
* node
|
||||
*/
|
||||
if (iter && !bch2_trans_relock(iter->trans)) {
|
||||
six_unlock_intent(&b->c.lock);
|
||||
if (iter && !bch2_trans_relock(iter->trans))
|
||||
return ERR_PTR(-EINTR);
|
||||
}
|
||||
|
||||
if (lock_type == SIX_LOCK_read)
|
||||
six_lock_downgrade(&b->c.lock);
|
||||
if (!six_relock_type(&b->c.lock, lock_type, seq))
|
||||
return ERR_PTR(-EINTR);
|
||||
|
||||
return b;
|
||||
}
|
||||
@ -831,11 +846,12 @@ lock_node:
|
||||
}
|
||||
|
||||
if (unlikely(btree_node_read_in_flight(b))) {
|
||||
u32 seq = b->c.lock.state.seq;
|
||||
|
||||
six_unlock_type(&b->c.lock, lock_type);
|
||||
bch2_trans_unlock(iter->trans);
|
||||
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
bch2_btree_node_wait_on_read(b);
|
||||
|
||||
/*
|
||||
* XXX: check if this always fails - btree_iter_relock()
|
||||
@ -844,7 +860,9 @@ lock_node:
|
||||
*/
|
||||
if (iter && !bch2_trans_relock(iter->trans))
|
||||
return ERR_PTR(-EINTR);
|
||||
goto retry;
|
||||
|
||||
if (!six_relock_type(&b->c.lock, lock_type, seq))
|
||||
goto retry;
|
||||
}
|
||||
|
||||
prefetch(b->aux_data);
|
||||
@ -923,8 +941,7 @@ lock_node:
|
||||
}
|
||||
|
||||
/* XXX: waiting on IO with btree locks held: */
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
__bch2_btree_node_wait_on_read(b);
|
||||
|
||||
prefetch(b->aux_data);
|
||||
|
||||
@ -979,16 +996,24 @@ void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
|
||||
b = btree_cache_find(bc, k);
|
||||
if (!b)
|
||||
return;
|
||||
wait_on_io:
|
||||
/* not allowed to wait on io with btree locks held: */
|
||||
|
||||
/* XXX we're called from btree_gc which will be holding other btree
|
||||
* nodes locked
|
||||
* */
|
||||
__bch2_btree_node_wait_on_read(b);
|
||||
__bch2_btree_node_wait_on_write(b);
|
||||
|
||||
six_lock_intent(&b->c.lock, NULL, NULL);
|
||||
six_lock_write(&b->c.lock, NULL, NULL);
|
||||
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
__bch2_btree_node_write(c, b);
|
||||
|
||||
/* wait for any in flight btree write */
|
||||
btree_node_wait_on_io(b);
|
||||
if (btree_node_dirty(b)) {
|
||||
__bch2_btree_node_write(c, b);
|
||||
six_unlock_write(&b->c.lock);
|
||||
six_unlock_intent(&b->c.lock);
|
||||
goto wait_on_io;
|
||||
}
|
||||
|
||||
BUG_ON(btree_node_dirty(b));
|
||||
|
||||
|
@ -22,6 +22,44 @@
|
||||
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
void bch2_btree_node_io_unlock(struct btree *b)
|
||||
{
|
||||
EBUG_ON(!btree_node_write_in_flight(b));
|
||||
|
||||
clear_btree_node_write_in_flight(b);
|
||||
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
|
||||
}
|
||||
|
||||
void bch2_btree_node_io_lock(struct btree *b)
|
||||
{
|
||||
wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
void __bch2_btree_node_wait_on_read(struct btree *b)
|
||||
{
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
void __bch2_btree_node_wait_on_write(struct btree *b)
|
||||
{
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
void bch2_btree_node_wait_on_read(struct btree *b)
|
||||
{
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
void bch2_btree_node_wait_on_write(struct btree *b)
|
||||
{
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
static void verify_no_dups(struct btree *b,
|
||||
struct bkey_packed *start,
|
||||
struct bkey_packed *end)
|
||||
@ -432,7 +470,8 @@ void bch2_btree_init_next(struct btree_trans *trans,
|
||||
EBUG_ON(iter && iter->l[b->c.level].b != b);
|
||||
BUG_ON(bset_written(b, bset(b, &b->set[1])));
|
||||
|
||||
if (b->nsets == MAX_BSETS) {
|
||||
if (b->nsets == MAX_BSETS &&
|
||||
!btree_node_write_in_flight(b)) {
|
||||
unsigned log_u64s[] = {
|
||||
ilog2(bset_u64s(&b->set[0])),
|
||||
ilog2(bset_u64s(&b->set[1])),
|
||||
@ -1402,8 +1441,6 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
btree_pos_to_text(&PBUF(buf), c, b);
|
||||
trace_btree_read(c, b);
|
||||
|
||||
set_btree_node_read_in_flight(b);
|
||||
|
||||
if (bch2_verify_all_btree_replicas &&
|
||||
!btree_node_read_all_replicas(c, b, sync))
|
||||
return;
|
||||
@ -1480,6 +1517,8 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
|
||||
bkey_copy(&b->key, k);
|
||||
BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id));
|
||||
|
||||
set_btree_node_read_in_flight(b);
|
||||
|
||||
bch2_btree_node_read(c, b, true);
|
||||
|
||||
if (btree_node_read_error(b)) {
|
||||
@ -1525,7 +1564,7 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
struct btree_write *w = btree_prev_write(b);
|
||||
|
||||
bch2_btree_complete_write(c, b, w);
|
||||
btree_node_io_unlock(b);
|
||||
bch2_btree_node_io_unlock(b);
|
||||
}
|
||||
|
||||
static void bch2_btree_node_write_error(struct bch_fs *c,
|
||||
@ -1707,6 +1746,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
|
||||
bool validate_before_checksum = false;
|
||||
void *data;
|
||||
|
||||
BUG_ON(btree_node_write_in_flight(b));
|
||||
|
||||
if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
|
||||
return;
|
||||
|
||||
@ -1734,7 +1775,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
|
||||
* XXX waiting on btree writes with btree locks held -
|
||||
* this can deadlock, and we hit the write error path
|
||||
*/
|
||||
btree_node_wait_on_io(b);
|
||||
bch2_btree_node_wait_on_write(b);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -52,24 +52,12 @@ struct btree_write_bio {
|
||||
struct bch_write_bio wbio;
|
||||
};
|
||||
|
||||
static inline void btree_node_io_unlock(struct btree *b)
|
||||
{
|
||||
EBUG_ON(!btree_node_write_in_flight(b));
|
||||
clear_btree_node_write_in_flight(b);
|
||||
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
|
||||
}
|
||||
|
||||
static inline void btree_node_io_lock(struct btree *b)
|
||||
{
|
||||
wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
static inline void btree_node_wait_on_io(struct btree *b)
|
||||
{
|
||||
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
void bch2_btree_node_io_unlock(struct btree *);
|
||||
void bch2_btree_node_io_lock(struct btree *);
|
||||
void __bch2_btree_node_wait_on_read(struct btree *);
|
||||
void __bch2_btree_node_wait_on_write(struct btree *);
|
||||
void bch2_btree_node_wait_on_read(struct btree *);
|
||||
void bch2_btree_node_wait_on_write(struct btree *);
|
||||
|
||||
static inline bool btree_node_may_write(struct btree *b)
|
||||
{
|
||||
@ -169,7 +157,7 @@ static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
|
||||
six_unlock_type(&b->c.lock, lock_held);
|
||||
btree_node_wait_on_io(b);
|
||||
bch2_btree_node_wait_on_write(b);
|
||||
btree_node_lock_type(c, b, lock_held);
|
||||
}
|
||||
}
|
||||
|
@ -567,7 +567,7 @@ static void btree_update_nodes_written(struct btree_update *as)
|
||||
six_unlock_read(&old->c.lock);
|
||||
|
||||
if (seq == as->old_nodes_seq[i])
|
||||
btree_node_wait_on_io(old);
|
||||
bch2_btree_node_wait_on_write(old);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -133,7 +133,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
|
||||
if (c->opts.nochanges)
|
||||
return;
|
||||
|
||||
btree_node_io_lock(b);
|
||||
bch2_btree_node_io_lock(b);
|
||||
mutex_lock(&c->verify_lock);
|
||||
|
||||
if (!c->verify_ondisk) {
|
||||
@ -176,7 +176,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&c->verify_lock);
|
||||
btree_node_io_unlock(b);
|
||||
bch2_btree_node_io_unlock(b);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
Loading…
Reference in New Issue
Block a user