gfs2: Introduce concept of a pending withdraw

File system withdraws can be delayed when inconsistencies are
discovered when we cannot withdraw immediately, for example, when
critical spin_locks are held. But delaying the withdraw can cause
gfs2 to ignore the error and keep running for a short period of time.
For example, an rgrp glock may be dequeued and demoted while there
are still buffers that haven't been properly revoked, due to io
errors writing to the journal.

This patch introduces a new concept of a pending withdraw, which
means an inconsistency has been discovered and we need to withdraw
at the earliest possible opportunity. In these cases, we aren't
quite withdrawn yet, but we still need to not dequeue glocks and
other critical things. If we dequeue the glocks and the withdraw
results in our journal being replayed, the replay could overwrite
data that's been modified by a different node that acquired the
glock in the meantime.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com>
This commit is contained in:
Bob Peterson 2019-02-12 13:43:55 -07:00
parent 8e28ef1f2f
commit 69511080bd
4 changed files with 27 additions and 21 deletions

View File

@ -618,6 +618,7 @@ enum {
SDF_FORCE_AIL_FLUSH = 9,
SDF_AIL1_IO_ERROR = 10,
SDF_FS_FROZEN = 11,
SDF_WITHDRAWING = 12, /* Will withdraw eventually */
};
enum gfs2_freeze_state {

View File

@ -88,8 +88,7 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
struct writeback_control *wbc,
struct gfs2_trans *tr,
bool *withdraw)
struct gfs2_trans *tr)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
@ -108,7 +107,7 @@ __acquires(&sdp->sd_ail_lock)
!test_and_set_bit(SDF_AIL1_IO_ERROR,
&sdp->sd_flags)) {
gfs2_io_error_bh(sdp, bh);
*withdraw = true;
gfs2_withdraw_delayed(sdp);
}
list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
continue;
@ -149,7 +148,6 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
struct list_head *head = &sdp->sd_ail1_list;
struct gfs2_trans *tr;
struct blk_plug plug;
bool withdraw = false;
trace_gfs2_ail_flush(sdp, wbc, 1);
blk_start_plug(&plug);
@ -158,13 +156,12 @@ restart:
list_for_each_entry_reverse(tr, head, tr_list) {
if (wbc->nr_to_write <= 0)
break;
if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw) &&
!gfs2_withdrawn(sdp))
if (gfs2_ail1_start_one(sdp, wbc, tr) && !gfs2_withdrawn(sdp))
goto restart;
}
spin_unlock(&sdp->sd_ail_lock);
blk_finish_plug(&plug);
if (withdraw)
if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags))
gfs2_withdraw(sdp);
trace_gfs2_ail_flush(sdp, wbc, 0);
}
@ -193,8 +190,7 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
*
*/
static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
bool *withdraw)
static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
@ -208,7 +204,7 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
if (!buffer_uptodate(bh) &&
!test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) {
gfs2_io_error_bh(sdp, bh);
*withdraw = true;
gfs2_withdraw_delayed(sdp);
}
list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
}
@ -226,11 +222,10 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
struct gfs2_trans *tr, *s;
int oldest_tr = 1;
int ret;
bool withdraw = false;
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
gfs2_ail1_empty_one(sdp, tr, &withdraw);
gfs2_ail1_empty_one(sdp, tr);
if (list_empty(&tr->tr_ail1_list) && oldest_tr)
list_move(&tr->tr_list, &sdp->sd_ail2_list);
else
@ -239,7 +234,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
ret = list_empty(&sdp->sd_ail1_list);
spin_unlock(&sdp->sd_ail_lock);
if (withdraw) {
if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) {
gfs2_lm(sdp, "fatal: I/O error(s)\n");
gfs2_withdraw(sdp);
}

View File

@ -249,13 +249,13 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *function, char *file, unsigned int line,
bool withdraw)
{
if (!gfs2_withdrawn(sdp))
fs_err(sdp,
"fatal: I/O error\n"
" block = %llu\n"
" function = %s, file = %s, line = %u\n",
(unsigned long long)bh->b_blocknr,
function, file, line);
if (gfs2_withdrawn(sdp))
return;
fs_err(sdp, "fatal: I/O error\n"
" block = %llu\n"
" function = %s, file = %s, line = %u\n",
(unsigned long long)bh->b_blocknr, function, file, line);
if (withdraw)
gfs2_withdraw(sdp);
}

View File

@ -172,13 +172,23 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
return x;
}
/**
* gfs2_withdraw_delayed - withdraw as soon as possible without deadlocks
* @sdp: the superblock
*/
static inline void gfs2_withdraw_delayed(struct gfs2_sbd *sdp)
{
set_bit(SDF_WITHDRAWING, &sdp->sd_flags);
}
/**
* gfs2_withdrawn - test whether the file system is withdrawing or withdrawn
* @sdp: the superblock
*/
static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp)
{
return test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
return test_bit(SDF_WITHDRAWN, &sdp->sd_flags) ||
test_bit(SDF_WITHDRAWING, &sdp->sd_flags);
}
#define gfs2_tune_get(sdp, field) \