jbd2: fix ocfs2 corrupt when updating journal superblock fails

If updating journal superblock fails after journal data has been
flushed, the error is omitted and this will mislead the caller as a
normal case.  In ocfs2, the checkpoint will be treated successfully
and the other node can get the lock to update. Since the sb_start is
still pointing to the old log block, it will rewrite the journal data
during journal recovery by the other node. Thus the new updates will
be overwritten and ocfs2 corrupts.  So in above case we have to return
the error, and ocfs2_commit_cache will take care of the error and
prevent the other node to do update first.  And only after recovering
journal it can do the new updates.

The issue discussion mail can be found at:
https://oss.oracle.com/pipermail/ocfs2-devel/2015-June/010856.html
http://comments.gmane.org/gmane.comp.file-systems.ext4/48841

[ Fixed bug in patch which allowed a non-negative error return from
  jbd2_cleanup_journal_tail() to leak out of jbd2_fjournal_flush(); this
  was causing xfstests ext4/306 to fail. -- Ted ]

Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Tested-by: Yiwen Jiang <jiangyiwen@huawei.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: stable@vger.kernel.org
This commit is contained in:
Joseph Qi 2015-06-15 14:36:01 -04:00 committed by Theodore Ts'o
parent 97b4af2f76
commit 6f6a6fda29
3 changed files with 35 additions and 12 deletions

View File

@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
unsigned long blocknr; unsigned long blocknr;
if (is_journal_aborted(journal)) if (is_journal_aborted(journal))
return 1; return -EIO;
if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
return 1; return 1;
@ -407,8 +407,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
if (journal->j_flags & JBD2_BARRIER) if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL); blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
__jbd2_update_log_tail(journal, first_tid, blocknr); return __jbd2_update_log_tail(journal, first_tid, blocknr);
return 0;
} }

View File

@ -876,9 +876,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
* *
* Requires j_checkpoint_mutex * Requires j_checkpoint_mutex
*/ */
void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
{ {
unsigned long freed; unsigned long freed;
int ret;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
@ -888,7 +889,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
* space and if we lose sb update during power failure we'd replay * space and if we lose sb update during power failure we'd replay
* old transaction with possibly newly overwritten data. * old transaction with possibly newly overwritten data.
*/ */
jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
if (ret)
goto out;
write_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
freed = block - journal->j_tail; freed = block - journal->j_tail;
if (block < journal->j_tail) if (block < journal->j_tail)
@ -904,6 +908,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
journal->j_tail_sequence = tid; journal->j_tail_sequence = tid;
journal->j_tail = block; journal->j_tail = block;
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
out:
return ret;
} }
/* /*
@ -1322,7 +1329,7 @@ static int journal_reset(journal_t *journal)
return jbd2_journal_start_thread(journal); return jbd2_journal_start_thread(journal);
} }
static void jbd2_write_superblock(journal_t *journal, int write_op) static int jbd2_write_superblock(journal_t *journal, int write_op)
{ {
struct buffer_head *bh = journal->j_sb_buffer; struct buffer_head *bh = journal->j_sb_buffer;
journal_superblock_t *sb = journal->j_superblock; journal_superblock_t *sb = journal->j_superblock;
@ -1361,7 +1368,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
printk(KERN_ERR "JBD2: Error %d detected when updating " printk(KERN_ERR "JBD2: Error %d detected when updating "
"journal superblock for %s.\n", ret, "journal superblock for %s.\n", ret,
journal->j_devname); journal->j_devname);
jbd2_journal_abort(journal, ret);
} }
return ret;
} }
/** /**
@ -1374,10 +1384,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
* Update a journal's superblock information about log tail and write it to * Update a journal's superblock information about log tail and write it to
* disk, waiting for the IO to complete. * disk, waiting for the IO to complete.
*/ */
void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
unsigned long tail_block, int write_op) unsigned long tail_block, int write_op)
{ {
journal_superblock_t *sb = journal->j_superblock; journal_superblock_t *sb = journal->j_superblock;
int ret;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
@ -1386,13 +1397,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
sb->s_sequence = cpu_to_be32(tail_tid); sb->s_sequence = cpu_to_be32(tail_tid);
sb->s_start = cpu_to_be32(tail_block); sb->s_start = cpu_to_be32(tail_block);
jbd2_write_superblock(journal, write_op); ret = jbd2_write_superblock(journal, write_op);
if (ret)
goto out;
/* Log is no longer empty */ /* Log is no longer empty */
write_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
WARN_ON(!sb->s_sequence); WARN_ON(!sb->s_sequence);
journal->j_flags &= ~JBD2_FLUSHED; journal->j_flags &= ~JBD2_FLUSHED;
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
out:
return ret;
} }
/** /**
@ -1941,7 +1957,14 @@ int jbd2_journal_flush(journal_t *journal)
return -EIO; return -EIO;
mutex_lock(&journal->j_checkpoint_mutex); mutex_lock(&journal->j_checkpoint_mutex);
jbd2_cleanup_journal_tail(journal); if (!err) {
err = jbd2_cleanup_journal_tail(journal);
if (err < 0) {
mutex_unlock(&journal->j_checkpoint_mutex);
goto out;
}
err = 0;
}
/* Finally, mark the journal as really needing no recovery. /* Finally, mark the journal as really needing no recovery.
* This sets s_start==0 in the underlying superblock, which is * This sets s_start==0 in the underlying superblock, which is
@ -1957,7 +1980,8 @@ int jbd2_journal_flush(journal_t *journal)
J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return 0; out:
return err;
} }
/** /**

View File

@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
unsigned long *block); unsigned long *block);
void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
/* Commit management */ /* Commit management */
@ -1157,7 +1157,7 @@ extern int jbd2_journal_recover (journal_t *journal);
extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_wipe (journal_t *, int);
extern int jbd2_journal_skip_recovery (journal_t *); extern int jbd2_journal_skip_recovery (journal_t *);
extern void jbd2_journal_update_sb_errno(journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *);
extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
unsigned long, int); unsigned long, int);
extern void __jbd2_journal_abort_hard (journal_t *); extern void __jbd2_journal_abort_hard (journal_t *);
extern void jbd2_journal_abort (journal_t *, int); extern void jbd2_journal_abort (journal_t *, int);