Various ext4 bug fixes and cleanups for v6.8-rc1. The fixes are mostly
in the fstrim and mballoc code paths. Also enable dioread_nolock in the
case where the block size is less than the page size. (Dioread_nolock
has been default in the bs == ps case for quite some time.)

Merge tag 'ext4_for_linus-6.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Various ext4 bug fixes and cleanups. The fixes are mostly in the
  fstrim and mballoc code paths. Also enable dioread_nolock in the case
  where the block size is less than the page size (dioread_nolock has
  been default in the bs == ps case for quite some time)"

* tag 'ext4_for_linus-6.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix inconsistent between segment fstrim and full fstrim
  ext4: fallback to complex scan if aligned scan doesn't work
  ext4: convert ext4_da_do_write_end() to take a folio
  ext4: allow for the last group to be marked as trimmed
  ext4: move ext4_check_bdev_write_error() into nojournal mode
  jbd2: abort journal when detecting metadata writeback error of fs dev
  jbd2: remove unused 'JBD2_CHECKPOINT_IO_ERROR' and 'j_atomic_flags'
  jbd2: replace journal state flag by checking errseq
  jbd2: add errseq to detect client fs's bdev writeback error
  ext4: improving calculation of 'fe_{len|start}' in mb_find_extent()
  ext4: clarify handling of unwritten bh in __ext4_block_zero_page_range()
  ext4: treat end of range as exclusive in ext4_zero_range()
  ext4: enable dioread_nolock as default for bs < ps case
  ext4: delete redundant calculations in ext4_mb_get_buddy_page_lock()
  ext4: reduce unnecessary memory allocation in alloc_flex_gd()
  ext4: avoid online resizing failures due to oversized flex bg
  ext4: remove unnecessary check from alloc_flex_gd()
  ext4: unify the type of flexbg_size to unsigned int
commit 0d19d9e146
fs/ext4/ext4_jbd2.c
@@ -235,8 +235,6 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
 
 	might_sleep();
 
-	ext4_check_bdev_write_error(sb);
-
 	if (ext4_handle_valid(handle)) {
 		err = jbd2_journal_get_write_access(handle, bh);
 		if (err) {
@@ -244,7 +242,8 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
 						  handle, err);
 			return err;
 		}
-	}
+	} else
+		ext4_check_bdev_write_error(sb);
 	if (trigger_type == EXT4_JTR_NONE || !ext4_has_metadata_csum(sb))
 		return 0;
 	BUG_ON(trigger_type >= EXT4_JOURNAL_TRIGGER_COUNT);
fs/ext4/extents.c
@@ -4523,7 +4523,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	 * Round up offset. This is not fallocate, we need to zero out
 	 * blocks, so convert interior block aligned part of the range to
 	 * unwritten and possibly manually zero out unaligned parts of the
-	 * range.
+	 * range. Here, start and partial_begin are inclusive, end and
+	 * partial_end are exclusive.
 	 */
 	start = round_up(offset, 1 << blkbits);
 	end = round_down((offset + len), 1 << blkbits);
@@ -4609,7 +4610,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	 * disk in case of crash before zeroing trans is committed.
 	 */
 	if (ext4_should_journal_data(inode)) {
-		ret = filemap_write_and_wait_range(mapping, start, end);
+		ret = filemap_write_and_wait_range(mapping, start,
+						   end - 1);
 		if (ret) {
 			filemap_invalidate_unlock(mapping);
 			goto out_mutex;
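For orientation on the hunk above: ext4_zero_range() now treats end as exclusive, while filemap_write_and_wait_range() expects an inclusive last byte, hence the "end - 1" argument. Below is a minimal userspace sketch of the same rounding arithmetic; the offset/len values are invented for illustration and are not taken from the patch.

    #include <stdio.h>

    /* simple power-of-two rounding helpers standing in for the kernel macros */
    #define round_up(x, y)   ((((x) + (y) - 1) / (y)) * (y))
    #define round_down(x, y) (((x) / (y)) * (y))

    int main(void)
    {
        unsigned int blkbits = 12;              /* 4K blocks: 1 << blkbits == 4096 */
        long long offset = 1000, len = 10000;   /* hypothetical zero-range request */

        long long start = round_up(offset, 1LL << blkbits);        /* 4096: first fully covered block */
        long long end = round_down(offset + len, 1LL << blkbits);  /* 8192: exclusive end of covered blocks */

        /* [start, end) becomes unwritten extents; the partial head/tail are zeroed by hand */
        printf("aligned range: [%lld, %lld)\n", start, end);
        /* an inclusive-end API such as filemap_write_and_wait_range() is given end - 1 */
        printf("inclusive last byte: %lld\n", end - 1);
        return 0;
    }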
fs/ext4/inode.c
@@ -2947,7 +2947,7 @@ static int ext4_da_should_update_i_disksize(struct folio *folio,
 
 static int ext4_da_do_write_end(struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
-			struct page *page)
+			struct folio *folio)
 {
 	struct inode *inode = mapping->host;
 	loff_t old_size = inode->i_size;
@@ -2958,12 +2958,13 @@ static int ext4_da_do_write_end(struct address_space *mapping,
 	 * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
 	 * flag, which all that's needed to trigger page writeback.
 	 */
-	copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
+	copied = block_write_end(NULL, mapping, pos, len, copied,
+				 &folio->page, NULL);
 	new_i_size = pos + copied;
 
 	/*
-	 * It's important to update i_size while still holding page lock,
-	 * because page writeout could otherwise come in and zero beyond
+	 * It's important to update i_size while still holding folio lock,
+	 * because folio writeout could otherwise come in and zero beyond
 	 * i_size.
 	 *
 	 * Since we are holding inode lock, we are sure i_disksize <=
@@ -2981,14 +2982,14 @@ static int ext4_da_do_write_end(struct address_space *mapping,
 
 		i_size_write(inode, new_i_size);
 		end = (new_i_size - 1) & (PAGE_SIZE - 1);
-		if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
+		if (copied && ext4_da_should_update_i_disksize(folio, end)) {
 			ext4_update_i_disksize(inode, new_i_size);
 			disksize_changed = true;
 		}
 	}
 
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 
 	if (old_size < pos)
 		pagecache_isize_extended(inode, old_size, pos);
@@ -3027,10 +3028,10 @@ static int ext4_da_write_end(struct file *file,
 		return ext4_write_inline_data_end(inode, pos, len, copied,
 						  folio);
 
-	if (unlikely(copied < len) && !PageUptodate(page))
+	if (unlikely(copied < len) && !folio_test_uptodate(folio))
 		copied = 0;
 
-	return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
+	return ext4_da_do_write_end(mapping, pos, len, copied, folio);
 }
 
 /*
@@ -3630,6 +3631,12 @@ void ext4_set_aops(struct inode *inode)
 		inode->i_mapping->a_ops = &ext4_aops;
 }
 
+/*
+ * Here we can't skip an unwritten buffer even though it usually reads zero
+ * because it might have data in pagecache (eg, if called from ext4_zero_range,
+ * ext4_punch_hole, etc) which needs to be properly zeroed out. Otherwise a
+ * racing writeback can come later and flush the stale pagecache to disk.
+ */
 static int __ext4_block_zero_page_range(handle_t *handle,
 		struct address_space *mapping, loff_t from, loff_t length)
 {
fs/ext4/mballoc.c
@@ -1456,9 +1456,8 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
 		return 0;
 	}
 
-	block++;
-	pnum = block / blocks_per_page;
-	page = find_or_create_page(inode->i_mapping, pnum, gfp);
+	/* blocks_per_page == 1, hence we need another page for the buddy */
+	page = find_or_create_page(inode->i_mapping, block + 1, gfp);
 	if (!page)
 		return -ENOMEM;
 	BUG_ON(page->mapping != inode->i_mapping);
@@ -1958,8 +1957,7 @@ done:
 static int mb_find_extent(struct ext4_buddy *e4b, int block,
 				int needed, struct ext4_free_extent *ex)
 {
-	int next = block;
-	int max, order;
+	int max, order, next;
 	void *buddy;
 
 	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
@@ -1977,16 +1975,12 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block,
 
 	/* find actual order */
 	order = mb_find_order_for_block(e4b, block);
-	block = block >> order;
 
-	ex->fe_len = 1 << order;
-	ex->fe_start = block << order;
+	ex->fe_len = (1 << order) - (block & ((1 << order) - 1));
+	ex->fe_start = block;
 	ex->fe_group = e4b->bd_group;
 
-	/* calc difference from given start */
-	next = next - ex->fe_start;
-	ex->fe_len -= next;
-	ex->fe_start += next;
+	block = block >> order;
 
 	while (needed > ex->fe_len &&
 	       mb_find_buddy(e4b, order, &max)) {
@@ -2895,14 +2889,19 @@ repeat:
 			ac->ac_groups_scanned++;
 			if (cr == CR_POWER2_ALIGNED)
 				ext4_mb_simple_scan_group(ac, &e4b);
-			else if ((cr == CR_GOAL_LEN_FAST ||
-				  cr == CR_BEST_AVAIL_LEN) &&
-				 sbi->s_stripe &&
-				 !(ac->ac_g_ex.fe_len %
-				   EXT4_B2C(sbi, sbi->s_stripe)))
-				ext4_mb_scan_aligned(ac, &e4b);
-			else
-				ext4_mb_complex_scan_group(ac, &e4b);
+			else {
+				bool is_stripe_aligned = sbi->s_stripe &&
+					!(ac->ac_g_ex.fe_len %
+					  EXT4_B2C(sbi, sbi->s_stripe));
+
+				if ((cr == CR_GOAL_LEN_FAST ||
+				     cr == CR_BEST_AVAIL_LEN) &&
+				    is_stripe_aligned)
+					ext4_mb_scan_aligned(ac, &e4b);
+
+				if (ac->ac_status == AC_STATUS_CONTINUE)
+					ext4_mb_complex_scan_group(ac, &e4b);
+			}
 
 			ext4_unlock_group(sb, group);
 			ext4_mb_unload_buddy(&e4b);
@@ -6735,11 +6734,16 @@ __acquires(bitlock)
 static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
 					   ext4_group_t grp)
 {
-	if (grp < ext4_get_groups_count(sb))
-		return EXT4_CLUSTERS_PER_GROUP(sb) - 1;
-	return (ext4_blocks_count(EXT4_SB(sb)->s_es) -
-		ext4_group_first_block_no(sb, grp) - 1) >>
-	       EXT4_CLUSTER_BITS(sb);
+	unsigned long nr_clusters_in_group;
+
+	if (grp < (ext4_get_groups_count(sb) - 1))
+		nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb);
+	else
+		nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) -
+					ext4_group_first_block_no(sb, grp))
+				       >> EXT4_CLUSTER_BITS(sb);
+
+	return nr_clusters_in_group - 1;
 }
 
 static bool ext4_trim_interrupted(void)
@@ -6753,13 +6757,15 @@ static int ext4_try_to_trim_range(struct super_block *sb,
 __acquires(ext4_group_lock_ptr(sb, e4b->bd_group))
 __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
 {
-	ext4_grpblk_t next, count, free_count;
+	ext4_grpblk_t next, count, free_count, last, origin_start;
 	bool set_trimmed = false;
 	void *bitmap;
 
+	last = ext4_last_grp_cluster(sb, e4b->bd_group);
 	bitmap = e4b->bd_bitmap;
-	if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group))
+	if (start == 0 && max >= last)
 		set_trimmed = true;
+	origin_start = start;
 	start = max(e4b->bd_info->bb_first_free, start);
 	count = 0;
 	free_count = 0;
@@ -6768,7 +6774,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
 		start = mb_find_next_zero_bit(bitmap, max + 1, start);
 		if (start > max)
 			break;
-		next = mb_find_next_bit(bitmap, max + 1, start);
+
+		next = mb_find_next_bit(bitmap, last + 1, start);
+		if (origin_start == 0 && next >= last)
+			set_trimmed = true;
 
 		if ((next - start) >= minblocks) {
 			int ret = ext4_trim_extent(sb, start, next - start, e4b);
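As a sanity check on the mb_find_extent() change above: the old code sized the whole buddy chunk and then trimmed off the blocks before the requested one, while the new code computes the tail of the chunk directly. A small userspace sketch with made-up block/order values shows both paths arriving at the same fe_start/fe_len.

    #include <stdio.h>

    int main(void)
    {
        int block = 13;   /* hypothetical bit offset inside the group */
        int order = 3;    /* pretend mb_find_order_for_block() returned 3: an 8-block chunk */

        /* old calculation: whole chunk first, then shift to the requested block */
        int next = block;
        int fe_start_old = (block >> order) << order;   /* chunk-aligned start: 8 */
        int fe_len_old = 1 << order;                    /* whole chunk: 8 */
        int diff = next - fe_start_old;                 /* 5 blocks precede 'block' in the chunk */
        fe_len_old -= diff;                             /* 3 */
        fe_start_old += diff;                           /* 13 */

        /* new calculation: same result in one step */
        int fe_start_new = block;                                        /* 13 */
        int fe_len_new = (1 << order) - (block & ((1 << order) - 1));    /* 3 */

        printf("old: start=%d len=%d, new: start=%d len=%d\n",
               fe_start_old, fe_len_old, fe_start_new, fe_len_new);
        return 0;
    }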
fs/ext4/resize.c
@@ -218,35 +218,53 @@ struct ext4_new_flex_group_data {
 						   in the flex group */
 	__u16 *bg_flags;			/* block group flags of groups
 						   in @groups */
+	ext4_group_t resize_bg;			/* number of allocated
+						   new_group_data */
 	ext4_group_t count;			/* number of groups in @groups
 						 */
 };
 
+/*
+ * Avoiding memory allocation failures due to too many groups added each time.
+ */
+#define MAX_RESIZE_BG				16384
+
 /*
  * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
  * @flexbg_size.
  *
  * Returns NULL on failure otherwise address of the allocated structure.
  */
-static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
+static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size,
+				ext4_group_t o_group, ext4_group_t n_group)
 {
+	ext4_group_t last_group;
 	struct ext4_new_flex_group_data *flex_gd;
 
 	flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
 	if (flex_gd == NULL)
 		goto out3;
 
-	if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
-		goto out2;
-	flex_gd->count = flexbg_size;
+	if (unlikely(flexbg_size > MAX_RESIZE_BG))
+		flex_gd->resize_bg = MAX_RESIZE_BG;
+	else
+		flex_gd->resize_bg = flexbg_size;
 
-	flex_gd->groups = kmalloc_array(flexbg_size,
+	/* Avoid allocating large 'groups' array if not needed */
+	last_group = o_group | (flex_gd->resize_bg - 1);
+	if (n_group <= last_group)
+		flex_gd->resize_bg = 1 << fls(n_group - o_group + 1);
+	else if (n_group - last_group < flex_gd->resize_bg)
+		flex_gd->resize_bg = 1 << max(fls(last_group - o_group + 1),
+					      fls(n_group - last_group));
+
+	flex_gd->groups = kmalloc_array(flex_gd->resize_bg,
 					sizeof(struct ext4_new_group_data),
 					GFP_NOFS);
 	if (flex_gd->groups == NULL)
 		goto out2;
 
-	flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16),
+	flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16),
 					  GFP_NOFS);
 	if (flex_gd->bg_flags == NULL)
 		goto out1;
@@ -283,7 +301,7 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
  */
 static int ext4_alloc_group_tables(struct super_block *sb,
 				   struct ext4_new_flex_group_data *flex_gd,
-				   int flexbg_size)
+				   unsigned int flexbg_size)
 {
 	struct ext4_new_group_data *group_data = flex_gd->groups;
 	ext4_fsblk_t start_blk;
@@ -384,12 +402,12 @@ next_group:
 	group = group_data[0].group;
 
 	printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
-	       "%d groups, flexbg size is %d:\n", flex_gd->count,
+	       "%u groups, flexbg size is %u:\n", flex_gd->count,
 	       flexbg_size);
 
 	for (i = 0; i < flex_gd->count; i++) {
 		ext4_debug(
-"adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
+"adding %s group %u: %u blocks (%u free, %u mdata blocks)\n",
 		       ext4_bg_has_super(sb, group + i) ? "normal" :
 		       "no-super", group + i,
 		       group_data[i].blocks_count,
@@ -1605,8 +1623,7 @@ exit:
 
 static int ext4_setup_next_flex_gd(struct super_block *sb,
 				   struct ext4_new_flex_group_data *flex_gd,
-				   ext4_fsblk_t n_blocks_count,
-				   unsigned long flexbg_size)
+				   ext4_fsblk_t n_blocks_count)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_super_block *es = sbi->s_es;
@@ -1630,7 +1647,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
 	BUG_ON(last);
 	ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
 
-	last_group = group | (flexbg_size - 1);
+	last_group = group | (flex_gd->resize_bg - 1);
 	if (last_group > n_group)
 		last_group = n_group;
 
@@ -1990,8 +2007,9 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
 	ext4_fsblk_t o_blocks_count;
 	ext4_fsblk_t n_blocks_count_retry = 0;
 	unsigned long last_update_time = 0;
-	int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
+	int err = 0;
 	int meta_bg;
+	unsigned int flexbg_size = ext4_flex_bg_size(sbi);
 
 	/* See if the device is actually as big as what was requested */
 	bh = ext4_sb_bread(sb, n_blocks_count - 1, 0);
@@ -2123,7 +2141,7 @@ retry:
 	if (err)
 		goto out;
 
-	flex_gd = alloc_flex_gd(flexbg_size);
+	flex_gd = alloc_flex_gd(flexbg_size, o_group, n_group);
 	if (flex_gd == NULL) {
 		err = -ENOMEM;
 		goto out;
@@ -2132,8 +2150,7 @@ retry:
 	/* Add flex groups. Note that a regular group is a
 	 * flex group with 1 group.
 	 */
-	while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
-				       flexbg_size)) {
+	while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) {
 		if (time_is_before_jiffies(last_update_time + HZ * 10)) {
 			if (last_update_time)
 				ext4_msg(sb, KERN_INFO,
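To make the new alloc_flex_gd() sizing logic above easier to follow, here is a userspace restatement of the resize_bg computation with a plain-C fls(). The sample group numbers are invented for illustration, and __builtin_clz assumes a GCC/Clang toolchain; this is a sketch of the arithmetic, not the kernel function itself.

    #include <stdio.h>

    #define MAX_RESIZE_BG 16384

    /* same semantics as the kernel's fls(): position of the highest set bit, 0 for 0 */
    static int fls_u32(unsigned int x)
    {
        return x ? 32 - __builtin_clz(x) : 0;
    }

    static unsigned int resize_bg_for(unsigned int flexbg_size,
                                      unsigned int o_group, unsigned int n_group)
    {
        unsigned int resize_bg = flexbg_size > MAX_RESIZE_BG ? MAX_RESIZE_BG : flexbg_size;
        unsigned int last_group = o_group | (resize_bg - 1);
        int a, b;

        if (n_group <= last_group) {
            /* whole resize fits in the current flex_bg run: shrink to a power of two */
            resize_bg = 1u << fls_u32(n_group - o_group + 1);
        } else if (n_group - last_group < resize_bg) {
            a = fls_u32(last_group - o_group + 1);
            b = fls_u32(n_group - last_group);
            resize_bg = 1u << (a > b ? a : b);
        }
        return resize_bg;
    }

    int main(void)
    {
        /* flexbg of 16 groups, growing from group 3 to group 5: 1 << fls(3) == 4 entries suffice */
        printf("%u\n", resize_bg_for(16, 3, 5));
        /* adding many groups with a huge flexbg stays capped at MAX_RESIZE_BG */
        printf("%u\n", resize_bg_for(1u << 20, 0, 100000));
        return 0;
    }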
fs/ext4/super.c
@@ -2793,15 +2793,6 @@ static int ext4_check_opt_consistency(struct fs_context *fc,
 			return -EINVAL;
 	}
 
-	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) {
-		int blocksize =
-			BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
-		if (blocksize < PAGE_SIZE)
-			ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an "
-				 "experimental mount option 'dioread_nolock' "
-				 "for blocksize < PAGE_SIZE");
-	}
-
 	err = ext4_check_test_dummy_encryption(fc, sb);
 	if (err)
 		return err;
@@ -4410,7 +4401,7 @@ static void ext4_set_def_opts(struct super_block *sb,
 	    ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
 		set_opt(sb, DELALLOC);
 
-	if (sb->s_blocksize == PAGE_SIZE)
+	if (sb->s_blocksize <= PAGE_SIZE)
 		set_opt(sb, DIOREAD_NOLOCK);
 }
 
fs/jbd2/checkpoint.c
@@ -556,7 +556,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	struct transaction_chp_stats_s *stats;
 	transaction_t *transaction;
 	journal_t *journal;
-	struct buffer_head *bh = jh2bh(jh);
 
 	JBUFFER_TRACE(jh, "entry");
 
@@ -569,16 +568,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 
 	JBUFFER_TRACE(jh, "removing from transaction");
 
-	/*
-	 * If we have failed to write the buffer out to disk, the filesystem
-	 * may become inconsistent. We cannot abort the journal here since
-	 * we hold j_list_lock and we have to be careful about races with
-	 * jbd2_journal_destroy(). So mark the writeback IO error in the
-	 * journal here and we abort the journal later from a better context.
-	 */
-	if (buffer_write_io_error(bh))
-		set_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags);
-
 	__buffer_unlink(jh);
 	jh->b_cp_transaction = NULL;
 	percpu_counter_dec(&journal->j_checkpoint_jh_count);
fs/jbd2/journal.c
@@ -1534,6 +1534,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
 	journal->j_fs_dev = fs_dev;
 	journal->j_blk_offset = start;
 	journal->j_total_len = len;
+	jbd2_init_fs_dev_write_error(journal);
 
 	err = journal_load_superblock(journal);
 	if (err)
@@ -1861,7 +1862,7 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
 
 	if (is_journal_aborted(journal))
 		return -EIO;
-	if (test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags)) {
+	if (jbd2_check_fs_dev_write_error(journal)) {
 		jbd2_journal_abort(journal, -EIO);
 		return -EIO;
 	}
@@ -2159,12 +2160,12 @@ int jbd2_journal_destroy(journal_t *journal)
 
 	/*
 	 * OK, all checkpoint transactions have been checked, now check the
-	 * write out io error flag and abort the journal if some buffer failed
-	 * to write back to the original location, otherwise the filesystem
-	 * may become inconsistent.
+	 * writeback errseq of fs dev and abort the journal if some buffer
+	 * failed to write back to the original location, otherwise the
+	 * filesystem may become inconsistent.
 	 */
 	if (!is_journal_aborted(journal) &&
-	    test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags))
+	    jbd2_check_fs_dev_write_error(journal))
 		jbd2_journal_abort(journal, -EIO);
 
 	if (journal->j_sb_buffer) {
fs/jbd2/recovery.c
@@ -289,8 +289,6 @@ int jbd2_journal_recover(journal_t *journal)
 	journal_superblock_t *	sb;
 
 	struct recovery_info	info;
-	errseq_t		wb_err;
-	struct address_space	*mapping;
 
 	memset(&info, 0, sizeof(info));
 	sb = journal->j_superblock;
@@ -308,9 +306,6 @@ int jbd2_journal_recover(journal_t *journal)
 		return 0;
 	}
 
-	wb_err = 0;
-	mapping = journal->j_fs_dev->bd_inode->i_mapping;
-	errseq_check_and_advance(&mapping->wb_err, &wb_err);
 	err = do_one_pass(journal, &info, PASS_SCAN);
 	if (!err)
 		err = do_one_pass(journal, &info, PASS_REVOKE);
@@ -334,7 +329,7 @@ int jbd2_journal_recover(journal_t *journal)
 	err2 = sync_blockdev(journal->j_fs_dev);
 	if (!err)
 		err = err2;
-	err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err);
+	err2 = jbd2_check_fs_dev_write_error(journal);
 	if (!err)
 		err = err2;
 	/* Make sure all replayed data is on permanent storage */
fs/jbd2/transaction.c
@@ -1231,11 +1231,25 @@ out:
 int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 {
 	struct journal_head *jh;
+	journal_t *journal;
 	int rc;
 
 	if (is_handle_aborted(handle))
 		return -EROFS;
 
+	journal = handle->h_transaction->t_journal;
+	if (jbd2_check_fs_dev_write_error(journal)) {
+		/*
+		 * If the fs dev has writeback errors, it may have failed
+		 * to async write out metadata buffers in the background.
+		 * In this case, we could read old data from disk and write
+		 * it out again, which may lead to on-disk filesystem
+		 * inconsistency. Aborting journal can avoid it happen.
+		 */
+		jbd2_journal_abort(journal, -EIO);
+		return -EIO;
+	}
+
 	if (jbd2_write_access_granted(handle, bh, false))
 		return 0;
 
include/linux/jbd2.h
@@ -755,11 +755,6 @@ struct journal_s
 	 */
 	unsigned long		j_flags;
 
-	/**
-	 * @j_atomic_flags: Atomic journaling state flags.
-	 */
-	unsigned long		j_atomic_flags;
-
 	/**
 	 * @j_errno:
 	 *
@@ -998,6 +993,13 @@ struct journal_s
 	 */
 	struct block_device	*j_fs_dev;
 
+	/**
+	 * @j_fs_dev_wb_err:
+	 *
+	 * Records the errseq of the client fs's backing block device.
+	 */
+	errseq_t		j_fs_dev_wb_err;
+
 	/**
 	 * @j_total_len: Total maximum capacity of the journal region on disk.
 	 */
@@ -1399,12 +1401,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit,	FAST_COMMIT)
 #define JBD2_JOURNAL_FLUSH_VALID	(JBD2_JOURNAL_FLUSH_DISCARD | \
 					JBD2_JOURNAL_FLUSH_ZEROOUT)
 
-/*
- * Journal atomic flag definitions
- */
-#define JBD2_CHECKPOINT_IO_ERROR	0x001	/* Detect io error while writing
-						 * buffer back to disk */
-
 /*
  * Function declarations for the journaling transaction and buffer
  * management
@@ -1698,6 +1694,25 @@ static inline void jbd2_journal_abort_handle(handle_t *handle)
 	handle->h_aborted = 1;
 }
 
+static inline void jbd2_init_fs_dev_write_error(journal_t *journal)
+{
+	struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping;
+
+	/*
+	 * Save the original wb_err value of client fs's bdev mapping which
+	 * could be used to detect the client fs's metadata async write error.
+	 */
+	errseq_check_and_advance(&mapping->wb_err, &journal->j_fs_dev_wb_err);
+}
+
+static inline int jbd2_check_fs_dev_write_error(journal_t *journal)
+{
+	struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping;
+
+	return errseq_check(&mapping->wb_err,
+			    READ_ONCE(journal->j_fs_dev_wb_err));
+}
+
 #endif /* __KERNEL__ */
 
 /* Comparison functions for transaction IDs: perform comparisons using
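The two inline helpers added above follow a snapshot-then-compare pattern: journal_init_common() samples the current wb_err of the client filesystem's bdev mapping into j_fs_dev_wb_err, and later callers ask whether any new writeback error has been recorded since, aborting the journal if so. The sketch below is only a rough userspace analogy using a plain counter; it mocks the idea, not the kernel's actual errseq_t encoding.

    #include <stdio.h>

    /* toy stand-in for an address_space's wb_err: bumped on every writeback error */
    struct toy_mapping {
        unsigned int wb_err;
    };

    /* analogue of jbd2_init_fs_dev_write_error(): remember what has already been seen */
    static void toy_init_write_error(const struct toy_mapping *m, unsigned int *seen)
    {
        *seen = m->wb_err;
    }

    /* analogue of jbd2_check_fs_dev_write_error(): non-zero iff a new error appeared */
    static int toy_check_write_error(const struct toy_mapping *m, unsigned int seen)
    {
        return m->wb_err != seen;
    }

    int main(void)
    {
        struct toy_mapping bdev = { .wb_err = 0 };
        unsigned int journal_seen;

        toy_init_write_error(&bdev, &journal_seen);                   /* journal init time */
        printf("%d\n", toy_check_write_error(&bdev, journal_seen));   /* 0: nothing new */

        bdev.wb_err++;                                                /* async metadata writeback fails */
        printf("%d\n", toy_check_write_error(&bdev, journal_seen));   /* 1: abort the journal */
        return 0;
    }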