Various bug fixes and cleanups for ext4; no new features this cycle.

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEOrBXt+eNlFyMVZH70292m8EYBPAFAl/iRkQACgkQ0292m8EY
 BPDxnw/+Nyp5/bscuKflyAs8m8BUDE8MSP9TTaPcflDgyV1Z3QiSx5ITOXi4JSy6
 V5W1RXScJL3mG8QrtukY7zP7bSTS+UDQlbZ1BlpiyZDjo3l15gKwWVOmJteGusfI
 U+xRz1RoiXcGTq6vSDYZIKquNVNezAE0UbYVRECm+lEjfka1vrlQz20gEgvh/mvh
 ZQV5wLqr61gGsGDhHSw+HD16KYJQWtwcRirNuzw7VfFtUJNfthXkPy8mIuJcKx2a
 sHuGEnLxhwapl27Os2o8gMTjt05nQZSZ6Fx3p7i3c4NBknR3F8ZI1LVSvDDZU6Rz
 v0v/VfMEP/CEg5Vq8gLL1dgWCwF0GWKYLdhyge/oCQU9r3RbheRKCRmOqyrYw5sA
 e4AqfWw39OmKV8L54XKUV4lXaBd7rCHW0l0uJdm1cdGxg7kewjS6TajhJ30tRqz/
 vvCo5HZ/TgF71AhONu+0WSzbuOOZzRxJC4k+WLRHspOzXVg7f8PzAxkfHMJaEk3t
 TsuM0prorm4r1PAb7TblXJpV7LlI2sjhYqsC/ngAfmZqw59wou3n+hytg8BkKlOz
 Gq9LUdFw4MJqK0OqaRzwWCcXFdc5EEFS8Kq4aVNLbNMmF1gfYy/RzTRc1iZ5RkN+
 stk7OYhs657Kv925nhvZsLCoOwGzp+8dbKmW2TlgQMwYHx30+Jw=
 =a5OE
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Various bug fixes and cleanups for ext4; no new features this cycle"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (29 commits)
  ext4: remove unnecessary wbc parameter from ext4_bio_write_page
  ext4: avoid s_mb_prefetch to be zero in individual scenarios
  ext4: defer saving error info from atomic context
  ext4: simplify ext4 error translation
  ext4: move functions in super.c
  ext4: make ext4_abort() use __ext4_error()
  ext4: standardize error message in ext4_protect_reserved_inode()
  ext4: remove redundant sb checksum recomputation
  ext4: don't remount read-only with errors=continue on reboot
  ext4: fix deadlock with fs freezing and EA inodes
  jbd2: add a helper to find out number of fast commit blocks
  ext4: make fast_commit.h byte identical with e2fsprogs/fast_commit.h
  ext4: fix fall-through warnings for Clang
  ext4: add docs about fast commit idempotence
  ext4: remove the unused EXT4_CURRENT_REV macro
  ext4: fix an IS_ERR() vs NULL check
  ext4: check for invalid block size early when mounting a file system
  ext4: fix a memory leak of ext4_free_data
  ext4: delete nonsensical (commented-out) code inside ext4_xattr_block_set()
  ext4: update ext4_data_block_valid related comments
  ...
This commit is contained in:
Linus Torvalds 2020-12-24 14:16:02 -08:00
commit 555a6e8c11
19 changed files with 504 additions and 378 deletions

View File

@ -681,3 +681,53 @@ Here is the list of supported tags and their meanings:
- Stores the TID of the commit and the CRC of the fast commit whose end this
tag represents
Fast Commit Replay Idempotence
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Fast commit tags are idempotent in nature provided the recovery code follows
certain rules. The guiding principle that the commit path follows while
committing is that it stores the result of a particular operation instead of
storing the procedure.
Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
was associated with inode 10. During fast commit, instead of storing this
operation as a procedure "rename a to b", we store the resulting file system
state as a "series" of outcomes:
- Link dirent b to inode 10
- Unlink dirent a
- Inode 10 with valid refcount
Now when recovery code runs, it needs to "enforce" this state on the file
system. This is what guarantees idempotence of fast commit replay.
Let's take an example of a procedure that is not idempotent and see how fast
commits make it idempotent. Consider the following sequence of operations:
1) rm A
2) mv B A
3) read A
If we store this sequence of operations as is then the replay is not idempotent.
Let's say while in replay, we crash after (2). During the second replay,
file A (which was actually created as a result of "mv B A" operation) would get
deleted. Thus, file named A would be absent when we try to read A. So, this
sequence of operations is not idempotent. However, as mentioned above, instead
of storing the procedure, fast commits store the outcome of each procedure. Thus
the fast commit log for the above procedure would be as follows:
(Let's assume dirent A was linked to inode 10 and dirent B was linked to
inode 11 before the replay)
1) Unlink A
2) Link A to inode 11
3) Unlink B
4) Inode 11
If we crash after (3) we will have file A linked to inode 11. During the second
replay, we will remove file A (inode 11). But we will create it back and make
it point to inode 11. We won't find B, so we'll just skip that step. At this
point, the refcount for inode 11 is not reliable, but that gets fixed by the
replay of last inode 11 tag. Thus, by converting a non-idempotent procedure
into a series of idempotent outcomes, fast commits ensure idempotence during
the replay.

View File

@ -185,7 +185,7 @@ static int ext4_init_block_bitmap(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start, tmp;
J_ASSERT_BH(bh, buffer_locked(bh));
ASSERT(buffer_locked(bh));
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */

View File

@ -176,12 +176,10 @@ static int ext4_protect_reserved_inode(struct super_block *sb,
err = add_system_zone(system_blks, map.m_pblk, n, ino);
if (err < 0) {
if (err == -EFSCORRUPTED) {
__ext4_error(sb, __func__, __LINE__,
-err, map.m_pblk,
"blocks %llu-%llu from inode %u overlap system zone",
map.m_pblk,
map.m_pblk + map.m_len - 1,
ino);
EXT4_ERROR_INODE_ERR(inode, -err,
"blocks %llu-%llu from inode overlap system zone",
map.m_pblk,
map.m_pblk + map.m_len - 1);
}
break;
}
@ -206,7 +204,7 @@ static void ext4_destroy_system_zone(struct rcu_head *rcu)
*
* The update of system_blks pointer in this function is protected by
* sb->s_umount semaphore. However we have to be careful as we can be
* racing with ext4_data_block_valid() calls reading system_blks rbtree
* racing with ext4_inode_block_valid() calls reading system_blks rbtree
* protected only by RCU. That's why we first build the rbtree and then
* swap it in place.
*/
@ -258,7 +256,7 @@ int ext4_setup_system_zone(struct super_block *sb)
/*
* System blks rbtree complete, announce it once to prevent racing
* with ext4_data_block_valid() accessing the rbtree at the same
* with ext4_inode_block_valid() accessing the rbtree at the same
* time.
*/
rcu_assign_pointer(sbi->s_system_blks, system_blks);
@ -278,7 +276,7 @@ err:
*
* The update of system_blks pointer in this function is protected by
* sb->s_umount semaphore. However we have to be careful as we can be
* racing with ext4_data_block_valid() calls reading system_blks rbtree
* racing with ext4_inode_block_valid() calls reading system_blks rbtree
* protected only by RCU. So we first clear the system_blks pointer and
* then free the rbtree only after RCU grace period expires.
*/

View File

@ -98,6 +98,16 @@
#define ext_debug(ino, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
/*
 * ext4-local assertion.  When the asserted expression evaluates false,
 * print an emergency-level message naming the enclosing function, the
 * source file, the line and the stringified expression, then call BUG().
 * The do { } while (0) wrapper lets the macro be used as a single statement.
 */
#define ASSERT(assert) \
do { \
if (unlikely(!(assert))) { \
printk(KERN_EMERG \
"Assertion failure in %s() at %s:%d: '%s'\n", \
__func__, __FILE__, __LINE__, #assert); \
BUG(); \
} \
} while (0)
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
@ -1619,6 +1629,27 @@ struct ext4_sb_info {
errseq_t s_bdev_wb_err;
spinlock_t s_bdev_wb_lock;
/* Information about errors that happened during this mount */
spinlock_t s_error_lock;
int s_add_error_count;
int s_first_error_code;
__u32 s_first_error_line;
__u32 s_first_error_ino;
__u64 s_first_error_block;
const char *s_first_error_func;
time64_t s_first_error_time;
int s_last_error_code;
__u32 s_last_error_line;
__u32 s_last_error_ino;
__u64 s_last_error_block;
const char *s_last_error_func;
time64_t s_last_error_time;
/*
* If we are in a context where we cannot update error information in
* the on-disk superblock, we queue this work to do it.
*/
struct work_struct s_error_work;
/* Ext4 fast commit stuff */
atomic_t s_fc_subtid;
atomic_t s_fc_ineligible_updates;
@ -1858,7 +1889,6 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */
#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV
#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV
#define EXT4_GOOD_OLD_INODE_SIZE 128
@ -2952,9 +2982,9 @@ extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
ext4_group_t block_group,
unsigned int flags);
extern __printf(6, 7)
void __ext4_error(struct super_block *, const char *, unsigned int, int, __u64,
const char *, ...);
extern __printf(7, 8)
void __ext4_error(struct super_block *, const char *, unsigned int, bool,
int, __u64, const char *, ...);
extern __printf(6, 7)
void __ext4_error_inode(struct inode *, const char *, unsigned int,
ext4_fsblk_t, int, const char *, ...);
@ -2963,9 +2993,6 @@ void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
const char *, ...);
extern void __ext4_std_error(struct super_block *, const char *,
unsigned int, int);
extern __printf(5, 6)
void __ext4_abort(struct super_block *, const char *, unsigned int, int,
const char *, ...);
extern __printf(4, 5)
void __ext4_warning(struct super_block *, const char *, unsigned int,
const char *, ...);
@ -2995,6 +3022,9 @@ void __ext4_grp_locked_error(const char *, unsigned int,
#define EXT4_ERROR_FILE(file, block, fmt, a...) \
ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
/*
 * Forward to __ext4_error() with the caller's function name and line number,
 * passing true for its bool argument and 0 for its __u64 argument.
 * NOTE(review): the bool presumably requests a forced abort of the file
 * system -- confirm against the definition of __ext4_error().
 */
#define ext4_abort(sb, err, fmt, a...) \
__ext4_error((sb), __func__, __LINE__, true, (err), 0, (fmt), ## a)
#ifdef CONFIG_PRINTK
#define ext4_error_inode(inode, func, line, block, fmt, ...) \
@ -3005,11 +3035,11 @@ void __ext4_grp_locked_error(const char *, unsigned int,
#define ext4_error_file(file, func, line, block, fmt, ...) \
__ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__)
#define ext4_error(sb, fmt, ...) \
__ext4_error((sb), __func__, __LINE__, 0, 0, (fmt), ##__VA_ARGS__)
__ext4_error((sb), __func__, __LINE__, false, 0, 0, (fmt), \
##__VA_ARGS__)
#define ext4_error_err(sb, err, fmt, ...) \
__ext4_error((sb), __func__, __LINE__, (err), 0, (fmt), ##__VA_ARGS__)
#define ext4_abort(sb, err, fmt, ...) \
__ext4_abort((sb), __func__, __LINE__, (err), (fmt), ##__VA_ARGS__)
__ext4_error((sb), __func__, __LINE__, false, (err), 0, (fmt), \
##__VA_ARGS__)
#define ext4_warning(sb, fmt, ...) \
__ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define ext4_warning_inode(inode, fmt, ...) \
@ -3042,17 +3072,12 @@ do { \
#define ext4_error(sb, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
__ext4_error(sb, "", 0, 0, 0, " "); \
__ext4_error(sb, "", 0, false, 0, 0, " "); \
} while (0)
#define ext4_error_err(sb, err, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
__ext4_error(sb, "", 0, err, 0, " "); \
} while (0)
#define ext4_abort(sb, err, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
__ext4_abort(sb, "", 0, err, " "); \
__ext4_error(sb, "", 0, false, err, 0, " "); \
} while (0)
#define ext4_warning(sb, fmt, ...) \
do { \
@ -3361,6 +3386,21 @@ static inline void ext4_unlock_group(struct super_block *sb,
spin_unlock(ext4_group_lock_ptr(sb, group));
}
#ifdef CONFIG_QUOTA
static inline bool ext4_quota_capable(struct super_block *sb)
{
return (test_opt(sb, QUOTA) || ext4_has_feature_quota(sb));
}
static inline bool ext4_is_quota_journalled(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
return (ext4_has_feature_quota(sb) ||
sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]);
}
#endif
/*
* Block validity checking
*/
@ -3609,7 +3649,6 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
struct writeback_control *wbc,
bool keep_towrite);
extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);

View File

@ -296,8 +296,8 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
if (err) {
ext4_journal_abort_handle(where, line, __func__,
bh, handle, err);
__ext4_abort(inode->i_sb, where, line, -err,
"error %d when attempting revoke", err);
__ext4_error(inode->i_sb, where, line, true, -err, 0,
"error %d when attempting revoke", err);
}
BUFFER_TRACE(bh, "exit");
return err;

View File

@ -86,17 +86,14 @@
#ifdef CONFIG_QUOTA
/* Amount of blocks needed for quota update - we know that the structure was
* allocated so we need to update only data block */
#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
ext4_has_feature_quota(sb)) ? 1 : 0)
#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((ext4_quota_capable(sb)) ? 1 : 0)
/* Amount of blocks needed for quota insert/delete - we do some block writes
* but inode, sb and group updates are done only once */
#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
ext4_has_feature_quota(sb)) ?\
#define EXT4_QUOTA_INIT_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\
(DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
+3+DQUOT_INIT_REWRITE) : 0)
#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
ext4_has_feature_quota(sb)) ?\
#define EXT4_QUOTA_DEL_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\
(DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
+3+DQUOT_DEL_REWRITE) : 0)
#else

View File

@ -5815,8 +5815,8 @@ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
int ret;
path = ext4_find_extent(inode, start, NULL, 0);
if (!path)
return -EINVAL;
if (IS_ERR(path))
return PTR_ERR(path);
ex = path[path->p_depth].p_ext;
if (!ex) {
ret = -EFSCORRUPTED;
@ -5988,7 +5988,6 @@ int ext4_ext_replay_set_iblocks(struct inode *inode)
kfree(path);
break;
}
ex = path2[path2->p_depth].p_ext;
for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
cmp1 = cmp2 = 0;
if (i <= path->p_depth)

View File

@ -103,8 +103,69 @@
*
* Replay code should thus check for all the valid tails in the FC area.
*
* Fast Commit Replay Idempotence
* ------------------------------
*
* Fast commit tags are idempotent in nature provided the recovery code follows
* certain rules. The guiding principle that the commit path follows while
* committing is that it stores the result of a particular operation instead of
* storing the procedure.
*
* Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
* was associated with inode 10. During fast commit, instead of storing this
* operation as a procedure "rename a to b", we store the resulting file system
* state as a "series" of outcomes:
*
* - Link dirent b to inode 10
* - Unlink dirent a
* - Inode <10> with valid refcount
*
* Now when recovery code runs, it needs to "enforce" this state on the file
* system. This is what guarantees idempotence of fast commit replay.
*
* Let's take an example of a procedure that is not idempotent and see how fast
* commits make it idempotent. Consider the following sequence of operations:
*
* rm A; mv B A; read A
* (x) (y) (z)
*
* (x), (y) and (z) are the points at which we can crash. If we store this
* sequence of operations as is then the replay is not idempotent. Let's say
* while in replay, we crash at (z). During the second replay, file A (which was
* actually created as a result of "mv B A" operation) would get deleted. Thus,
* file named A would be absent when we try to read A. So, this sequence of
* operations is not idempotent. However, as mentioned above, instead of storing
* the procedure, fast commits store the outcome of each procedure. Thus the fast
* commit log for the above procedure would be as follows:
*
* (Let's assume dirent A was linked to inode 10 and dirent B was linked to
* inode 11 before the replay)
*
* [Unlink A] [Link A to inode 11] [Unlink B] [Inode 11]
* (w) (x) (y) (z)
*
* If we crash at (z), we will have file A linked to inode 11. During the second
* replay, we will remove file A (inode 11). But we will create it back and make
* it point to inode 11. We won't find B, so we'll just skip that step. At this
* point, the refcount for inode 11 is not reliable, but that gets fixed by the
* replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
* similarly. Thus, by converting a non-idempotent procedure into a series of
* idempotent outcomes, fast commits ensure idempotence during the replay.
*
* TODOs
* -----
*
* 0) Fast commit replay path hardening: Fast commit replay code should use
* journal handles to make sure all the updates it does during the replay
* path are atomic. With that if we crash during fast commit replay, after
* trying to do recovery again, we will find a file system where fast commit
* area is invalid (because new full commit would be found). In order to deal
* with that, fast commit replay code should ensure that the "FC_REPLAY"
* superblock state is persisted before starting the replay, so that after
* the crash, fast commit recovery code can look at that flag and perform
* fast commit recovery even if that area is invalidated by later full
* commits.
*
* 1) Make fast commit atomic updates more fine grained. Today, a fast commit
* eligible update must be protected within ext4_fc_start_update() and
* ext4_fc_stop_update(). These routines are called at much higher
@ -1220,18 +1281,6 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
/* Ext4 Replay Path Routines */
/* Get length of a particular tlv */
static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
{
return le16_to_cpu(tl->fc_len);
}
/* Get a pointer to "value" of a tlv */
static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl)
{
return (u8 *)tl + sizeof(*tl);
}
/* Helper struct for dentry replay routines */
struct dentry_info_args {
int parent_ino, dname_len, ino, inode_len;
@ -1770,32 +1819,6 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl)
return 0;
}
static inline const char *tag2str(u16 tag)
{
switch (tag) {
case EXT4_FC_TAG_LINK:
return "TAG_ADD_ENTRY";
case EXT4_FC_TAG_UNLINK:
return "TAG_DEL_ENTRY";
case EXT4_FC_TAG_ADD_RANGE:
return "TAG_ADD_RANGE";
case EXT4_FC_TAG_CREAT:
return "TAG_CREAT_DENTRY";
case EXT4_FC_TAG_DEL_RANGE:
return "TAG_DEL_RANGE";
case EXT4_FC_TAG_INODE:
return "TAG_INODE";
case EXT4_FC_TAG_PAD:
return "TAG_PAD";
case EXT4_FC_TAG_TAIL:
return "TAG_TAIL";
case EXT4_FC_TAG_HEAD:
return "TAG_HEAD";
default:
return "TAG_ERROR";
}
}
static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
{
struct ext4_fc_replay_state *state;

View File

@ -3,6 +3,11 @@
#ifndef __FAST_COMMIT_H__
#define __FAST_COMMIT_H__
/*
* Note this file is present in e2fsprogs/lib/ext2fs/fast_commit.h and
* linux/fs/ext4/fast_commit.h. These files should always be byte identical.
*/
/* Fast commit tags */
#define EXT4_FC_TAG_ADD_RANGE 0x0001
#define EXT4_FC_TAG_DEL_RANGE 0x0002
@ -50,7 +55,7 @@ struct ext4_fc_del_range {
struct ext4_fc_dentry_info {
__le32 fc_parent_ino;
__le32 fc_ino;
u8 fc_dname[0];
__u8 fc_dname[0];
};
/* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */
@ -65,19 +70,6 @@ struct ext4_fc_tail {
__le32 fc_crc;
};
/*
* In memory list of dentry updates that are performed on the file
* system used by fast commit code.
*/
struct ext4_fc_dentry_update {
int fcd_op; /* Type of update create / unlink / link */
int fcd_parent; /* Parent inode number */
int fcd_ino; /* Inode number */
struct qstr fcd_name; /* Dirent name */
unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
struct list_head fcd_list;
};
/*
* Fast commit reason codes
*/
@ -107,6 +99,20 @@ enum {
EXT4_FC_REASON_MAX
};
#ifdef __KERNEL__
/*
 * In-memory record of one dentry update (create / unlink / link) that was
 * performed on the file system.  Records are linked together through
 * fcd_list and are used by the fast commit code.
 */
struct ext4_fc_dentry_update {
int fcd_op; /* Type of update create / unlink / link */
int fcd_parent; /* Parent inode number */
int fcd_ino; /* Inode number */
struct qstr fcd_name; /* Dirent name */
unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
struct list_head fcd_list; /* Linkage into the list of dentry updates */
};
struct ext4_fc_stats {
unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX];
unsigned long fc_num_commits;
@ -145,13 +151,51 @@ struct ext4_fc_replay_state {
};
#define region_last(__region) (((__region)->lblk) + ((__region)->len) - 1)
#endif
/*
 * Walk every tag-length header stored in the byte range [__start, __end).
 *
 * __start: address of the first struct ext4_fc_tl
 * __end:   address one past the last byte to examine
 * __tl:    struct ext4_fc_tl * lvalue that receives each header in turn
 *
 * Each step advances the cursor by the size of the header plus the value
 * length recorded in that header's fc_len field.  The cursor is taken from
 * the __tl argument, so callers may use any variable name for it.
 */
#define fc_for_each_tl(__start, __end, __tl)				\
	for ((__tl) = (struct ext4_fc_tl *)(__start);			\
	     (__u8 *)(__tl) < (__u8 *)(__end);				\
	     (__tl) = (struct ext4_fc_tl *)((__u8 *)(__tl) +		\
					sizeof(struct ext4_fc_tl) +	\
					le16_to_cpu((__tl)->fc_len)))
/*
 * Map a fast commit tag value to a short printable name.  Any tag without
 * a dedicated case yields "ERROR".
 */
static inline const char *tag2str(__u16 tag)
{
	const char *name = "ERROR";

	switch (tag) {
	case EXT4_FC_TAG_LINK:
		name = "ADD_ENTRY";
		break;
	case EXT4_FC_TAG_UNLINK:
		name = "DEL_ENTRY";
		break;
	case EXT4_FC_TAG_ADD_RANGE:
		name = "ADD_RANGE";
		break;
	case EXT4_FC_TAG_CREAT:
		name = "CREAT_DENTRY";
		break;
	case EXT4_FC_TAG_DEL_RANGE:
		name = "DEL_RANGE";
		break;
	case EXT4_FC_TAG_INODE:
		name = "INODE";
		break;
	case EXT4_FC_TAG_PAD:
		name = "PAD";
		break;
	case EXT4_FC_TAG_TAIL:
		name = "TAIL";
		break;
	case EXT4_FC_TAG_HEAD:
		name = "HEAD";
		break;
	default:
		break;
	}

	return name;
}
/* Return the value length stored in a tlv header, converted by le16_to_cpu() */
static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
{
	int value_len = le16_to_cpu(tl->fc_len);

	return value_len;
}
/* Get a pointer to "value" of a tlv */
static inline __u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl)
{
return (__u8 *)tl + sizeof(*tl);
}
#endif /* __FAST_COMMIT_H__ */

View File

@ -136,7 +136,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (unlikely(ext4_forced_shutdown(sbi)))
return -EIO;
J_ASSERT(ext4_journal_current_handle() == NULL);
ASSERT(ext4_journal_current_handle() == NULL);
trace_ext4_sync_file_enter(file, datasync);

View File

@ -534,8 +534,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t first_block = 0;
trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
depth = ext4_block_to_path(inode, map->m_lblk, offsets,
&blocks_to_boundary);

View File

@ -175,6 +175,7 @@ void ext4_evict_inode(struct inode *inode)
*/
int extra_credits = 6;
struct ext4_xattr_inode_array *ea_inode_array = NULL;
bool freeze_protected = false;
trace_ext4_evict_inode(inode);
@ -232,9 +233,14 @@ void ext4_evict_inode(struct inode *inode)
/*
* Protect us against freezing - iput() caller didn't have to have any
* protection against it
* protection against it. When we are in a running transaction though,
* we are already protected against freezing and we cannot grab further
* protection due to lock ordering constraints.
*/
sb_start_intwrite(inode->i_sb);
if (!ext4_journal_current_handle()) {
sb_start_intwrite(inode->i_sb);
freeze_protected = true;
}
if (!IS_NOQUOTA(inode))
extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
@ -253,7 +259,8 @@ void ext4_evict_inode(struct inode *inode)
* cleaned up.
*/
ext4_orphan_del(NULL, inode);
sb_end_intwrite(inode->i_sb);
if (freeze_protected)
sb_end_intwrite(inode->i_sb);
goto no_delete;
}
@ -294,7 +301,8 @@ void ext4_evict_inode(struct inode *inode)
stop_handle:
ext4_journal_stop(handle);
ext4_orphan_del(NULL, inode);
sb_end_intwrite(inode->i_sb);
if (freeze_protected)
sb_end_intwrite(inode->i_sb);
ext4_xattr_inode_array_free(ea_inode_array);
goto no_delete;
}
@ -323,7 +331,8 @@ stop_handle:
else
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
sb_end_intwrite(inode->i_sb);
if (freeze_protected)
sb_end_intwrite(inode->i_sb);
ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
@ -830,8 +839,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
int create = map_flags & EXT4_GET_BLOCKS_CREATE;
int err;
J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|| handle != NULL || create == 0);
ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|| handle != NULL || create == 0);
map.m_lblk = block;
map.m_len = 1;
@ -846,9 +855,9 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
if (unlikely(!bh))
return ERR_PTR(-ENOMEM);
if (map.m_flags & EXT4_MAP_NEW) {
J_ASSERT(create != 0);
J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|| (handle != NULL));
ASSERT(create != 0);
ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|| (handle != NULL));
/*
* Now that we do not always journal data, we should
@ -2055,7 +2064,7 @@ static int ext4_writepage(struct page *page,
unlock_page(page);
return -ENOMEM;
}
ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
ret = ext4_bio_write_page(&io_submit, page, len, keep_towrite);
ext4_io_submit(&io_submit);
/* Drop io_end reference we got from init */
ext4_put_io_end_defer(io_submit.io_end);
@ -2089,7 +2098,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
err = ext4_bio_write_page(&mpd->io_submit, page, len, false);
if (!err)
mpd->wbc->nr_to_write--;
mpd->first_page++;
@ -4610,7 +4619,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
(ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
if (flags & EXT4_IGET_HANDLE)
return ERR_PTR(-ESTALE);
__ext4_error(sb, function, line, EFSCORRUPTED, 0,
__ext4_error(sb, function, line, false, EFSCORRUPTED, 0,
"inode #%lu: comm %s: iget: illegal inode #",
ino, current->comm);
return ERR_PTR(-EFSCORRUPTED);

View File

@ -822,24 +822,6 @@ void ext4_mb_generate_buddy(struct super_block *sb,
spin_unlock(&sbi->s_bal_lock);
}
static void mb_regenerate_buddy(struct ext4_buddy *e4b)
{
int count;
int order = 1;
void *buddy;
while ((buddy = mb_find_buddy(e4b, order++, &count))) {
ext4_set_bits(buddy, 0, count);
}
e4b->bd_info->bb_fragments = 0;
memset(e4b->bd_info->bb_counters, 0,
sizeof(*e4b->bd_info->bb_counters) *
(e4b->bd_sb->s_blocksize_bits + 2));
ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
e4b->bd_bitmap, e4b->bd_group);
}
/* The buddy information is attached to the buddy cache inode
* for convenience. The information regarding each group
* is loaded via ext4_mb_load_buddy. The information involve
@ -1307,22 +1289,18 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
int order = 1;
int bb_incr = 1 << (e4b->bd_blkbits - 1);
int order = 1, max;
void *bb;
BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
bb = e4b->bd_buddy;
while (order <= e4b->bd_blkbits + 1) {
block = block >> 1;
if (!mb_test_bit(block, bb)) {
bb = mb_find_buddy(e4b, order, &max);
if (!mb_test_bit(block >> order, bb)) {
/* this block is part of buddy of order 'order' */
return order;
}
bb += bb_incr;
bb_incr >>= 1;
order++;
}
return 0;
@ -1512,7 +1490,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
}
mb_regenerate_buddy(e4b);
goto done;
}
@ -2395,9 +2372,9 @@ repeat:
nr = sbi->s_mb_prefetch;
if (ext4_has_feature_flex_bg(sb)) {
nr = (group / sbi->s_mb_prefetch) *
sbi->s_mb_prefetch;
nr = nr + sbi->s_mb_prefetch - group;
nr = 1 << sbi->s_log_groups_per_flex;
nr -= group & (nr - 1);
nr = min(nr, sbi->s_mb_prefetch);
}
prefetch_grp = ext4_mb_prefetch(sb, group,
nr, &prefetch_ios);
@ -2733,7 +2710,8 @@ static int ext4_mb_init_backend(struct super_block *sb)
if (ext4_has_feature_flex_bg(sb)) {
/* a single flex group is supposed to be read by a single IO */
sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex;
sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex,
BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
} else {
sbi->s_mb_prefetch = 32;
@ -5126,6 +5104,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
ext4_group_first_block_no(sb, group) +
EXT4_C2B(sbi, cluster),
"Block already on to-be-freed list");
kmem_cache_free(ext4_free_data_cachep, new_entry);
return 0;
}
}

View File

@ -182,10 +182,6 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
return bh;
}
#ifndef assert
#define assert(test) J_ASSERT(test)
#endif
#ifdef DX_DEBUG
#define dxtrace(command) command
#else
@ -843,7 +839,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
break;
}
}
assert (at == p - 1);
ASSERT(at == p - 1);
}
at = p - 1;
@ -1259,8 +1255,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
struct dx_entry *old = frame->at, *new = old + 1;
int count = dx_get_count(entries);
assert(count < dx_get_limit(entries));
assert(old < entries + count);
ASSERT(count < dx_get_limit(entries));
ASSERT(old < entries + count);
memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
dx_set_hash(new, hash);
dx_set_block(new, block);
@ -2959,7 +2955,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
* hold i_mutex, or the inode can not be referenced from outside,
* so i_nlink should not be bumped due to race
*/
J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
BUFFER_TRACE(sbi->s_sbh, "get_write_access");

View File

@ -111,9 +111,6 @@ static void ext4_finish_bio(struct bio *bio)
unsigned under_io = 0;
unsigned long flags;
if (!page)
continue;
if (fscrypt_is_bounce_page(page)) {
bounce_page = page;
page = fscrypt_pagecache_page(bounce_page);
@ -438,7 +435,6 @@ submit_and_retry:
int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
struct writeback_control *wbc,
bool keep_towrite)
{
struct page *bounce_page = NULL;
@ -448,6 +444,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
int ret = 0;
int nr_submitted = 0;
int nr_to_submit = 0;
struct writeback_control *wbc = io->io_wbc;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));

View File

@ -404,10 +404,8 @@ void ext4_itable_unused_set(struct super_block *sb,
bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}
static void __ext4_update_tstamp(__le32 *lo, __u8 *hi)
static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
{
time64_t now = ktime_get_real_seconds();
now = clamp_val(now, 0, (1ull << 40) - 1);
*lo = cpu_to_le32(lower_32_bits(now));
@ -419,108 +417,11 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
}
#define ext4_update_tstamp(es, tstamp) \
__ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
__ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
ktime_get_real_seconds())
#define ext4_get_tstamp(es, tstamp) \
__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
static void __save_error_info(struct super_block *sb, int error,
__u32 ino, __u64 block,
const char *func, unsigned int line)
{
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
int err;
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
if (bdev_read_only(sb->s_bdev))
return;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_update_tstamp(es, s_last_error_time);
strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
es->s_last_error_line = cpu_to_le32(line);
es->s_last_error_ino = cpu_to_le32(ino);
es->s_last_error_block = cpu_to_le64(block);
switch (error) {
case EIO:
err = EXT4_ERR_EIO;
break;
case ENOMEM:
err = EXT4_ERR_ENOMEM;
break;
case EFSBADCRC:
err = EXT4_ERR_EFSBADCRC;
break;
case 0:
case EFSCORRUPTED:
err = EXT4_ERR_EFSCORRUPTED;
break;
case ENOSPC:
err = EXT4_ERR_ENOSPC;
break;
case ENOKEY:
err = EXT4_ERR_ENOKEY;
break;
case EROFS:
err = EXT4_ERR_EROFS;
break;
case EFBIG:
err = EXT4_ERR_EFBIG;
break;
case EEXIST:
err = EXT4_ERR_EEXIST;
break;
case ERANGE:
err = EXT4_ERR_ERANGE;
break;
case EOVERFLOW:
err = EXT4_ERR_EOVERFLOW;
break;
case EBUSY:
err = EXT4_ERR_EBUSY;
break;
case ENOTDIR:
err = EXT4_ERR_ENOTDIR;
break;
case ENOTEMPTY:
err = EXT4_ERR_ENOTEMPTY;
break;
case ESHUTDOWN:
err = EXT4_ERR_ESHUTDOWN;
break;
case EFAULT:
err = EXT4_ERR_EFAULT;
break;
default:
err = EXT4_ERR_UNKNOWN;
}
es->s_last_error_errcode = err;
if (!es->s_first_error_time) {
es->s_first_error_time = es->s_last_error_time;
es->s_first_error_time_hi = es->s_last_error_time_hi;
strncpy(es->s_first_error_func, func,
sizeof(es->s_first_error_func));
es->s_first_error_line = cpu_to_le32(line);
es->s_first_error_ino = es->s_last_error_ino;
es->s_first_error_block = es->s_last_error_block;
es->s_first_error_errcode = es->s_last_error_errcode;
}
/*
* Start the daily error reporting function if it hasn't been
* started already
*/
if (!es->s_error_count)
mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
le32_add_cpu(&es->s_error_count, 1);
}
/*
 * Record the error via __save_error_info() and then, unless the backing
 * block device is read-only, commit the superblock with sync set to 1.
 */
static void save_error_info(struct super_block *sb, int error,
			    __u32 ino, __u64 block,
			    const char *func, unsigned int line)
{
	__save_error_info(sb, error, ino, block, func, line);

	if (bdev_read_only(sb->s_bdev))
		return;

	ext4_commit_super(sb, 1);
}
/*
* The del_gendisk() function uninitializes the disk-specific data
* structures, including the bdi structure, without telling anyone
@ -649,6 +550,83 @@ static bool system_going_down(void)
|| system_state == SYSTEM_RESTART;
}
/*
 * One entry of the errno translation table: pairs a kernel errno value
 * with the EXT4_ERR_* code that represents it.
 */
struct ext4_err_translation {
	int code;	/* EXT4_ERR_* value */
	int errno;	/* matching errno value */
};

/* Build a table entry for errno "err" and its EXT4_ERR_<err> counterpart. */
#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }

/*
 * errno -> EXT4_ERR_* lookup table.  It is only ever read, so it is
 * declared const to keep it out of writable data.
 */
static const struct ext4_err_translation err_translation[] = {
	EXT4_ERR_TRANSLATE(EIO),
	EXT4_ERR_TRANSLATE(ENOMEM),
	EXT4_ERR_TRANSLATE(EFSBADCRC),
	EXT4_ERR_TRANSLATE(EFSCORRUPTED),
	EXT4_ERR_TRANSLATE(ENOSPC),
	EXT4_ERR_TRANSLATE(ENOKEY),
	EXT4_ERR_TRANSLATE(EROFS),
	EXT4_ERR_TRANSLATE(EFBIG),
	EXT4_ERR_TRANSLATE(EEXIST),
	EXT4_ERR_TRANSLATE(ERANGE),
	EXT4_ERR_TRANSLATE(EOVERFLOW),
	EXT4_ERR_TRANSLATE(EBUSY),
	EXT4_ERR_TRANSLATE(ENOTDIR),
	EXT4_ERR_TRANSLATE(ENOTEMPTY),
	EXT4_ERR_TRANSLATE(ESHUTDOWN),
	EXT4_ERR_TRANSLATE(EFAULT),
};
/*
 * Translate an errno value into its EXT4_ERR_* code by scanning
 * err_translation[].
 *
 * Returns the matching code, or EXT4_ERR_UNKNOWN when the errno has no
 * entry in the table.
 */
static int ext4_errno_to_code(int errno)
{
	unsigned int idx = 0;

	while (idx < ARRAY_SIZE(err_translation)) {
		if (err_translation[idx].errno == errno)
			return err_translation[idx].code;
		idx++;
	}

	return EXT4_ERR_UNKNOWN;
}
/*
 * Stash error details in the in-memory ext4_sb_info.
 *
 * @sb:    superblock of the affected filesystem
 * @error: positive errno describing the failure; 0 is recorded as
 *         EFSCORRUPTED
 * @ino:   inode number associated with the error
 * @block: block number associated with the error
 * @func:  name of the reporting function (the pointer itself is stored)
 * @line:  source line of the report
 *
 * The in-memory mount state is always flagged with EXT4_ERROR_FS.  When
 * the backing block device is read-only nothing else is recorded.  The
 * stashed fields are updated under sbi->s_error_lock.
 */
static void __save_error_info(struct super_block *sb, int error,
			      __u32 ino, __u64 block,
			      const char *func, unsigned int line)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	sbi->s_mount_state |= EXT4_ERROR_FS;
	if (bdev_read_only(sb->s_bdev))
		return;

	/* We default to EFSCORRUPTED error... */
	if (!error)
		error = EFSCORRUPTED;

	spin_lock(&sbi->s_error_lock);

	/* Count the error and overwrite the "last error" record. */
	sbi->s_add_error_count++;
	sbi->s_last_error_time = ktime_get_real_seconds();
	sbi->s_last_error_func = func;
	sbi->s_last_error_line = line;
	sbi->s_last_error_ino = ino;
	sbi->s_last_error_block = block;
	sbi->s_last_error_code = error;

	/* The "first error" record is filled only while its time is unset. */
	if (!sbi->s_first_error_time) {
		sbi->s_first_error_time = sbi->s_last_error_time;
		sbi->s_first_error_func = func;
		sbi->s_first_error_line = line;
		sbi->s_first_error_ino = ino;
		sbi->s_first_error_block = block;
		sbi->s_first_error_code = error;
	}

	spin_unlock(&sbi->s_error_lock);
}
/*
 * Record the error via __save_error_info() and then, unless the backing
 * block device is read-only, commit the superblock with sync set to 1.
 */
static void save_error_info(struct super_block *sb, int error,
			    __u32 ino, __u64 block,
			    const char *func, unsigned int line)
{
	__save_error_info(sb, error, ino, block, func, line);

	if (bdev_read_only(sb->s_bdev))
		return;

	ext4_commit_super(sb, 1);
}
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
*
@ -662,40 +640,50 @@ static bool system_going_down(void)
* We'll just use the jbd2_journal_abort() error code to record an error in
* the journal instead. On recovery, the journal will complain about
* that error until we've noted it down and cleared it.
*
* If force_ro is set, we unconditionally force the filesystem into an
* ABORT|READONLY state, unless the error response on the fs has been set to
* panic in which case we take the easy way out and panic immediately. This is
* used to deal with unrecoverable failures such as journal IO errors or ENOMEM
* at a critical moment in log management.
*/
static void ext4_handle_error(struct super_block *sb)
static void ext4_handle_error(struct super_block *sb, bool force_ro)
{
journal_t *journal = EXT4_SB(sb)->s_journal;
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);
if (sb_rdonly(sb))
if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT)))
return;
if (!test_opt(sb, ERRORS_CONT)) {
journal_t *journal = EXT4_SB(sb)->s_journal;
ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
if (journal)
jbd2_journal_abort(journal, -EIO);
}
ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
if (journal)
jbd2_journal_abort(journal, -EIO);
/*
* We force ERRORS_RO behavior when system is rebooting. Otherwise we
* could panic during 'reboot -f' as the underlying device got already
* disabled.
*/
if (test_opt(sb, ERRORS_RO) || system_going_down()) {
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
* Make sure updated value of ->s_mount_flags will be visible
* before ->s_flags update
*/
smp_wmb();
sb->s_flags |= SB_RDONLY;
} else if (test_opt(sb, ERRORS_PANIC)) {
if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
* Make sure updated value of ->s_mount_flags will be visible before
* ->s_flags update
*/
smp_wmb();
sb->s_flags |= SB_RDONLY;
}
/*
 * Work handler for the s_error_work item embedded in struct ext4_sb_info:
 * recovers the owning ext4_sb_info from @work and commits its superblock
 * with sync set to 1.
 */
static void flush_stashed_error_work(struct work_struct *work)
{
	struct ext4_sb_info *sbi;

	sbi = container_of(work, struct ext4_sb_info, s_error_work);
	ext4_commit_super(sbi->s_sb, 1);
}
#define ext4_error_ratelimit(sb) \
@ -703,7 +691,7 @@ static void ext4_handle_error(struct super_block *sb)
"EXT4-fs error")
void __ext4_error(struct super_block *sb, const char *function,
unsigned int line, int error, __u64 block,
unsigned int line, bool force_ro, int error, __u64 block,
const char *fmt, ...)
{
struct va_format vaf;
@ -723,7 +711,7 @@ void __ext4_error(struct super_block *sb, const char *function,
va_end(args);
}
save_error_info(sb, error, 0, block, function, line);
ext4_handle_error(sb);
ext4_handle_error(sb, force_ro);
}
void __ext4_error_inode(struct inode *inode, const char *function,
@ -755,7 +743,7 @@ void __ext4_error_inode(struct inode *inode, const char *function,
}
save_error_info(inode->i_sb, error, inode->i_ino, block,
function, line);
ext4_handle_error(inode->i_sb);
ext4_handle_error(inode->i_sb, false);
}
void __ext4_error_file(struct file *file, const char *function,
@ -794,7 +782,7 @@ void __ext4_error_file(struct file *file, const char *function,
}
save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
function, line);
ext4_handle_error(inode->i_sb);
ext4_handle_error(inode->i_sb, false);
}
const char *ext4_decode_error(struct super_block *sb, int errno,
@ -862,51 +850,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
}
save_error_info(sb, -errno, 0, 0, function, line);
ext4_handle_error(sb);
}
/*
* ext4_abort is a much stronger failure handler than ext4_error. The
* abort function may be used to deal with unrecoverable failures such
* as journal IO errors or ENOMEM at a critical moment in log management.
*
* We unconditionally force the filesystem into an ABORT|READONLY state,
* unless the error response on the fs has been set to panic in which
* case we take the easy way out and panic immediately.
*/
void __ext4_abort(struct super_block *sb, const char *function,
unsigned int line, int error, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
return;
save_error_info(sb, error, 0, 0, function, line);
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
sb->s_id, function, line, &vaf);
va_end(args);
if (sb_rdonly(sb) == 0) {
ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
* Make sure updated value of ->s_mount_flags will be visible
* before ->s_flags update
*/
smp_wmb();
sb->s_flags |= SB_RDONLY;
}
if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
panic("EXT4-fs panic from previous error\n");
ext4_handle_error(sb, false);
}
void __ext4_msg(struct super_block *sb,
@ -982,8 +926,6 @@ __acquires(bitlock)
return;
trace_ext4_error(sb, function, line);
__save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
if (ext4_error_ratelimit(sb)) {
va_start(args, fmt);
vaf.fmt = fmt;
@ -999,17 +941,16 @@ __acquires(bitlock)
va_end(args);
}
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);
if (test_opt(sb, ERRORS_CONT)) {
ext4_commit_super(sb, 0);
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);
__save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
schedule_work(&EXT4_SB(sb)->s_error_work);
return;
}
ext4_unlock_group(sb, grp);
ext4_commit_super(sb, 1);
ext4_handle_error(sb);
save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
ext4_handle_error(sb, false);
/*
* We only get here in the ERRORS_RO case; relocking the group
* may be dangerous, but nothing bad will happen since the
@ -1181,6 +1122,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_unregister_li_request(sb);
ext4_quota_off_umount(sb);
flush_work(&sbi->s_error_work);
destroy_workqueue(sbi->rsv_conversion_wq);
/*
@ -1240,7 +1182,7 @@ static void ext4_put_super(struct super_block *sb)
* in-memory list had better be clean by this point. */
if (!list_empty(&sbi->s_orphan))
dump_orphan_list(sb, sbi);
J_ASSERT(list_empty(&sbi->s_orphan));
ASSERT(list_empty(&sbi->s_orphan));
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
@ -4005,6 +3947,21 @@ static void ext4_set_resv_clusters(struct super_block *sb)
atomic64_set(&sbi->s_resv_clusters, resv_clusters);
}
/*
 * Return a short constant string naming the quota mode of @sb:
 *   "disabled"   - built without CONFIG_QUOTA
 *   "none"       - ext4_quota_capable() reports false
 *   "journalled" - a journal is present and ext4_is_quota_journalled()
 *                  reports true
 *   "writeback"  - every other case
 */
static const char *ext4_quota_mode(struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	const char *mode;

	if (!ext4_quota_capable(sb))
		mode = "none";
	else if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
		mode = "journalled";
	else
		mode = "writeback";

	return mode;
#else
	return "disabled";
#endif
}
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
@ -4073,7 +4030,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (IS_ERR(bh)) {
ext4_msg(sb, KERN_ERR, "unable to read superblock");
ret = PTR_ERR(bh);
bh = NULL;
goto out_fail;
}
/*
@ -4187,19 +4143,26 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
*/
sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (le32_to_cpu(es->s_log_block_size) >
(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
ext4_msg(sb, KERN_ERR,
"Invalid log block size: %u",
le32_to_cpu(es->s_log_block_size));
goto failed_mount;
}
if (le32_to_cpu(es->s_log_cluster_size) >
(EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
ext4_msg(sb, KERN_ERR,
"Invalid log cluster size: %u",
le32_to_cpu(es->s_log_cluster_size));
goto failed_mount;
}
blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize == PAGE_SIZE)
set_opt(sb, DIOREAD_NOLOCK);
if (blocksize < EXT4_MIN_BLOCK_SIZE ||
blocksize > EXT4_MAX_BLOCK_SIZE) {
ext4_msg(sb, KERN_ERR,
"Unsupported filesystem blocksize %d (%d log_block_size)",
blocksize, le32_to_cpu(es->s_log_block_size));
goto failed_mount;
}
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
@ -4417,21 +4380,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
goto failed_mount;
if (le32_to_cpu(es->s_log_block_size) >
(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
ext4_msg(sb, KERN_ERR,
"Invalid log block size: %u",
le32_to_cpu(es->s_log_block_size));
goto failed_mount;
}
if (le32_to_cpu(es->s_log_cluster_size) >
(EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
ext4_msg(sb, KERN_ERR,
"Invalid log cluster size: %u",
le32_to_cpu(es->s_log_cluster_size));
goto failed_mount;
}
if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
ext4_msg(sb, KERN_ERR,
"Number of reserved GDT blocks insanely large: %d",
@ -4702,7 +4650,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"can't read group descriptor %d", i);
db_count = i;
ret = PTR_ERR(bh);
bh = NULL;
goto failed_mount2;
}
rcu_read_lock();
@ -4717,6 +4664,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
spin_lock_init(&sbi->s_error_lock);
INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
/* Register extent status tree shrinker */
if (ext4_es_register_shrinker(sbi))
@ -4872,6 +4821,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"requested data journaling mode");
goto failed_mount_wq;
}
break;
default:
break;
}
@ -5000,13 +4950,11 @@ no_journal:
block = ext4_count_free_clusters(sb);
ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, block));
ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
GFP_KERNEL);
if (!err) {
unsigned long freei = ext4_count_free_inodes(sb);
sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
@ -5086,10 +5034,11 @@ no_journal:
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
"Opts: %.*s%s%s", descr,
"Opts: %.*s%s%s. Quota mode: %s.", descr,
(int) sizeof(sbi->s_es->s_mount_opts),
sbi->s_es->s_mount_opts,
*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
*sbi->s_es->s_mount_opts ? "; " : "", orig_data,
ext4_quota_mode(sb));
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@ -5154,6 +5103,7 @@ failed_mount3a:
ext4_es_unregister_shrinker(sbi);
failed_mount3:
del_timer_sync(&sbi->s_err_report);
flush_work(&sbi->s_error_work);
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
@ -5480,6 +5430,7 @@ err_out:
static int ext4_commit_super(struct super_block *sb, int sync)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
@ -5511,6 +5462,46 @@ static int ext4_commit_super(struct super_block *sb, int sync)
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
/* Copy error information to the on-disk superblock */
spin_lock(&sbi->s_error_lock);
if (sbi->s_add_error_count > 0) {
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
if (!es->s_first_error_time && !es->s_first_error_time_hi) {
__ext4_update_tstamp(&es->s_first_error_time,
&es->s_first_error_time_hi,
sbi->s_first_error_time);
strncpy(es->s_first_error_func, sbi->s_first_error_func,
sizeof(es->s_first_error_func));
es->s_first_error_line =
cpu_to_le32(sbi->s_first_error_line);
es->s_first_error_ino =
cpu_to_le32(sbi->s_first_error_ino);
es->s_first_error_block =
cpu_to_le64(sbi->s_first_error_block);
es->s_first_error_errcode =
ext4_errno_to_code(sbi->s_first_error_code);
}
__ext4_update_tstamp(&es->s_last_error_time,
&es->s_last_error_time_hi,
sbi->s_last_error_time);
strncpy(es->s_last_error_func, sbi->s_last_error_func,
sizeof(es->s_last_error_func));
es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
es->s_last_error_errcode =
ext4_errno_to_code(sbi->s_last_error_code);
/*
* Start the daily error reporting function if it hasn't been
* started already
*/
if (!es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
sbi->s_add_error_count = 0;
}
spin_unlock(&sbi->s_error_lock);
BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
if (sync)
@ -5864,6 +5855,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}
/* Flush outstanding errors before changing fs state */
flush_work(&sbi->s_error_work);
if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
err = -EROFS;
@ -6022,7 +6016,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
*/
*flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.",
orig_data, ext4_quota_mode(sb));
kfree(orig_data);
return 0;
@ -6201,11 +6196,8 @@ static int ext4_release_dquot(struct dquot *dquot)
static int ext4_mark_dquot_dirty(struct dquot *dquot)
{
struct super_block *sb = dquot->dq_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
/* Are we journaling quotas? */
if (ext4_has_feature_quota(sb) ||
sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (ext4_is_quota_journalled(sb)) {
dquot_mark_dquot_dirty(dquot);
return ext4_write_dquot(dquot);
} else {

View File

@ -1927,7 +1927,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
} else {
/* Allocate a buffer where we construct the new block. */
s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
/* assert(header == s->base) */
error = -ENOMEM;
if (s->base == NULL)
goto cleanup;

View File

@ -1869,9 +1869,7 @@ static int load_superblock(journal_t *journal)
if (jbd2_has_feature_fast_commit(journal)) {
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
if (!num_fc_blocks)
num_fc_blocks = JBD2_MIN_FC_BLOCKS;
num_fc_blocks = jbd2_journal_get_num_fc_blks(sb);
if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
journal->j_last = journal->j_fc_last - num_fc_blocks;
journal->j_fc_first = journal->j_last + 1;
@ -2102,9 +2100,7 @@ jbd2_journal_initialize_fast_commit(journal_t *journal)
journal_superblock_t *sb = journal->j_superblock;
unsigned long long num_fc_blks;
num_fc_blks = be32_to_cpu(sb->s_num_fc_blks);
if (num_fc_blks == 0)
num_fc_blks = JBD2_MIN_FC_BLOCKS;
num_fc_blks = jbd2_journal_get_num_fc_blks(sb);
if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
return -ENOSPC;

View File

@ -68,7 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags);
extern void jbd2_free(void *ptr, size_t size);
#define JBD2_MIN_JOURNAL_BLOCKS 1024
#define JBD2_MIN_FC_BLOCKS 256
#define JBD2_DEFAULT_FAST_COMMIT_BLOCKS 256
#ifdef __KERNEL__
@ -538,6 +538,7 @@ struct transaction_chp_stats_s {
* The transaction keeps track of all of the buffers modified by a
* running transaction, and all of the buffers committed but not yet
* flushed to home for finished transactions.
* (Locking Documentation improved by LockDoc)
*/
/*
@ -658,12 +659,12 @@ struct transaction_s
unsigned long t_start;
/*
* When commit was requested
* When commit was requested [j_state_lock]
*/
unsigned long t_requested;
/*
* Checkpointing stats [j_checkpoint_sem]
* Checkpointing stats [j_list_lock]
*/
struct transaction_chp_stats_s t_chp_stats;
@ -1691,6 +1692,13 @@ static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
return journal->j_chksum_driver != NULL;
}
/*
 * Return the number of fast commit blocks recorded in journal superblock
 * @jsb (s_num_fc_blks, converted from big-endian), or
 * JBD2_DEFAULT_FAST_COMMIT_BLOCKS when that field is zero.
 */
static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb)
{
	int nr_blks = be32_to_cpu(jsb->s_num_fc_blks);

	if (nr_blks == 0)
		return JBD2_DEFAULT_FAST_COMMIT_BLOCKS;

	return nr_blks;
}
/*
* Return number of free blocks in the log. Must be called under j_state_lock.
*/