2018-09-05 06:46:30 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0+ */
|
2009-04-07 10:01:23 +08:00
|
|
|
/*
|
2021-11-09 10:35:01 +08:00
|
|
|
* the_nilfs shared structure.
|
2009-04-07 10:01:23 +08:00
|
|
|
*
|
|
|
|
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
|
|
|
|
*
|
2016-05-24 07:23:09 +08:00
|
|
|
* Written by Ryusuke Konishi.
|
2009-04-07 10:01:23 +08:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _THE_NILFS_H
|
|
|
|
#define _THE_NILFS_H
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/buffer_head.h>
|
2010-08-14 11:59:15 +08:00
|
|
|
#include <linux/rbtree.h>
|
2009-04-07 10:01:23 +08:00
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/blkdev.h>
|
|
|
|
#include <linux/backing-dev.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/slab.h>
|
2017-11-18 07:29:39 +08:00
|
|
|
#include <linux/refcount.h>
|
2009-04-07 10:01:23 +08:00
|
|
|
|
2011-03-09 10:05:08 +08:00
|
|
|
struct nilfs_sc_info;
|
2014-08-09 05:20:42 +08:00
|
|
|
struct nilfs_sysfs_dev_subgroups;
|
2011-03-09 10:05:08 +08:00
|
|
|
|
2009-04-07 10:01:23 +08:00
|
|
|
/* the_nilfs struct */
|
|
|
|
/*
 * Bit numbers within the ns_flags word of struct the_nilfs.  Each bit is
 * manipulated through the set_nilfs_*(), clear_nilfs_*() and nilfs_*()
 * helpers generated by THE_NILFS_FNS().
 */
enum {
	/* Information from super_block is set */
	THE_NILFS_INIT = 0,
	/* 'next' pointer chain has broken */
	THE_NILFS_DISCONTINUED,
	/* gc process is running */
	THE_NILFS_GC_RUNNING,
	/* super block is dirty */
	THE_NILFS_SB_DIRTY,
	/*
	 * Disposing dirty files for cleanup.  Set while the "garbage_list"
	 * is being disposed so that __nilfs_mark_inode_dirty() can skip its
	 * work and avoid a use-after-free of nilfs_root reached via iput().
	 */
	THE_NILFS_PURGING,
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* struct the_nilfs - struct to supervise multiple nilfs mount points
|
|
|
|
* @ns_flags: flags
|
2014-10-14 06:53:20 +08:00
|
|
|
* @ns_flushed_device: flag indicating if all volatile data was flushed
|
2016-08-03 05:05:06 +08:00
|
|
|
* @ns_sb: back pointer to super block instance
|
2009-04-07 10:01:23 +08:00
|
|
|
* @ns_bdev: block device
|
|
|
|
* @ns_sem: semaphore for shared states
|
2012-07-31 05:42:07 +08:00
|
|
|
* @ns_snapshot_mount_mutex: mutex to protect snapshot mounts
|
2009-04-07 10:01:59 +08:00
|
|
|
* @ns_sbh: buffer heads of on-disk super blocks
|
|
|
|
* @ns_sbp: pointers to super block data
|
2010-06-28 16:49:33 +08:00
|
|
|
* @ns_sbwtime: previous write time of super block
|
|
|
|
* @ns_sbwcount: write count of super block
|
2009-04-07 10:01:59 +08:00
|
|
|
* @ns_sbsize: size of valid data in super block
|
2012-07-31 05:42:02 +08:00
|
|
|
* @ns_mount_state: file system state
|
2014-08-09 05:20:42 +08:00
|
|
|
* @ns_sb_update_freq: interval of periodical update of superblocks (in seconds)
|
2009-04-07 10:01:23 +08:00
|
|
|
* @ns_seg_seq: segment sequence counter
|
|
|
|
* @ns_segnum: index number of the latest full segment.
|
|
|
|
* @ns_nextnum: index number of the full segment to be used next
|
|
|
|
* @ns_pseg_offset: offset of next partial segment in the current full segment
|
|
|
|
* @ns_cno: next checkpoint number
|
|
|
|
* @ns_ctime: write time of the last segment
|
|
|
|
* @ns_nongc_ctime: write time of the last segment not for cleaner operation
|
|
|
|
* @ns_ndirtyblks: Number of dirty data blocks
|
|
|
|
* @ns_last_segment_lock: lock protecting fields for the latest segment
|
|
|
|
* @ns_last_pseg: start block number of the latest segment
|
|
|
|
* @ns_last_seq: sequence value of the latest segment
|
|
|
|
* @ns_last_cno: checkpoint number of the latest segment
|
2009-04-07 10:01:54 +08:00
|
|
|
* @ns_prot_seq: least sequence number of segments which must not be reclaimed
|
2010-06-29 13:42:13 +08:00
|
|
|
* @ns_prev_seq: base sequence number used to decide whether to advance the log cursor
|
2011-03-09 10:05:08 +08:00
|
|
|
* @ns_writer: log writer
|
|
|
|
* @ns_segctor_sem: semaphore protecting log write
|
2009-04-07 10:01:23 +08:00
|
|
|
* @ns_dat: DAT file inode
|
|
|
|
* @ns_cpfile: checkpoint file inode
|
|
|
|
* @ns_sufile: segusage file inode
|
2010-08-14 11:59:15 +08:00
|
|
|
* @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root)
|
|
|
|
* @ns_cptree_lock: lock protecting @ns_cptree
|
2011-03-09 10:05:07 +08:00
|
|
|
* @ns_dirty_files: list of dirty files
|
|
|
|
* @ns_inode_lock: lock protecting @ns_dirty_files
|
2009-04-07 10:01:23 +08:00
|
|
|
* @ns_gc_inodes: dummy inodes to keep live blocks
|
2011-03-09 10:05:08 +08:00
|
|
|
* @ns_next_generation: next generation number for inodes
|
|
|
|
* @ns_next_gen_lock: lock protecting @ns_next_generation
|
2011-03-09 10:05:07 +08:00
|
|
|
* @ns_mount_opt: mount options
|
2011-03-09 10:05:07 +08:00
|
|
|
* @ns_resuid: uid for reserved blocks
|
|
|
|
* @ns_resgid: gid for reserved blocks
|
|
|
|
* @ns_interval: checkpoint creation interval
|
|
|
|
* @ns_watermark: watermark for the number of dirty buffers
|
2009-04-07 10:01:23 +08:00
|
|
|
* @ns_blocksize_bits: bit length of block size
|
2010-05-22 23:17:48 +08:00
|
|
|
* @ns_blocksize: block size
|
2009-04-07 10:01:23 +08:00
|
|
|
* @ns_nsegments: number of segments in filesystem
|
|
|
|
* @ns_blocks_per_segment: number of blocks per segment
|
|
|
|
* @ns_r_segments_percentage: reserved segments percentage
|
|
|
|
* @ns_nrsvsegs: number of reserved segments
|
|
|
|
* @ns_first_data_block: block number of first data block
|
|
|
|
* @ns_inode_size: size of on-disk inode
|
|
|
|
* @ns_first_ino: first non-special (non-reserved) inode number
|
|
|
|
* @ns_crc_seed: seed value of CRC32 calculation
|
2014-08-09 05:20:39 +08:00
|
|
|
* @ns_dev_kobj: /sys/fs/<nilfs>/<device>
|
|
|
|
* @ns_dev_kobj_unregister: completion state
|
2014-08-09 05:20:42 +08:00
|
|
|
* @ns_dev_subgroups: <device> subgroups pointer
|
2009-04-07 10:01:23 +08:00
|
|
|
*/
|
|
|
|
struct the_nilfs {
|
|
|
|
unsigned long ns_flags;
|
2014-10-14 06:53:20 +08:00
|
|
|
int ns_flushed_device;
|
2009-04-07 10:01:23 +08:00
|
|
|
|
2016-08-03 05:05:06 +08:00
|
|
|
struct super_block *ns_sb;
|
2009-04-07 10:01:23 +08:00
|
|
|
struct block_device *ns_bdev;
|
|
|
|
struct rw_semaphore ns_sem;
|
2012-07-31 05:42:07 +08:00
|
|
|
struct mutex ns_snapshot_mount_mutex;
|
2009-04-07 10:01:23 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* used for
|
|
|
|
* - loading the latest checkpoint exclusively.
|
|
|
|
* - allocating a new full segment.
|
|
|
|
*/
|
2009-04-07 10:01:59 +08:00
|
|
|
struct buffer_head *ns_sbh[2];
|
|
|
|
struct nilfs_super_block *ns_sbp[2];
|
2018-02-07 07:39:21 +08:00
|
|
|
time64_t ns_sbwtime;
|
2016-05-24 07:23:39 +08:00
|
|
|
unsigned int ns_sbwcount;
|
|
|
|
unsigned int ns_sbsize;
|
|
|
|
unsigned int ns_mount_state;
|
|
|
|
unsigned int ns_sb_update_freq;
|
2009-04-07 10:01:23 +08:00
|
|
|
|
|
|
|
/*
|
2016-08-03 05:05:25 +08:00
|
|
|
* The following fields are updated by a writable FS-instance.
|
|
|
|
* These fields are protected by ns_segctor_sem outside load_nilfs().
|
2009-04-07 10:01:23 +08:00
|
|
|
*/
|
|
|
|
u64 ns_seg_seq;
|
|
|
|
__u64 ns_segnum;
|
|
|
|
__u64 ns_nextnum;
|
|
|
|
unsigned long ns_pseg_offset;
|
|
|
|
__u64 ns_cno;
|
2018-02-07 07:39:21 +08:00
|
|
|
time64_t ns_ctime;
|
|
|
|
time64_t ns_nongc_ctime;
|
2009-04-07 10:01:23 +08:00
|
|
|
atomic_t ns_ndirtyblks;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The following fields hold information on the latest partial segment
|
|
|
|
* written to disk with a super root. These fields are protected by
|
|
|
|
* ns_last_segment_lock.
|
|
|
|
*/
|
|
|
|
spinlock_t ns_last_segment_lock;
|
|
|
|
sector_t ns_last_pseg;
|
|
|
|
u64 ns_last_seq;
|
|
|
|
__u64 ns_last_cno;
|
2009-04-07 10:01:54 +08:00
|
|
|
u64 ns_prot_seq;
|
2010-06-29 13:42:13 +08:00
|
|
|
u64 ns_prev_seq;
|
2009-04-07 10:01:23 +08:00
|
|
|
|
2011-03-09 10:05:08 +08:00
|
|
|
struct nilfs_sc_info *ns_writer;
|
2009-04-07 10:01:23 +08:00
|
|
|
struct rw_semaphore ns_segctor_sem;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Following fields are lock free except for the period before
|
|
|
|
* the_nilfs is initialized.
|
|
|
|
*/
|
|
|
|
struct inode *ns_dat;
|
|
|
|
struct inode *ns_cpfile;
|
|
|
|
struct inode *ns_sufile;
|
|
|
|
|
2010-08-14 11:59:15 +08:00
|
|
|
/* Checkpoint tree */
|
|
|
|
struct rb_root ns_cptree;
|
|
|
|
spinlock_t ns_cptree_lock;
|
|
|
|
|
2011-03-09 10:05:07 +08:00
|
|
|
/* Dirty inode list */
|
|
|
|
struct list_head ns_dirty_files;
|
|
|
|
spinlock_t ns_inode_lock;
|
|
|
|
|
2010-08-20 18:06:11 +08:00
|
|
|
/* GC inode list */
|
2009-04-07 10:01:23 +08:00
|
|
|
struct list_head ns_gc_inodes;
|
|
|
|
|
2011-03-09 10:05:08 +08:00
|
|
|
/* Inode allocator */
|
|
|
|
u32 ns_next_generation;
|
|
|
|
spinlock_t ns_next_gen_lock;
|
|
|
|
|
2011-03-09 10:05:07 +08:00
|
|
|
/* Mount options */
|
|
|
|
unsigned long ns_mount_opt;
|
|
|
|
|
2011-03-09 10:05:07 +08:00
|
|
|
uid_t ns_resuid;
|
|
|
|
gid_t ns_resgid;
|
|
|
|
unsigned long ns_interval;
|
|
|
|
unsigned long ns_watermark;
|
|
|
|
|
2009-04-07 10:01:23 +08:00
|
|
|
/* Disk layout information (static) */
|
|
|
|
unsigned int ns_blocksize_bits;
|
2010-05-22 23:17:48 +08:00
|
|
|
unsigned int ns_blocksize;
|
2009-04-07 10:01:23 +08:00
|
|
|
unsigned long ns_nsegments;
|
|
|
|
unsigned long ns_blocks_per_segment;
|
|
|
|
unsigned long ns_r_segments_percentage;
|
|
|
|
unsigned long ns_nrsvsegs;
|
|
|
|
unsigned long ns_first_data_block;
|
|
|
|
int ns_inode_size;
|
nilfs2: fix inode number range checks
Patch series "nilfs2: fix potential issues related to reserved inodes".
This series fixes one use-after-free issue reported by syzbot, caused by
nilfs2's internal inode being exposed in the namespace on a corrupted
filesystem, and a couple of flaws that cause problems if the starting
number of non-reserved inodes written in the on-disk super block is
intentionally (or corruptly) changed from its default value.
This patch (of 3):
In the current implementation of nilfs2, "nilfs->ns_first_ino", which
gives the first non-reserved inode number, is read from the superblock,
but its lower limit is not checked.
As a result, if a number that overlaps with the inode number range of
reserved inodes such as the root directory or metadata files is set in the
super block parameter, the inode number test macros (NILFS_MDT_INODE and
NILFS_VALID_INODE) will not function properly.
In addition, these test macros use left bit-shift calculations using with
the inode number as the shift count via the BIT macro, but the result of a
shift calculation that exceeds the bit width of an integer is undefined in
the C specification, so if "ns_first_ino" is set to a large value other
than the default value NILFS_USER_INO (=11), the macros may potentially
malfunction depending on the environment.
Fix these issues by checking the lower bound of "nilfs->ns_first_ino" and
by preventing bit shifts equal to or greater than the NILFS_USER_INO
constant in the inode number test macros.
Also, change the type of "ns_first_ino" from signed integer to unsigned
integer to avoid the need for type casting in comparisons such as the
lower bound check introduced this time.
Link: https://lkml.kernel.org/r/20240623051135.4180-1-konishi.ryusuke@gmail.com
Link: https://lkml.kernel.org/r/20240623051135.4180-2-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-06-23 13:11:33 +08:00
|
|
|
unsigned int ns_first_ino;
|
2009-04-07 10:01:23 +08:00
|
|
|
u32 ns_crc_seed;
|
2014-08-09 05:20:39 +08:00
|
|
|
|
|
|
|
/* /sys/fs/<nilfs>/<device> */
|
|
|
|
struct kobject ns_dev_kobj;
|
|
|
|
struct completion ns_dev_kobj_unregister;
|
2014-08-09 05:20:42 +08:00
|
|
|
struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
|
2009-04-07 10:01:23 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * THE_NILFS_FNS(bit, name) - generate accessors for one ns_flags bit
 *
 * Expands to three static inline helpers acting on bit THE_NILFS_##bit of
 * nilfs->ns_flags:
 *   set_nilfs_##name()   - sets the bit via set_bit()
 *   clear_nilfs_##name() - clears the bit via clear_bit()
 *   nilfs_##name()       - returns the result of test_bit()
 */
#define THE_NILFS_FNS(bit, name) \
static inline void set_nilfs_##name(struct the_nilfs *nilfs) \
{ \
	set_bit(THE_NILFS_##bit, &nilfs->ns_flags); \
} \
static inline void clear_nilfs_##name(struct the_nilfs *nilfs) \
{ \
	clear_bit(THE_NILFS_##bit, &nilfs->ns_flags); \
} \
static inline int nilfs_##name(struct the_nilfs *nilfs) \
{ \
	return test_bit(THE_NILFS_##bit, &nilfs->ns_flags); \
}
|
|
|
|
|
|
|
|
/*
 * Instantiate the flag accessors.  Each line below expands to
 * set_nilfs_NAME(), clear_nilfs_NAME() and nilfs_NAME() for the
 * corresponding THE_NILFS_* bit of ns_flags.
 */
THE_NILFS_FNS(INIT, init)
|
|
|
|
THE_NILFS_FNS(DISCONTINUED, discontinued)
|
2009-09-03 21:24:17 +08:00
|
|
|
THE_NILFS_FNS(GC_RUNNING, gc_running)
|
2009-12-08 23:57:52 +08:00
|
|
|
THE_NILFS_FNS(SB_DIRTY, sb_dirty)
|
nilfs2: fix use-after-free of nilfs_root in dirtying inodes via iput
During unmount process of nilfs2, nothing holds nilfs_root structure after
nilfs2 detaches its writer in nilfs_detach_log_writer(). Previously,
nilfs_evict_inode() could cause use-after-free read for nilfs_root if
inodes are left in "garbage_list" and released by nilfs_dispose_list at
the end of nilfs_detach_log_writer(), and this bug was fixed by commit
9b5a04ac3ad9 ("nilfs2: fix use-after-free bug of nilfs_root in
nilfs_evict_inode()").
However, it turned out that there is another possibility of UAF in the
call path where mark_inode_dirty_sync() is called from iput():
nilfs_detach_log_writer()
nilfs_dispose_list()
iput()
mark_inode_dirty_sync()
__mark_inode_dirty()
nilfs_dirty_inode()
__nilfs_mark_inode_dirty()
nilfs_load_inode_block() --> causes UAF of nilfs_root struct
This can happen after commit 0ae45f63d4ef ("vfs: add support for a
lazytime mount option"), which changed iput() to call
mark_inode_dirty_sync() on its final reference if i_state has I_DIRTY_TIME
flag and i_nlink is non-zero.
This issue appears after commit 28a65b49eb53 ("nilfs2: do not write dirty
data after degenerating to read-only") when using the syzbot reproducer,
but the issue has potentially existed before.
Fix this issue by adding a "purging flag" to the nilfs structure, setting
that flag while disposing the "garbage_list" and checking it in
__nilfs_mark_inode_dirty().
Unlike commit 9b5a04ac3ad9 ("nilfs2: fix use-after-free bug of nilfs_root
in nilfs_evict_inode()"), this patch does not rely on ns_writer to
determine whether to skip operations, so as not to break recovery on
mount. The nilfs_salvage_orphan_logs routine dirties the buffer of
salvaged data before attaching the log writer, so changing
__nilfs_mark_inode_dirty() to skip the operation when ns_writer is NULL
will cause recovery write to fail. The purpose of using the cleanup-only
flag is to allow for narrowing of such conditions.
Link: https://lkml.kernel.org/r/20230728191318.33047-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Reported-by: syzbot+74db8b3087f293d3a13a@syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/000000000000b4e906060113fd63@google.com
Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option")
Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: <stable@vger.kernel.org> # 4.0+
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-07-29 03:13:18 +08:00
|
|
|
/* Expands to set_nilfs_purging(), clear_nilfs_purging() and nilfs_purging(). */
THE_NILFS_FNS(PURGING, purging)
|
2009-04-07 10:01:23 +08:00
|
|
|
|
2011-03-09 10:05:07 +08:00
|
|
|
/*
|
|
|
|
* Mount option operations
|
|
|
|
*/
|
|
|
|
/* Turn off the NILFS_MOUNT_##opt bit(s) in nilfs->ns_mount_opt. */
#define nilfs_clear_opt(nilfs, opt) \
	((nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt)

/* Turn on the NILFS_MOUNT_##opt bit(s) in nilfs->ns_mount_opt. */
#define nilfs_set_opt(nilfs, opt) \
	((nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt)

/* Evaluate to nonzero if any NILFS_MOUNT_##opt bit is set in ns_mount_opt. */
#define nilfs_test_opt(nilfs, opt) \
	((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt)
|
|
|
|
|
2010-08-14 11:59:15 +08:00
|
|
|
/**
|
|
|
|
* struct nilfs_root - nilfs root object
|
|
|
|
* @cno: checkpoint number
|
|
|
|
* @rb_node: red-black tree node
|
|
|
|
* @count: refcount of this structure
|
|
|
|
* @nilfs: nilfs object
|
|
|
|
* @ifile: inode file
|
|
|
|
* @inodes_count: number of inodes
|
2012-07-31 05:42:10 +08:00
|
|
|
* @blocks_count: number of blocks
|
2014-08-09 05:20:52 +08:00
|
|
|
* @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
|
|
|
|
* @snapshot_kobj_unregister: completion state for kernel object
|
2010-08-14 11:59:15 +08:00
|
|
|
*/
|
|
|
|
struct nilfs_root {
|
|
|
|
__u64 cno;
|
|
|
|
struct rb_node rb_node;
|
|
|
|
|
2017-11-18 07:29:39 +08:00
|
|
|
refcount_t count;
|
2010-08-14 11:59:15 +08:00
|
|
|
struct the_nilfs *nilfs;
|
|
|
|
struct inode *ifile;
|
|
|
|
|
2013-07-04 06:08:06 +08:00
|
|
|
atomic64_t inodes_count;
|
|
|
|
atomic64_t blocks_count;
|
2014-08-09 05:20:52 +08:00
|
|
|
|
|
|
|
/* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
|
|
|
|
struct kobject snapshot_kobj;
|
|
|
|
struct completion snapshot_kobj_unregister;
|
2010-08-14 11:59:15 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Special checkpoint number */
|
|
|
|
/*
 * NOTE(review): presumably the checkpoint-number key that stands for the
 * current (non-snapshot) root in ns_cptree - confirm against
 * nilfs_lookup_root() / nilfs_find_or_create_root().
 */
#define NILFS_CPTREE_CURRENT_CNO 0
|
|
|
|
|
2009-04-07 10:01:59 +08:00
|
|
|
/* Minimum interval of periodical update of superblocks (in seconds) */
|
|
|
|
/*
 * Value is in seconds.
 * NOTE(review): presumably the floor/default for ns_sb_update_freq - confirm
 * where the field is initialized.
 */
#define NILFS_SB_FREQ 10
|
|
|
|
|
2009-07-23 00:26:34 +08:00
|
|
|
static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
|
|
|
|
{
|
2018-02-07 07:39:21 +08:00
|
|
|
u64 t = ktime_get_real_seconds();
|
2016-05-24 07:23:25 +08:00
|
|
|
|
2014-08-09 05:20:42 +08:00
|
|
|
return t < nilfs->ns_sbwtime ||
|
|
|
|
t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
|
2009-07-23 00:26:34 +08:00
|
|
|
}
|
|
|
|
|
2010-06-28 16:49:33 +08:00
|
|
|
static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
|
2009-07-23 00:26:34 +08:00
|
|
|
{
|
2010-06-28 16:49:33 +08:00
|
|
|
int flip_bits = nilfs->ns_sbwcount & 0x0FL;
|
2016-05-24 07:23:25 +08:00
|
|
|
|
2010-06-28 16:49:33 +08:00
|
|
|
return (flip_bits != 0x08 && flip_bits != 0x0F);
|
2009-07-23 00:26:34 +08:00
|
|
|
}
|
|
|
|
|
2009-04-07 10:01:23 +08:00
|
|
|
void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
|
2016-08-03 05:05:06 +08:00
|
|
|
struct the_nilfs *alloc_nilfs(struct super_block *sb);
|
2010-09-09 01:07:56 +08:00
|
|
|
void destroy_nilfs(struct the_nilfs *nilfs);
|
2024-04-25 02:27:16 +08:00
|
|
|
int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
|
2011-03-09 10:05:08 +08:00
|
|
|
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
|
2011-05-05 00:23:58 +08:00
|
|
|
unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
|
|
|
|
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
|
2010-01-30 17:06:35 +08:00
|
|
|
int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
|
2009-04-07 10:01:23 +08:00
|
|
|
int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
|
2010-08-14 11:59:15 +08:00
|
|
|
struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
|
|
|
|
struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs,
|
|
|
|
__u64 cno);
|
|
|
|
void nilfs_put_root(struct nilfs_root *root);
|
2009-04-07 10:01:23 +08:00
|
|
|
int nilfs_near_disk_full(struct the_nilfs *);
|
2009-04-07 10:01:59 +08:00
|
|
|
void nilfs_fall_back_super_block(struct the_nilfs *);
|
|
|
|
void nilfs_swap_super_block(struct the_nilfs *);
|
2009-04-07 10:01:23 +08:00
|
|
|
|
|
|
|
|
2010-08-14 11:59:15 +08:00
|
|
|
static inline void nilfs_get_root(struct nilfs_root *root)
|
|
|
|
{
|
2017-11-18 07:29:39 +08:00
|
|
|
refcount_inc(&root->count);
|
2010-08-14 11:59:15 +08:00
|
|
|
}
|
|
|
|
|
2009-11-19 18:58:46 +08:00
|
|
|
static inline int nilfs_valid_fs(struct the_nilfs *nilfs)
|
|
|
|
{
|
2016-05-24 07:23:39 +08:00
|
|
|
unsigned int valid_fs;
|
2009-11-19 18:58:46 +08:00
|
|
|
|
|
|
|
down_read(&nilfs->ns_sem);
|
|
|
|
valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS);
|
|
|
|
up_read(&nilfs->ns_sem);
|
|
|
|
return valid_fs;
|
|
|
|
}
|
|
|
|
|
2009-04-07 10:01:23 +08:00
|
|
|
static inline void
|
|
|
|
nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
|
|
|
|
sector_t *seg_start, sector_t *seg_end)
|
|
|
|
{
|
|
|
|
*seg_start = (sector_t)nilfs->ns_blocks_per_segment * segnum;
|
|
|
|
*seg_end = *seg_start + nilfs->ns_blocks_per_segment - 1;
|
|
|
|
if (segnum == 0)
|
|
|
|
*seg_start = nilfs->ns_first_data_block;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline sector_t
|
|
|
|
nilfs_get_segment_start_blocknr(struct the_nilfs *nilfs, __u64 segnum)
|
|
|
|
{
|
|
|
|
return (segnum == 0) ? nilfs->ns_first_data_block :
|
|
|
|
(sector_t)nilfs->ns_blocks_per_segment * segnum;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline __u64
|
|
|
|
nilfs_get_segnum_of_block(struct the_nilfs *nilfs, sector_t blocknr)
|
|
|
|
{
|
|
|
|
sector_t segnum = blocknr;
|
|
|
|
|
|
|
|
sector_div(segnum, nilfs->ns_blocks_per_segment);
|
|
|
|
return segnum;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
nilfs_terminate_segment(struct the_nilfs *nilfs, sector_t seg_start,
|
|
|
|
sector_t seg_end)
|
|
|
|
{
|
|
|
|
/* terminate the current full segment (used in case of I/O-error) */
|
|
|
|
nilfs->ns_pseg_offset = seg_end - seg_start + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void nilfs_shift_to_next_segment(struct the_nilfs *nilfs)
|
|
|
|
{
|
|
|
|
/* move forward with a full segment */
|
|
|
|
nilfs->ns_segnum = nilfs->ns_nextnum;
|
|
|
|
nilfs->ns_pseg_offset = 0;
|
|
|
|
nilfs->ns_seg_seq++;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs)
|
|
|
|
{
|
|
|
|
__u64 cno;
|
|
|
|
|
|
|
|
spin_lock(&nilfs->ns_last_segment_lock);
|
|
|
|
cno = nilfs->ns_last_cno;
|
|
|
|
spin_unlock(&nilfs->ns_last_segment_lock);
|
|
|
|
return cno;
|
|
|
|
}
|
|
|
|
|
2009-04-07 10:01:58 +08:00
|
|
|
/**
 * nilfs_segment_is_active - test whether a segment is currently in use
 * @nilfs: nilfs object
 * @n: segment number to test
 *
 * Return: 1 if @n equals ns_segnum or ns_nextnum, 0 otherwise.
 */
static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n)
{
	if (n == nilfs->ns_segnum)
		return 1;

	return n == nilfs->ns_nextnum;
}
|
|
|
|
|
2014-10-14 06:53:20 +08:00
|
|
|
static inline int nilfs_flush_device(struct the_nilfs *nilfs)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!nilfs_test_opt(nilfs, BARRIER) || nilfs->ns_flushed_device)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
nilfs->ns_flushed_device = 1;
|
|
|
|
/*
|
|
|
|
* the store to ns_flushed_device must not be reordered after
|
|
|
|
* blkdev_issue_flush().
|
|
|
|
*/
|
|
|
|
smp_wmb();
|
|
|
|
|
2021-01-26 22:52:35 +08:00
|
|
|
err = blkdev_issue_flush(nilfs->ns_bdev);
|
2014-10-14 06:53:20 +08:00
|
|
|
if (err != -EIO)
|
|
|
|
err = 0;
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2009-04-07 10:01:23 +08:00
|
|
|
#endif /* _THE_NILFS_H */
|