2012-11-29 12:28:09 +08:00
|
|
|
/*
|
2012-11-02 16:10:40 +08:00
|
|
|
* fs/f2fs/inode.c
|
|
|
|
*
|
|
|
|
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
|
|
|
|
* http://www.samsung.com/
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/f2fs_fs.h>
|
|
|
|
#include <linux/buffer_head.h>
|
2016-09-10 07:59:39 +08:00
|
|
|
#include <linux/backing-dev.h>
|
2012-11-02 16:10:40 +08:00
|
|
|
#include <linux/writeback.h>
|
|
|
|
|
|
|
|
#include "f2fs.h"
|
|
|
|
#include "node.h"
|
2017-06-14 23:00:56 +08:00
|
|
|
#include "segment.h"
|
2012-11-02 16:10:40 +08:00
|
|
|
|
2013-04-20 00:28:40 +08:00
|
|
|
#include <trace/events/f2fs.h>
|
|
|
|
|
2016-10-15 02:51:23 +08:00
|
|
|
void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
|
2016-07-01 10:09:37 +08:00
|
|
|
{
|
2018-01-11 10:26:19 +08:00
|
|
|
if (is_inode_flag_set(inode, FI_NEW_INODE))
|
|
|
|
return;
|
|
|
|
|
2016-10-15 02:51:23 +08:00
|
|
|
if (f2fs_inode_dirtied(inode, sync))
|
2016-07-01 10:09:37 +08:00
|
|
|
return;
|
2016-10-15 02:51:23 +08:00
|
|
|
|
2016-07-01 10:09:37 +08:00
|
|
|
mark_inode_dirty_sync(inode);
|
|
|
|
}
|
|
|
|
|
2012-11-02 16:10:40 +08:00
|
|
|
void f2fs_set_inode_flags(struct inode *inode)
|
|
|
|
{
|
|
|
|
unsigned int flags = F2FS_I(inode)->i_flags;
|
2014-04-15 14:19:38 +08:00
|
|
|
unsigned int new_fl = 0;
|
2012-11-02 16:10:40 +08:00
|
|
|
|
2018-04-03 15:08:17 +08:00
|
|
|
if (flags & F2FS_SYNC_FL)
|
2014-04-15 14:19:38 +08:00
|
|
|
new_fl |= S_SYNC;
|
2018-04-03 15:08:17 +08:00
|
|
|
if (flags & F2FS_APPEND_FL)
|
2014-04-15 14:19:38 +08:00
|
|
|
new_fl |= S_APPEND;
|
2018-04-03 15:08:17 +08:00
|
|
|
if (flags & F2FS_IMMUTABLE_FL)
|
2014-04-15 14:19:38 +08:00
|
|
|
new_fl |= S_IMMUTABLE;
|
2018-04-03 15:08:17 +08:00
|
|
|
if (flags & F2FS_NOATIME_FL)
|
2014-04-15 14:19:38 +08:00
|
|
|
new_fl |= S_NOATIME;
|
2018-04-03 15:08:17 +08:00
|
|
|
if (flags & F2FS_DIRSYNC_FL)
|
2014-04-15 14:19:38 +08:00
|
|
|
new_fl |= S_DIRSYNC;
|
2017-10-10 03:15:35 +08:00
|
|
|
if (f2fs_encrypted_inode(inode))
|
|
|
|
new_fl |= S_ENCRYPTED;
|
2015-08-24 10:41:32 +08:00
|
|
|
inode_set_flags(inode, new_fl,
|
2017-10-10 03:15:35 +08:00
|
|
|
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
|
|
|
|
S_ENCRYPTED);
|
2012-11-02 16:10:40 +08:00
|
|
|
}
|
|
|
|
|
2013-10-08 17:01:51 +08:00
|
|
|
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
|
|
|
|
{
|
2017-07-19 00:19:06 +08:00
|
|
|
int extra_size = get_extra_isize(inode);
|
|
|
|
|
2013-10-08 17:01:51 +08:00
|
|
|
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
|
|
|
|
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
|
2017-07-19 00:19:06 +08:00
|
|
|
if (ri->i_addr[extra_size])
|
|
|
|
inode->i_rdev = old_decode_dev(
|
|
|
|
le32_to_cpu(ri->i_addr[extra_size]));
|
2013-10-08 17:01:51 +08:00
|
|
|
else
|
2017-07-19 00:19:06 +08:00
|
|
|
inode->i_rdev = new_decode_dev(
|
|
|
|
le32_to_cpu(ri->i_addr[extra_size + 1]));
|
2013-10-08 17:01:51 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-18 08:16:35 +08:00
|
|
|
static bool __written_first_block(struct f2fs_inode *ri)
|
|
|
|
{
|
2017-07-19 00:19:06 +08:00
|
|
|
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
|
2015-03-25 03:04:20 +08:00
|
|
|
|
2018-05-23 22:25:08 +08:00
|
|
|
if (is_valid_blkaddr(addr))
|
2015-03-18 08:16:35 +08:00
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-10-08 17:01:51 +08:00
|
|
|
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
|
|
|
|
{
|
2017-07-19 00:19:06 +08:00
|
|
|
int extra_size = get_extra_isize(inode);
|
|
|
|
|
2013-10-08 17:01:51 +08:00
|
|
|
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
|
|
|
|
if (old_valid_dev(inode->i_rdev)) {
|
2017-07-19 00:19:06 +08:00
|
|
|
ri->i_addr[extra_size] =
|
2014-01-18 04:44:39 +08:00
|
|
|
cpu_to_le32(old_encode_dev(inode->i_rdev));
|
2017-07-19 00:19:06 +08:00
|
|
|
ri->i_addr[extra_size + 1] = 0;
|
2013-10-08 17:01:51 +08:00
|
|
|
} else {
|
2017-07-19 00:19:06 +08:00
|
|
|
ri->i_addr[extra_size] = 0;
|
|
|
|
ri->i_addr[extra_size + 1] =
|
2014-01-18 04:44:39 +08:00
|
|
|
cpu_to_le32(new_encode_dev(inode->i_rdev));
|
2017-07-19 00:19:06 +08:00
|
|
|
ri->i_addr[extra_size + 2] = 0;
|
2013-10-08 17:01:51 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-06 14:28:43 +08:00
|
|
|
static void __recover_inline_status(struct inode *inode, struct page *ipage)
|
2014-10-24 10:48:09 +08:00
|
|
|
{
|
2017-07-19 00:19:05 +08:00
|
|
|
void *inline_data = inline_data_addr(inode, ipage);
|
2015-01-06 14:28:43 +08:00
|
|
|
__le32 *start = inline_data;
|
2017-07-19 00:19:05 +08:00
|
|
|
__le32 *end = start + MAX_INLINE_DATA(inode) / sizeof(__le32);
|
2014-10-24 10:48:09 +08:00
|
|
|
|
2015-01-06 14:28:43 +08:00
|
|
|
while (start < end) {
|
|
|
|
if (*start++) {
|
2016-01-20 23:43:51 +08:00
|
|
|
f2fs_wait_on_page_writeback(ipage, NODE, true);
|
2014-10-24 10:48:09 +08:00
|
|
|
|
2016-05-21 01:13:22 +08:00
|
|
|
set_inode_flag(inode, FI_DATA_EXIST);
|
|
|
|
set_raw_inline(inode, F2FS_INODE(ipage));
|
2015-01-06 14:28:43 +08:00
|
|
|
set_page_dirty(ipage);
|
|
|
|
return;
|
|
|
|
}
|
2014-10-24 10:48:09 +08:00
|
|
|
}
|
2015-01-06 14:28:43 +08:00
|
|
|
return;
|
2014-10-24 10:48:09 +08:00
|
|
|
}
|
|
|
|
|
2017-07-31 20:19:09 +08:00
|
|
|
static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
|
|
|
|
{
|
|
|
|
struct f2fs_inode *ri = &F2FS_NODE(page)->i;
|
|
|
|
|
|
|
|
if (!f2fs_sb_has_inode_chksum(sbi->sb))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR))
|
|
|
|
return false;
|
|
|
|
|
2018-04-14 01:02:34 +08:00
|
|
|
if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize),
|
|
|
|
i_inode_checksum))
|
2017-07-31 20:19:09 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
|
|
|
|
{
|
|
|
|
struct f2fs_node *node = F2FS_NODE(page);
|
|
|
|
struct f2fs_inode *ri = &node->i;
|
|
|
|
__le32 ino = node->footer.ino;
|
|
|
|
__le32 gen = ri->i_generation;
|
|
|
|
__u32 chksum, chksum_seed;
|
|
|
|
__u32 dummy_cs = 0;
|
|
|
|
unsigned int offset = offsetof(struct f2fs_inode, i_inode_checksum);
|
|
|
|
unsigned int cs_size = sizeof(dummy_cs);
|
|
|
|
|
|
|
|
chksum = f2fs_chksum(sbi, sbi->s_chksum_seed, (__u8 *)&ino,
|
|
|
|
sizeof(ino));
|
|
|
|
chksum_seed = f2fs_chksum(sbi, chksum, (__u8 *)&gen, sizeof(gen));
|
|
|
|
|
|
|
|
chksum = f2fs_chksum(sbi, chksum_seed, (__u8 *)ri, offset);
|
|
|
|
chksum = f2fs_chksum(sbi, chksum, (__u8 *)&dummy_cs, cs_size);
|
|
|
|
offset += cs_size;
|
|
|
|
chksum = f2fs_chksum(sbi, chksum, (__u8 *)ri + offset,
|
|
|
|
F2FS_BLKSIZE - offset);
|
|
|
|
return chksum;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
|
|
|
|
{
|
|
|
|
struct f2fs_inode *ri;
|
|
|
|
__u32 provided, calculated;
|
|
|
|
|
2018-06-22 04:46:23 +08:00
|
|
|
if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)))
|
|
|
|
return true;
|
|
|
|
|
2018-03-09 23:10:21 +08:00
|
|
|
#ifdef CONFIG_F2FS_CHECK_FS
|
|
|
|
if (!f2fs_enable_inode_chksum(sbi, page))
|
|
|
|
#else
|
2017-09-01 07:54:51 +08:00
|
|
|
if (!f2fs_enable_inode_chksum(sbi, page) ||
|
|
|
|
PageDirty(page) || PageWriteback(page))
|
2018-03-09 23:10:21 +08:00
|
|
|
#endif
|
2017-07-31 20:19:09 +08:00
|
|
|
return true;
|
|
|
|
|
|
|
|
ri = &F2FS_NODE(page)->i;
|
|
|
|
provided = le32_to_cpu(ri->i_inode_checksum);
|
|
|
|
calculated = f2fs_inode_chksum(sbi, page);
|
|
|
|
|
|
|
|
if (provided != calculated)
|
|
|
|
f2fs_msg(sbi->sb, KERN_WARNING,
|
|
|
|
"checksum invalid, ino = %x, %x vs. %x",
|
|
|
|
ino_of_node(page), provided, calculated);
|
|
|
|
|
|
|
|
return provided == calculated;
|
|
|
|
}
|
|
|
|
|
|
|
|
void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
|
|
|
|
{
|
|
|
|
struct f2fs_inode *ri = &F2FS_NODE(page)->i;
|
|
|
|
|
|
|
|
if (!f2fs_enable_inode_chksum(sbi, page))
|
|
|
|
return;
|
|
|
|
|
|
|
|
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
|
|
|
|
}
|
|
|
|
|
2018-04-25 01:37:18 +08:00
|
|
|
static bool sanity_check_inode(struct inode *inode)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
|
|
|
|
|
|
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
|
|
|
|
&& !f2fs_has_extra_attr(inode)) {
|
|
|
|
set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
|
|
f2fs_msg(sbi->sb, KERN_WARNING,
|
|
|
|
"%s: corrupted inode ino=%lx, run fsck to fix.",
|
|
|
|
__func__, inode->i_ino);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2012-11-02 16:10:40 +08:00
|
|
|
static int do_read_inode(struct inode *inode)
|
|
|
|
{
|
2014-09-03 06:31:18 +08:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
|
|
|
struct page *node_page;
|
|
|
|
struct f2fs_inode *ri;
|
2017-07-26 00:01:41 +08:00
|
|
|
projid_t i_projid;
|
2012-11-02 16:10:40 +08:00
|
|
|
|
|
|
|
/* Check if ino is within scope */
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
if (f2fs_check_nid_range(sbi, inode->i_ino))
|
2013-03-17 16:27:20 +08:00
|
|
|
return -EINVAL;
|
2012-11-02 16:10:40 +08:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
node_page = f2fs_get_node_page(sbi, inode->i_ino);
|
2012-11-02 16:10:40 +08:00
|
|
|
if (IS_ERR(node_page))
|
|
|
|
return PTR_ERR(node_page);
|
|
|
|
|
2013-12-26 15:30:41 +08:00
|
|
|
ri = F2FS_INODE(node_page);
|
2012-11-02 16:10:40 +08:00
|
|
|
|
|
|
|
inode->i_mode = le16_to_cpu(ri->i_mode);
|
|
|
|
i_uid_write(inode, le32_to_cpu(ri->i_uid));
|
|
|
|
i_gid_write(inode, le32_to_cpu(ri->i_gid));
|
|
|
|
set_nlink(inode, le32_to_cpu(ri->i_links));
|
|
|
|
inode->i_size = le64_to_cpu(ri->i_size);
|
f2fs: don't count inode block in in-memory inode.i_blocks
Previously, we count all inode consumed blocks including inode block,
xattr block, index block, data block into i_blocks, for other generic
filesystems, they won't count inode block into i_blocks, so for
userspace applications or quota system, they may detect incorrect block
count according to i_blocks value in inode.
This patch changes to count all blocks into inode.i_blocks excluding
inode block, for on-disk i_blocks, we keep counting inode block for
backward compatibility.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-07-06 01:11:31 +08:00
|
|
|
inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1);
|
2012-11-02 16:10:40 +08:00
|
|
|
|
|
|
|
inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
|
|
|
|
inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
|
|
|
|
inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime);
|
|
|
|
inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
|
|
|
|
inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
|
|
|
|
inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
|
|
|
|
inode->i_generation = le32_to_cpu(ri->i_generation);
|
2018-05-07 20:28:52 +08:00
|
|
|
if (S_ISDIR(inode->i_mode))
|
|
|
|
fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
|
|
|
|
else if (S_ISREG(inode->i_mode))
|
f2fs: avoid stucking GC due to atomic write
f2fs doesn't allow abuse on atomic write class interface, so except
limiting in-mem pages' total memory usage capacity, we need to limit
atomic-write usage as well when filesystem is seriously fragmented,
otherwise we may run into infinite loop during foreground GC because
target blocks in victim segment are belong to atomic opened file for
long time.
Now, we will detect failure due to atomic write in foreground GC, if
the count exceeds threshold, we will drop all atomic written data in
cache, by this, I expect it can keep our system running safely to
prevent Dos attack.
In addition, his patch adds to show GC skip information in debugfs,
now it just shows count of skipped caused by atomic write.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-07 20:28:54 +08:00
|
|
|
fi->i_gc_failures[GC_FAILURE_PIN] =
|
|
|
|
le16_to_cpu(ri->i_gc_failures);
|
2012-11-02 16:10:40 +08:00
|
|
|
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
|
|
|
|
fi->i_flags = le32_to_cpu(ri->i_flags);
|
|
|
|
fi->flags = 0;
|
|
|
|
fi->i_advise = ri->i_advise;
|
f2fs: fix tracking parent inode number
Previously, f2fs didn't track the parent inode number correctly which is stored
in each f2fs_inode. In the case of the following scenario, a bug can be occured.
Let's suppose there are one directory, "/b", and two files, "/a" and "/b/a".
- pino of "/a" is ROOT_INO.
- pino of "/b/a" is DIR_B_INO.
Then,
# sync
: The inode pages of "/a" and "/b/a" contain the parent inode numbers as
ROOT_INO and DIR_B_INO respectively.
# mv /a /b/a
: The parent inode number of "/a" should be changed to DIR_B_INO, but f2fs
didn't do that. Ref. f2fs_set_link().
In order to fix this clearly, I added i_pino in f2fs_inode_info, and whenever
it needs to be changed like in f2fs_add_link() and f2fs_set_link(), it is
updated temporarily in f2fs_inode_info.
And later, f2fs_write_inode() stores the latest information to the inode pages.
For power-off-recovery, f2fs_sync_file() triggers simply f2fs_write_inode().
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2012-12-10 16:52:48 +08:00
|
|
|
fi->i_pino = le32_to_cpu(ri->i_pino);
|
2014-02-27 17:20:00 +08:00
|
|
|
fi->i_dir_level = ri->i_dir_level;
|
2013-10-08 17:01:51 +08:00
|
|
|
|
2015-12-29 03:39:06 +08:00
|
|
|
if (f2fs_init_extent_tree(inode, &ri->i_ext))
|
|
|
|
set_page_dirty(node_page);
|
2015-02-05 17:46:29 +08:00
|
|
|
|
2016-05-21 01:13:22 +08:00
|
|
|
get_inline_info(inode, ri);
|
2013-10-08 17:01:51 +08:00
|
|
|
|
2017-07-19 00:19:06 +08:00
|
|
|
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
|
|
|
|
le16_to_cpu(ri->i_extra_isize) : 0;
|
|
|
|
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to cnosider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 21:59:50 +08:00
|
|
|
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
|
|
|
|
fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
|
|
|
|
} else if (f2fs_has_inline_xattr(inode) ||
|
|
|
|
f2fs_has_inline_dentry(inode)) {
|
|
|
|
fi->i_inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
|
|
|
|
} else {
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Previous inline data or directory always reserved 200 bytes
|
|
|
|
* in inode layout, even if inline_xattr is disabled. In order
|
|
|
|
* to keep inline_dentry's structure for backward compatibility,
|
|
|
|
* we get the space back only from inline_data.
|
|
|
|
*/
|
|
|
|
fi->i_inline_xattr_size = 0;
|
|
|
|
}
|
|
|
|
|
2014-10-24 10:48:09 +08:00
|
|
|
/* check data exist */
|
|
|
|
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
|
2015-01-06 14:28:43 +08:00
|
|
|
__recover_inline_status(inode, node_page);
|
2014-10-24 10:48:09 +08:00
|
|
|
|
2013-10-08 17:01:51 +08:00
|
|
|
/* get rdev by using inline_info */
|
|
|
|
__get_inode_rdev(inode, ri);
|
|
|
|
|
2015-03-18 08:16:35 +08:00
|
|
|
if (__written_first_block(ri))
|
2016-05-21 01:13:22 +08:00
|
|
|
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
|
2015-03-18 08:16:35 +08:00
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
if (!f2fs_need_inode_block_update(sbi, inode->i_ino))
|
2016-05-21 11:42:37 +08:00
|
|
|
fi->last_disk_size = inode->i_size;
|
|
|
|
|
2018-04-03 15:08:17 +08:00
|
|
|
if (fi->i_flags & F2FS_PROJINHERIT_FL)
|
2017-07-26 00:01:41 +08:00
|
|
|
set_inode_flag(inode, FI_PROJ_INHERIT);
|
|
|
|
|
|
|
|
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) &&
|
|
|
|
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
|
|
|
|
i_projid = (projid_t)le32_to_cpu(ri->i_projid);
|
|
|
|
else
|
|
|
|
i_projid = F2FS_DEF_PROJID;
|
|
|
|
fi->i_projid = make_kprojid(&init_user_ns, i_projid);
|
|
|
|
|
2018-01-25 14:54:42 +08:00
|
|
|
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) &&
|
|
|
|
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
|
|
|
|
fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime);
|
|
|
|
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
|
|
|
|
}
|
|
|
|
|
vfs: change inode times to use struct timespec64
struct timespec is not y2038 safe. Transition vfs to use
y2038 safe struct timespec64 instead.
The change was made with the help of the following cocinelle
script. This catches about 80% of the changes.
All the header file and logic changes are included in the
first 5 rules. The rest are trivial substitutions.
I avoid changing any of the function signatures or any other
filesystem specific data structures to keep the patch simple
for review.
The script can be a little shorter by combining different cases.
But, this version was sufficient for my usecase.
virtual patch
@ depends on patch @
identifier now;
@@
- struct timespec
+ struct timespec64
current_time ( ... )
{
- struct timespec now = current_kernel_time();
+ struct timespec64 now = current_kernel_time64();
...
- return timespec_trunc(
+ return timespec64_trunc(
... );
}
@ depends on patch @
identifier xtime;
@@
struct \( iattr \| inode \| kstat \) {
...
- struct timespec xtime;
+ struct timespec64 xtime;
...
}
@ depends on patch @
identifier t;
@@
struct inode_operations {
...
int (*update_time) (...,
- struct timespec t,
+ struct timespec64 t,
...);
...
}
@ depends on patch @
identifier t;
identifier fn_update_time =~ "update_time$";
@@
fn_update_time (...,
- struct timespec *t,
+ struct timespec64 *t,
...) { ... }
@ depends on patch @
identifier t;
@@
lease_get_mtime( ... ,
- struct timespec *t
+ struct timespec64 *t
) { ... }
@te depends on patch forall@
identifier ts;
local idexpression struct inode *inode_node;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn_update_time =~ "update_time$";
identifier fn;
expression e, E3;
local idexpression struct inode *node1;
local idexpression struct inode *node2;
local idexpression struct iattr *attr1;
local idexpression struct iattr *attr2;
local idexpression struct iattr attr;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
@@
(
(
- struct timespec ts;
+ struct timespec64 ts;
|
- struct timespec ts = current_time(inode_node);
+ struct timespec64 ts = current_time(inode_node);
)
<+... when != ts
(
- timespec_equal(&inode_node->i_xtime, &ts)
+ timespec64_equal(&inode_node->i_xtime, &ts)
|
- timespec_equal(&ts, &inode_node->i_xtime)
+ timespec64_equal(&ts, &inode_node->i_xtime)
|
- timespec_compare(&inode_node->i_xtime, &ts)
+ timespec64_compare(&inode_node->i_xtime, &ts)
|
- timespec_compare(&ts, &inode_node->i_xtime)
+ timespec64_compare(&ts, &inode_node->i_xtime)
|
ts = current_time(e)
|
fn_update_time(..., &ts,...)
|
inode_node->i_xtime = ts
|
node1->i_xtime = ts
|
ts = inode_node->i_xtime
|
<+... attr1->ia_xtime ...+> = ts
|
ts = attr1->ia_xtime
|
ts.tv_sec
|
ts.tv_nsec
|
btrfs_set_stack_timespec_sec(..., ts.tv_sec)
|
btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
|
- ts = timespec64_to_timespec(
+ ts =
...
-)
|
- ts = ktime_to_timespec(
+ ts = ktime_to_timespec64(
...)
|
- ts = E3
+ ts = timespec_to_timespec64(E3)
|
- ktime_get_real_ts(&ts)
+ ktime_get_real_ts64(&ts)
|
fn(...,
- ts
+ timespec64_to_timespec(ts)
,...)
)
...+>
(
<... when != ts
- return ts;
+ return timespec64_to_timespec(ts);
...>
)
|
- timespec_equal(&node1->i_xtime1, &node2->i_xtime2)
+ timespec64_equal(&node1->i_xtime2, &node2->i_xtime2)
|
- timespec_equal(&node1->i_xtime1, &attr2->ia_xtime2)
+ timespec64_equal(&node1->i_xtime2, &attr2->ia_xtime2)
|
- timespec_compare(&node1->i_xtime1, &node2->i_xtime2)
+ timespec64_compare(&node1->i_xtime1, &node2->i_xtime2)
|
node1->i_xtime1 =
- timespec_trunc(attr1->ia_xtime1,
+ timespec64_trunc(attr1->ia_xtime1,
...)
|
- attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
+ attr1->ia_xtime1 = timespec64_trunc(attr2->ia_xtime2,
...)
|
- ktime_get_real_ts(&attr1->ia_xtime1)
+ ktime_get_real_ts64(&attr1->ia_xtime1)
|
- ktime_get_real_ts(&attr.ia_xtime1)
+ ktime_get_real_ts64(&attr.ia_xtime1)
)
@ depends on patch @
struct inode *node;
struct iattr *attr;
identifier fn;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
expression e;
@@
(
- fn(node->i_xtime);
+ fn(timespec64_to_timespec(node->i_xtime));
|
fn(...,
- node->i_xtime);
+ timespec64_to_timespec(node->i_xtime));
|
- e = fn(attr->ia_xtime);
+ e = fn(timespec64_to_timespec(attr->ia_xtime));
)
@ depends on patch forall @
struct inode *node;
struct iattr *attr;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
fn (...,
- &node->i_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
fn (...,
- &attr->ia_xtime,
+ &ts,
...);
)
...+>
}
@ depends on patch forall @
struct inode *node;
struct iattr *attr;
struct kstat *stat;
identifier ia_xtime =~ "^ia_[acm]time$";
identifier i_xtime =~ "^i_[acm]time$";
identifier xtime =~ "^[acm]time$";
identifier fn, ret;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- &node->i_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- &node->i_xtime);
+ &ts);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
ret = fn (...,
- &attr->ia_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
ret = fn (...,
- &attr->ia_xtime);
+ &ts);
|
+ ts = timespec64_to_timespec(stat->xtime);
ret = fn (...,
- &stat->xtime);
+ &ts);
)
...+>
}
@ depends on patch @
struct inode *node;
struct inode *node2;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier i_xtime3 =~ "^i_[acm]time$";
struct iattr *attrp;
struct iattr *attrp2;
struct iattr attr ;
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
struct kstat *stat;
struct kstat stat1;
struct timespec64 ts;
identifier xtime =~ "^[acmb]time$";
expression e;
@@
(
( node->i_xtime2 \| attrp->ia_xtime2 \| attr.ia_xtime2 \) = node->i_xtime1 ;
|
node->i_xtime2 = \( node2->i_xtime1 \| timespec64_trunc(...) \);
|
node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
|
node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
|
stat->xtime = node2->i_xtime1;
|
stat1.xtime = node2->i_xtime1;
|
( node->i_xtime2 \| attrp->ia_xtime2 \) = attrp->ia_xtime1 ;
|
( attrp->ia_xtime1 \| attr.ia_xtime1 \) = attrp2->ia_xtime2;
|
- e = node->i_xtime1;
+ e = timespec64_to_timespec( node->i_xtime1 );
|
- e = attrp->ia_xtime1;
+ e = timespec64_to_timespec( attrp->ia_xtime1 );
|
node->i_xtime1 = current_time(...);
|
node->i_xtime2 = node->i_xtime1 = node->i_xtime3 =
- e;
+ timespec_to_timespec64(e);
|
node->i_xtime1 = node->i_xtime3 =
- e;
+ timespec_to_timespec64(e);
|
- node->i_xtime1 = e;
+ node->i_xtime1 = timespec_to_timespec64(e);
)
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Cc: <anton@tuxera.com>
Cc: <balbi@kernel.org>
Cc: <bfields@fieldses.org>
Cc: <darrick.wong@oracle.com>
Cc: <dhowells@redhat.com>
Cc: <dsterba@suse.com>
Cc: <dwmw2@infradead.org>
Cc: <hch@lst.de>
Cc: <hirofumi@mail.parknet.co.jp>
Cc: <hubcap@omnibond.com>
Cc: <jack@suse.com>
Cc: <jaegeuk@kernel.org>
Cc: <jaharkes@cs.cmu.edu>
Cc: <jslaby@suse.com>
Cc: <keescook@chromium.org>
Cc: <mark@fasheh.com>
Cc: <miklos@szeredi.hu>
Cc: <nico@linaro.org>
Cc: <reiserfs-devel@vger.kernel.org>
Cc: <richard@nod.at>
Cc: <sage@redhat.com>
Cc: <sfrench@samba.org>
Cc: <swhiteho@redhat.com>
Cc: <tj@kernel.org>
Cc: <trond.myklebust@primarydata.com>
Cc: <tytso@mit.edu>
Cc: <viro@zeniv.linux.org.uk>
2018-05-09 10:36:02 +08:00
|
|
|
F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime);
|
|
|
|
F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime);
|
|
|
|
F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime);
|
2018-03-30 13:50:41 +08:00
|
|
|
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
|
2012-11-02 16:10:40 +08:00
|
|
|
f2fs_put_page(node_page, 1);
|
2014-12-06 02:51:50 +08:00
|
|
|
|
2015-07-15 17:28:53 +08:00
|
|
|
stat_inc_inline_xattr(inode);
|
2014-12-06 02:51:50 +08:00
|
|
|
stat_inc_inline_inode(inode);
|
|
|
|
stat_inc_inline_dir(inode);
|
|
|
|
|
2015-01-06 14:28:43 +08:00
|
|
|
return 0;
|
2012-11-02 16:10:40 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
|
|
|
struct inode *inode;
|
2013-04-20 00:28:40 +08:00
|
|
|
int ret = 0;
|
2012-11-02 16:10:40 +08:00
|
|
|
|
|
|
|
inode = iget_locked(sb, ino);
|
|
|
|
if (!inode)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
2013-04-20 00:28:40 +08:00
|
|
|
|
|
|
|
if (!(inode->i_state & I_NEW)) {
|
|
|
|
trace_f2fs_iget(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
return inode;
|
2013-04-20 00:28:40 +08:00
|
|
|
}
|
2012-11-02 16:10:40 +08:00
|
|
|
if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi))
|
|
|
|
goto make_now;
|
|
|
|
|
|
|
|
ret = do_read_inode(inode);
|
|
|
|
if (ret)
|
|
|
|
goto bad_inode;
|
2018-04-25 01:37:18 +08:00
|
|
|
if (!sanity_check_inode(inode)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto bad_inode;
|
|
|
|
}
|
2012-11-02 16:10:40 +08:00
|
|
|
make_now:
|
|
|
|
if (ino == F2FS_NODE_INO(sbi)) {
|
|
|
|
inode->i_mapping->a_ops = &f2fs_node_aops;
|
2018-04-09 20:25:06 +08:00
|
|
|
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
|
2012-11-02 16:10:40 +08:00
|
|
|
} else if (ino == F2FS_META_INO(sbi)) {
|
|
|
|
inode->i_mapping->a_ops = &f2fs_meta_aops;
|
2018-04-09 20:25:06 +08:00
|
|
|
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
|
2012-11-02 16:10:40 +08:00
|
|
|
} else if (S_ISREG(inode->i_mode)) {
|
|
|
|
inode->i_op = &f2fs_file_inode_operations;
|
|
|
|
inode->i_fop = &f2fs_file_operations;
|
|
|
|
inode->i_mapping->a_ops = &f2fs_dblock_aops;
|
|
|
|
} else if (S_ISDIR(inode->i_mode)) {
|
|
|
|
inode->i_op = &f2fs_dir_inode_operations;
|
|
|
|
inode->i_fop = &f2fs_dir_operations;
|
|
|
|
inode->i_mapping->a_ops = &f2fs_dblock_aops;
|
2018-02-28 20:31:52 +08:00
|
|
|
inode_nohighmem(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
} else if (S_ISLNK(inode->i_mode)) {
|
2015-04-30 06:10:53 +08:00
|
|
|
if (f2fs_encrypted_inode(inode))
|
|
|
|
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
|
|
|
|
else
|
|
|
|
inode->i_op = &f2fs_symlink_inode_operations;
|
2015-11-17 14:07:57 +08:00
|
|
|
inode_nohighmem(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
inode->i_mapping->a_ops = &f2fs_dblock_aops;
|
|
|
|
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
|
|
|
|
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
|
|
|
|
inode->i_op = &f2fs_special_inode_operations;
|
|
|
|
init_special_inode(inode, inode->i_mode, inode->i_rdev);
|
|
|
|
} else {
|
|
|
|
ret = -EIO;
|
|
|
|
goto bad_inode;
|
|
|
|
}
|
2017-05-17 04:20:16 +08:00
|
|
|
f2fs_set_inode_flags(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
unlock_new_inode(inode);
|
2013-04-20 00:28:40 +08:00
|
|
|
trace_f2fs_iget(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
return inode;
|
|
|
|
|
|
|
|
bad_inode:
|
|
|
|
iget_failed(inode);
|
2013-04-20 00:28:40 +08:00
|
|
|
trace_f2fs_iget_exit(inode, ret);
|
2012-11-02 16:10:40 +08:00
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
|
|
|
|
2016-09-10 07:59:39 +08:00
|
|
|
struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino)
|
|
|
|
{
|
|
|
|
struct inode *inode;
|
|
|
|
retry:
|
|
|
|
inode = f2fs_iget(sb, ino);
|
|
|
|
if (IS_ERR(inode)) {
|
|
|
|
if (PTR_ERR(inode) == -ENOMEM) {
|
|
|
|
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return inode;
|
|
|
|
}
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
void f2fs_update_inode(struct inode *inode, struct page *node_page)
|
2012-11-02 16:10:40 +08:00
|
|
|
{
|
|
|
|
struct f2fs_inode *ri;
|
2016-10-11 22:57:05 +08:00
|
|
|
struct extent_tree *et = F2FS_I(inode)->extent_tree;
|
2012-11-02 16:10:40 +08:00
|
|
|
|
2016-01-20 23:43:51 +08:00
|
|
|
f2fs_wait_on_page_writeback(node_page, NODE, true);
|
2017-12-05 12:07:47 +08:00
|
|
|
set_page_dirty(node_page);
|
|
|
|
|
|
|
|
f2fs_inode_synced(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
|
2013-12-26 15:30:41 +08:00
|
|
|
ri = F2FS_INODE(node_page);
|
2012-11-02 16:10:40 +08:00
|
|
|
|
|
|
|
ri->i_mode = cpu_to_le16(inode->i_mode);
|
|
|
|
ri->i_advise = F2FS_I(inode)->i_advise;
|
|
|
|
ri->i_uid = cpu_to_le32(i_uid_read(inode));
|
|
|
|
ri->i_gid = cpu_to_le32(i_gid_read(inode));
|
|
|
|
ri->i_links = cpu_to_le32(inode->i_nlink);
|
|
|
|
ri->i_size = cpu_to_le64(i_size_read(inode));
|
f2fs: don't count inode block in in-memory inode.i_blocks
Previously, we count all inode consumed blocks including inode block,
xattr block, index block, data block into i_blocks, for other generic
filesystems, they won't count inode block into i_blocks, so for
userspace applications or quota system, they may detect incorrect block
count according to i_blocks value in inode.
This patch changes to count all blocks into inode.i_blocks excluding
inode block, for on-disk i_blocks, we keep counting inode block for
backward compatibility.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-07-06 01:11:31 +08:00
|
|
|
ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1);
|
2015-02-05 17:46:29 +08:00
|
|
|
|
2016-10-11 22:57:05 +08:00
|
|
|
if (et) {
|
|
|
|
read_lock(&et->lock);
|
|
|
|
set_raw_extent(&et->largest, &ri->i_ext);
|
|
|
|
read_unlock(&et->lock);
|
|
|
|
} else {
|
2015-06-20 08:53:26 +08:00
|
|
|
memset(&ri->i_ext, 0, sizeof(ri->i_ext));
|
2016-10-11 22:57:05 +08:00
|
|
|
}
|
2016-05-21 01:13:22 +08:00
|
|
|
set_raw_inline(inode, ri);
|
2012-11-02 16:10:40 +08:00
|
|
|
|
|
|
|
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
|
|
|
|
ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
|
|
|
|
ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
|
|
|
|
ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
|
|
|
|
ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
|
|
|
|
ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
|
2018-05-07 20:28:52 +08:00
|
|
|
if (S_ISDIR(inode->i_mode))
|
|
|
|
ri->i_current_depth =
|
|
|
|
cpu_to_le32(F2FS_I(inode)->i_current_depth);
|
|
|
|
else if (S_ISREG(inode->i_mode))
|
f2fs: avoid stucking GC due to atomic write
f2fs doesn't allow abuse on atomic write class interface, so except
limiting in-mem pages' total memory usage capacity, we need to limit
atomic-write usage as well when filesystem is seriously fragmented,
otherwise we may run into infinite loop during foreground GC because
target blocks in victim segment are belong to atomic opened file for
long time.
Now, we will detect failure due to atomic write in foreground GC, if
the count exceeds threshold, we will drop all atomic written data in
cache, by this, I expect it can keep our system running safely to
prevent Dos attack.
In addition, his patch adds to show GC skip information in debugfs,
now it just shows count of skipped caused by atomic write.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-07 20:28:54 +08:00
|
|
|
ri->i_gc_failures =
|
|
|
|
cpu_to_le16(F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]);
|
2012-11-02 16:10:40 +08:00
|
|
|
ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
|
|
|
|
ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
|
f2fs: fix tracking parent inode number
Previously, f2fs didn't track the parent inode number correctly which is stored
in each f2fs_inode. In the case of the following scenario, a bug can be occured.
Let's suppose there are one directory, "/b", and two files, "/a" and "/b/a".
- pino of "/a" is ROOT_INO.
- pino of "/b/a" is DIR_B_INO.
Then,
# sync
: The inode pages of "/a" and "/b/a" contain the parent inode numbers as
ROOT_INO and DIR_B_INO respectively.
# mv /a /b/a
: The parent inode number of "/a" should be changed to DIR_B_INO, but f2fs
didn't do that. Ref. f2fs_set_link().
In order to fix this clearly, I added i_pino in f2fs_inode_info, and whenever
it needs to be changed like in f2fs_add_link() and f2fs_set_link(), it is
updated temporarily in f2fs_inode_info.
And later, f2fs_write_inode() stores the latest information to the inode pages.
For power-off-recovery, f2fs_sync_file() triggers simply f2fs_write_inode().
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2012-12-10 16:52:48 +08:00
|
|
|
ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
|
2012-11-02 16:10:40 +08:00
|
|
|
ri->i_generation = cpu_to_le32(inode->i_generation);
|
2014-02-27 17:20:00 +08:00
|
|
|
ri->i_dir_level = F2FS_I(inode)->i_dir_level;
|
f2fs: save device node number into f2fs_inode
This patch stores inode->i_rdev into on-disk inode structure.
Alun reported that:
aspire tmp # mount -t f2fs /dev/sdb mnt
aspire tmp # mknod mnt/sda1 b 8 1
aspire tmp # mknod mnt/null c 1 3
aspire tmp # mknod mnt/console c 5 1
aspire tmp # ls -l mnt
total 2
crw-r--r-- 1 root root 5, 1 Jan 22 18:44 console
crw-r--r-- 1 root root 1, 3 Jan 22 18:44 null
brw-r--r-- 1 root root 8, 1 Jan 22 18:44 sda1
aspire tmp # umount mnt
aspire tmp # mount -t f2fs /dev/sdb mnt
aspire tmp # ls -l mnt
total 2
crw-r--r-- 1 root root 0, 0 Jan 22 18:44 console
crw-r--r-- 1 root root 0, 0 Jan 22 18:44 null
brw-r--r-- 1 root root 0, 0 Jan 22 18:44 sda1
In this report, f2fs lost the major/minor numbers of device files after umount.
The reason was revealed that f2fs does not store the inode->i_rdev to the
on-disk inode data structure.
So, as the other file systems do, f2fs also stores i_rdev into the i_addr fields
in on-disk inode structure without any on-disk layout changes.
Note that, this bug is limited to device files made by mknod().
Reported-and-Tested-by: Alun Jones <alun.linux@ty-penguin.org.uk>
Signed-off-by: Changman Lee <cm224.lee@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-01-23 08:40:23 +08:00
|
|
|
|
2017-07-26 00:01:41 +08:00
|
|
|
if (f2fs_has_extra_attr(inode)) {
|
2017-07-19 00:19:06 +08:00
|
|
|
ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
|
|
|
|
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to cnosider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 21:59:50 +08:00
|
|
|
if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb))
|
|
|
|
ri->i_inline_xattr_size =
|
|
|
|
cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
|
|
|
|
|
2017-07-26 00:01:41 +08:00
|
|
|
if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
|
|
|
|
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
|
|
|
|
i_projid)) {
|
|
|
|
projid_t i_projid;
|
|
|
|
|
|
|
|
i_projid = from_kprojid(&init_user_ns,
|
|
|
|
F2FS_I(inode)->i_projid);
|
|
|
|
ri->i_projid = cpu_to_le32(i_projid);
|
|
|
|
}
|
2018-01-25 14:54:42 +08:00
|
|
|
|
|
|
|
if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) &&
|
|
|
|
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
|
|
|
|
i_crtime)) {
|
|
|
|
ri->i_crtime =
|
|
|
|
cpu_to_le64(F2FS_I(inode)->i_crtime.tv_sec);
|
|
|
|
ri->i_crtime_nsec =
|
|
|
|
cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec);
|
|
|
|
}
|
2017-07-26 00:01:41 +08:00
|
|
|
}
|
|
|
|
|
2013-10-08 17:01:51 +08:00
|
|
|
__set_inode_rdev(inode, ri);
|
2016-01-08 05:23:12 +08:00
|
|
|
|
2016-01-25 21:57:05 +08:00
|
|
|
/* deleted inode */
|
|
|
|
if (inode->i_nlink == 0)
|
|
|
|
clear_inline_node(node_page);
|
|
|
|
|
vfs: change inode times to use struct timespec64
struct timespec is not y2038 safe. Transition vfs to use
y2038 safe struct timespec64 instead.
The change was made with the help of the following cocinelle
script. This catches about 80% of the changes.
All the header file and logic changes are included in the
first 5 rules. The rest are trivial substitutions.
I avoid changing any of the function signatures or any other
filesystem specific data structures to keep the patch simple
for review.
The script can be a little shorter by combining different cases.
But, this version was sufficient for my usecase.
virtual patch
@ depends on patch @
identifier now;
@@
- struct timespec
+ struct timespec64
current_time ( ... )
{
- struct timespec now = current_kernel_time();
+ struct timespec64 now = current_kernel_time64();
...
- return timespec_trunc(
+ return timespec64_trunc(
... );
}
@ depends on patch @
identifier xtime;
@@
struct \( iattr \| inode \| kstat \) {
...
- struct timespec xtime;
+ struct timespec64 xtime;
...
}
@ depends on patch @
identifier t;
@@
struct inode_operations {
...
int (*update_time) (...,
- struct timespec t,
+ struct timespec64 t,
...);
...
}
@ depends on patch @
identifier t;
identifier fn_update_time =~ "update_time$";
@@
fn_update_time (...,
- struct timespec *t,
+ struct timespec64 *t,
...) { ... }
@ depends on patch @
identifier t;
@@
lease_get_mtime( ... ,
- struct timespec *t
+ struct timespec64 *t
) { ... }
@te depends on patch forall@
identifier ts;
local idexpression struct inode *inode_node;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn_update_time =~ "update_time$";
identifier fn;
expression e, E3;
local idexpression struct inode *node1;
local idexpression struct inode *node2;
local idexpression struct iattr *attr1;
local idexpression struct iattr *attr2;
local idexpression struct iattr attr;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
@@
(
(
- struct timespec ts;
+ struct timespec64 ts;
|
- struct timespec ts = current_time(inode_node);
+ struct timespec64 ts = current_time(inode_node);
)
<+... when != ts
(
- timespec_equal(&inode_node->i_xtime, &ts)
+ timespec64_equal(&inode_node->i_xtime, &ts)
|
- timespec_equal(&ts, &inode_node->i_xtime)
+ timespec64_equal(&ts, &inode_node->i_xtime)
|
- timespec_compare(&inode_node->i_xtime, &ts)
+ timespec64_compare(&inode_node->i_xtime, &ts)
|
- timespec_compare(&ts, &inode_node->i_xtime)
+ timespec64_compare(&ts, &inode_node->i_xtime)
|
ts = current_time(e)
|
fn_update_time(..., &ts,...)
|
inode_node->i_xtime = ts
|
node1->i_xtime = ts
|
ts = inode_node->i_xtime
|
<+... attr1->ia_xtime ...+> = ts
|
ts = attr1->ia_xtime
|
ts.tv_sec
|
ts.tv_nsec
|
btrfs_set_stack_timespec_sec(..., ts.tv_sec)
|
btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
|
- ts = timespec64_to_timespec(
+ ts =
...
-)
|
- ts = ktime_to_timespec(
+ ts = ktime_to_timespec64(
...)
|
- ts = E3
+ ts = timespec_to_timespec64(E3)
|
- ktime_get_real_ts(&ts)
+ ktime_get_real_ts64(&ts)
|
fn(...,
- ts
+ timespec64_to_timespec(ts)
,...)
)
...+>
(
<... when != ts
- return ts;
+ return timespec64_to_timespec(ts);
...>
)
|
- timespec_equal(&node1->i_xtime1, &node2->i_xtime2)
+ timespec64_equal(&node1->i_xtime2, &node2->i_xtime2)
|
- timespec_equal(&node1->i_xtime1, &attr2->ia_xtime2)
+ timespec64_equal(&node1->i_xtime2, &attr2->ia_xtime2)
|
- timespec_compare(&node1->i_xtime1, &node2->i_xtime2)
+ timespec64_compare(&node1->i_xtime1, &node2->i_xtime2)
|
node1->i_xtime1 =
- timespec_trunc(attr1->ia_xtime1,
+ timespec64_trunc(attr1->ia_xtime1,
...)
|
- attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
+ attr1->ia_xtime1 = timespec64_trunc(attr2->ia_xtime2,
...)
|
- ktime_get_real_ts(&attr1->ia_xtime1)
+ ktime_get_real_ts64(&attr1->ia_xtime1)
|
- ktime_get_real_ts(&attr.ia_xtime1)
+ ktime_get_real_ts64(&attr.ia_xtime1)
)
@ depends on patch @
struct inode *node;
struct iattr *attr;
identifier fn;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
expression e;
@@
(
- fn(node->i_xtime);
+ fn(timespec64_to_timespec(node->i_xtime));
|
fn(...,
- node->i_xtime);
+ timespec64_to_timespec(node->i_xtime));
|
- e = fn(attr->ia_xtime);
+ e = fn(timespec64_to_timespec(attr->ia_xtime));
)
@ depends on patch forall @
struct inode *node;
struct iattr *attr;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
fn (...,
- &node->i_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
fn (...,
- &attr->ia_xtime,
+ &ts,
...);
)
...+>
}
@ depends on patch forall @
struct inode *node;
struct iattr *attr;
struct kstat *stat;
identifier ia_xtime =~ "^ia_[acm]time$";
identifier i_xtime =~ "^i_[acm]time$";
identifier xtime =~ "^[acm]time$";
identifier fn, ret;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- &node->i_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- &node->i_xtime);
+ &ts);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
ret = fn (...,
- &attr->ia_xtime,
+ &ts,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
ret = fn (...,
- &attr->ia_xtime);
+ &ts);
|
+ ts = timespec64_to_timespec(stat->xtime);
ret = fn (...,
- &stat->xtime);
+ &ts);
)
...+>
}
@ depends on patch @
struct inode *node;
struct inode *node2;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier i_xtime3 =~ "^i_[acm]time$";
struct iattr *attrp;
struct iattr *attrp2;
struct iattr attr ;
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
struct kstat *stat;
struct kstat stat1;
struct timespec64 ts;
identifier xtime =~ "^[acmb]time$";
expression e;
@@
(
( node->i_xtime2 \| attrp->ia_xtime2 \| attr.ia_xtime2 \) = node->i_xtime1 ;
|
node->i_xtime2 = \( node2->i_xtime1 \| timespec64_trunc(...) \);
|
node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
|
node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \);
|
stat->xtime = node2->i_xtime1;
|
stat1.xtime = node2->i_xtime1;
|
( node->i_xtime2 \| attrp->ia_xtime2 \) = attrp->ia_xtime1 ;
|
( attrp->ia_xtime1 \| attr.ia_xtime1 \) = attrp2->ia_xtime2;
|
- e = node->i_xtime1;
+ e = timespec64_to_timespec( node->i_xtime1 );
|
- e = attrp->ia_xtime1;
+ e = timespec64_to_timespec( attrp->ia_xtime1 );
|
node->i_xtime1 = current_time(...);
|
node->i_xtime2 = node->i_xtime1 = node->i_xtime3 =
- e;
+ timespec_to_timespec64(e);
|
node->i_xtime1 = node->i_xtime3 =
- e;
+ timespec_to_timespec64(e);
|
- node->i_xtime1 = e;
+ node->i_xtime1 = timespec_to_timespec64(e);
)
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Cc: <anton@tuxera.com>
Cc: <balbi@kernel.org>
Cc: <bfields@fieldses.org>
Cc: <darrick.wong@oracle.com>
Cc: <dhowells@redhat.com>
Cc: <dsterba@suse.com>
Cc: <dwmw2@infradead.org>
Cc: <hch@lst.de>
Cc: <hirofumi@mail.parknet.co.jp>
Cc: <hubcap@omnibond.com>
Cc: <jack@suse.com>
Cc: <jaegeuk@kernel.org>
Cc: <jaharkes@cs.cmu.edu>
Cc: <jslaby@suse.com>
Cc: <keescook@chromium.org>
Cc: <mark@fasheh.com>
Cc: <miklos@szeredi.hu>
Cc: <nico@linaro.org>
Cc: <reiserfs-devel@vger.kernel.org>
Cc: <richard@nod.at>
Cc: <sage@redhat.com>
Cc: <sfrench@samba.org>
Cc: <swhiteho@redhat.com>
Cc: <tj@kernel.org>
Cc: <trond.myklebust@primarydata.com>
Cc: <tytso@mit.edu>
Cc: <viro@zeniv.linux.org.uk>
2018-05-09 10:36:02 +08:00
|
|
|
F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime);
|
|
|
|
F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime);
|
|
|
|
F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime);
|
2018-03-30 13:50:41 +08:00
|
|
|
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
|
2018-03-09 23:10:21 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_F2FS_CHECK_FS
|
|
|
|
f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
|
|
|
|
#endif
|
2012-11-02 16:10:40 +08:00
|
|
|
}
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
void f2fs_update_inode_page(struct inode *inode)
|
2012-11-02 16:10:40 +08:00
|
|
|
{
|
2014-09-03 06:31:18 +08:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
struct page *node_page;
|
2014-01-24 08:42:16 +08:00
|
|
|
retry:
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
node_page = f2fs_get_node_page(sbi, inode->i_ino);
|
2014-01-24 08:42:16 +08:00
|
|
|
if (IS_ERR(node_page)) {
|
|
|
|
int err = PTR_ERR(node_page);
|
|
|
|
if (err == -ENOMEM) {
|
|
|
|
cond_resched();
|
|
|
|
goto retry;
|
|
|
|
} else if (err != -ENOENT) {
|
2016-05-19 05:07:56 +08:00
|
|
|
f2fs_stop_checkpoint(sbi, false);
|
2014-01-24 08:42:16 +08:00
|
|
|
}
|
2017-12-05 12:07:47 +08:00
|
|
|
return;
|
2014-01-24 08:42:16 +08:00
|
|
|
}
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_update_inode(inode, node_page);
|
2012-11-02 16:10:40 +08:00
|
|
|
f2fs_put_page(node_page, 1);
|
|
|
|
}
|
|
|
|
|
f2fs: introduce a new global lock scheme
In the previous version, f2fs uses global locks according to the usage types,
such as directory operations, block allocation, block write, and so on.
Reference the following lock types in f2fs.h.
enum lock_type {
RENAME, /* for renaming operations */
DENTRY_OPS, /* for directory operations */
DATA_WRITE, /* for data write */
DATA_NEW, /* for data allocation */
DATA_TRUNC, /* for data truncate */
NODE_NEW, /* for node allocation */
NODE_TRUNC, /* for node truncate */
NODE_WRITE, /* for node write */
NR_LOCK_TYPE,
};
In that case, we lose the performance under the multi-threading environment,
since every types of operations must be conducted one at a time.
In order to address the problem, let's share the locks globally with a mutex
array regardless of any types.
So, let users grab a mutex and perform their jobs in parallel as much as
possbile.
For this, I propose a new global lock scheme as follows.
0. Data structure
- f2fs_sb_info -> mutex_lock[NR_GLOBAL_LOCKS]
- f2fs_sb_info -> node_write
1. mutex_lock_op(sbi)
- try to get an avaiable lock from the array.
- returns the index of the gottern lock variable.
2. mutex_unlock_op(sbi, index of the lock)
- unlock the given index of the lock.
3. mutex_lock_all(sbi)
- grab all the locks in the array before the checkpoint.
4. mutex_unlock_all(sbi)
- release all the locks in the array after checkpoint.
5. block_operations()
- call mutex_lock_all()
- sync_dirty_dir_inodes()
- grab node_write
- sync_node_pages()
Note that,
the pairs of mutex_lock_op()/mutex_unlock_op() and
mutex_lock_all()/mutex_unlock_all() should be used together.
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2012-11-22 15:21:29 +08:00
|
|
|
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
|
|
|
|
{
|
2014-09-03 06:31:18 +08:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
f2fs: introduce a new global lock scheme
In the previous version, f2fs uses global locks according to the usage types,
such as directory operations, block allocation, block write, and so on.
Reference the following lock types in f2fs.h.
enum lock_type {
RENAME, /* for renaming operations */
DENTRY_OPS, /* for directory operations */
DATA_WRITE, /* for data write */
DATA_NEW, /* for data allocation */
DATA_TRUNC, /* for data truncate */
NODE_NEW, /* for node allocation */
NODE_TRUNC, /* for node truncate */
NODE_WRITE, /* for node write */
NR_LOCK_TYPE,
};
In that case, we lose the performance under the multi-threading environment,
since every types of operations must be conducted one at a time.
In order to address the problem, let's share the locks globally with a mutex
array regardless of any types.
So, let users grab a mutex and perform their jobs in parallel as much as
possbile.
For this, I propose a new global lock scheme as follows.
0. Data structure
- f2fs_sb_info -> mutex_lock[NR_GLOBAL_LOCKS]
- f2fs_sb_info -> node_write
1. mutex_lock_op(sbi)
- try to get an avaiable lock from the array.
- returns the index of the gottern lock variable.
2. mutex_unlock_op(sbi, index of the lock)
- unlock the given index of the lock.
3. mutex_lock_all(sbi)
- grab all the locks in the array before the checkpoint.
4. mutex_unlock_all(sbi)
- release all the locks in the array after checkpoint.
5. block_operations()
- call mutex_lock_all()
- sync_dirty_dir_inodes()
- grab node_write
- sync_node_pages()
Note that,
the pairs of mutex_lock_op()/mutex_unlock_op() and
mutex_lock_all()/mutex_unlock_all() should be used together.
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2012-11-22 15:21:29 +08:00
|
|
|
|
|
|
|
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
|
|
|
|
inode->i_ino == F2FS_META_INO(sbi))
|
|
|
|
return 0;
|
|
|
|
|
2016-05-21 01:13:22 +08:00
|
|
|
if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
|
2013-06-10 08:17:01 +08:00
|
|
|
return 0;
|
|
|
|
|
f2fs: introduce a new global lock scheme
In the previous version, f2fs uses global locks according to the usage types,
such as directory operations, block allocation, block write, and so on.
Reference the following lock types in f2fs.h.
enum lock_type {
RENAME, /* for renaming operations */
DENTRY_OPS, /* for directory operations */
DATA_WRITE, /* for data write */
DATA_NEW, /* for data allocation */
DATA_TRUNC, /* for data truncate */
NODE_NEW, /* for node allocation */
NODE_TRUNC, /* for node truncate */
NODE_WRITE, /* for node write */
NR_LOCK_TYPE,
};
In that case, we lose the performance under the multi-threading environment,
since every types of operations must be conducted one at a time.
In order to address the problem, let's share the locks globally with a mutex
array regardless of any types.
So, let users grab a mutex and perform their jobs in parallel as much as
possbile.
For this, I propose a new global lock scheme as follows.
0. Data structure
- f2fs_sb_info -> mutex_lock[NR_GLOBAL_LOCKS]
- f2fs_sb_info -> node_write
1. mutex_lock_op(sbi)
- try to get an avaiable lock from the array.
- returns the index of the gottern lock variable.
2. mutex_unlock_op(sbi, index of the lock)
- unlock the given index of the lock.
3. mutex_lock_all(sbi)
- grab all the locks in the array before the checkpoint.
4. mutex_unlock_all(sbi)
- release all the locks in the array after checkpoint.
5. block_operations()
- call mutex_lock_all()
- sync_dirty_dir_inodes()
- grab node_write
- sync_node_pages()
Note that,
the pairs of mutex_lock_op()/mutex_unlock_op() and
mutex_lock_all()/mutex_unlock_all() should be used together.
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2012-11-22 15:21:29 +08:00
|
|
|
/*
|
2015-09-13 02:25:30 +08:00
|
|
|
* We need to balance fs here to prevent from producing dirty node pages
|
f2fs: introduce a new global lock scheme
In the previous version, f2fs uses global locks according to the usage types,
such as directory operations, block allocation, block write, and so on.
Reference the following lock types in f2fs.h.
enum lock_type {
RENAME, /* for renaming operations */
DENTRY_OPS, /* for directory operations */
DATA_WRITE, /* for data write */
DATA_NEW, /* for data allocation */
DATA_TRUNC, /* for data truncate */
NODE_NEW, /* for node allocation */
NODE_TRUNC, /* for node truncate */
NODE_WRITE, /* for node write */
NR_LOCK_TYPE,
};
In that case, we lose the performance under the multi-threading environment,
since every types of operations must be conducted one at a time.
In order to address the problem, let's share the locks globally with a mutex
array regardless of any types.
So, let users grab a mutex and perform their jobs in parallel as much as
possbile.
For this, I propose a new global lock scheme as follows.
0. Data structure
- f2fs_sb_info -> mutex_lock[NR_GLOBAL_LOCKS]
- f2fs_sb_info -> node_write
1. mutex_lock_op(sbi)
- try to get an avaiable lock from the array.
- returns the index of the gottern lock variable.
2. mutex_unlock_op(sbi, index of the lock)
- unlock the given index of the lock.
3. mutex_lock_all(sbi)
- grab all the locks in the array before the checkpoint.
4. mutex_unlock_all(sbi)
- release all the locks in the array after checkpoint.
5. block_operations()
- call mutex_lock_all()
- sync_dirty_dir_inodes()
- grab node_write
- sync_node_pages()
Note that,
the pairs of mutex_lock_op()/mutex_unlock_op() and
mutex_lock_all()/mutex_unlock_all() should be used together.
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2012-11-22 15:21:29 +08:00
|
|
|
* during the urgent cleaning time when runing out of free sections.
|
|
|
|
*/
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_update_inode_page(inode);
|
2017-04-21 04:51:57 +08:00
|
|
|
if (wbc && wbc->nr_to_write)
|
2016-01-08 06:15:04 +08:00
|
|
|
f2fs_balance_fs(sbi, true);
|
2014-01-24 08:42:16 +08:00
|
|
|
return 0;
|
f2fs: introduce a new global lock scheme
In the previous version, f2fs uses global locks according to the usage types,
such as directory operations, block allocation, block write, and so on.
Reference the following lock types in f2fs.h.
enum lock_type {
RENAME, /* for renaming operations */
DENTRY_OPS, /* for directory operations */
DATA_WRITE, /* for data write */
DATA_NEW, /* for data allocation */
DATA_TRUNC, /* for data truncate */
NODE_NEW, /* for node allocation */
NODE_TRUNC, /* for node truncate */
NODE_WRITE, /* for node write */
NR_LOCK_TYPE,
};
In that case, we lose the performance under the multi-threading environment,
since every types of operations must be conducted one at a time.
In order to address the problem, let's share the locks globally with a mutex
array regardless of any types.
So, let users grab a mutex and perform their jobs in parallel as much as
possbile.
For this, I propose a new global lock scheme as follows.
0. Data structure
- f2fs_sb_info -> mutex_lock[NR_GLOBAL_LOCKS]
- f2fs_sb_info -> node_write
1. mutex_lock_op(sbi)
- try to get an avaiable lock from the array.
- returns the index of the gottern lock variable.
2. mutex_unlock_op(sbi, index of the lock)
- unlock the given index of the lock.
3. mutex_lock_all(sbi)
- grab all the locks in the array before the checkpoint.
4. mutex_unlock_all(sbi)
- release all the locks in the array after checkpoint.
5. block_operations()
- call mutex_lock_all()
- sync_dirty_dir_inodes()
- grab node_write
- sync_node_pages()
Note that,
the pairs of mutex_lock_op()/mutex_unlock_op() and
mutex_lock_all()/mutex_unlock_all() should be used together.
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2012-11-22 15:21:29 +08:00
|
|
|
}
|
|
|
|
|
2012-11-29 12:28:09 +08:00
|
|
|
/*
|
2012-11-02 16:10:40 +08:00
|
|
|
* Called at the last iput() if i_nlink is zero
|
|
|
|
*/
|
|
|
|
void f2fs_evict_inode(struct inode *inode)
|
|
|
|
{
|
2014-09-03 06:31:18 +08:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2016-05-21 01:13:22 +08:00
|
|
|
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
|
2015-08-24 17:40:45 +08:00
|
|
|
int err = 0;
|
2012-11-02 16:10:40 +08:00
|
|
|
|
2014-10-07 08:39:50 +08:00
|
|
|
/* some remained atomic pages should discarded */
|
2014-12-09 22:08:59 +08:00
|
|
|
if (f2fs_is_atomic_file(inode))
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_drop_inmem_pages(inode);
|
2014-10-07 08:39:50 +08:00
|
|
|
|
2013-04-20 00:28:40 +08:00
|
|
|
trace_f2fs_evict_inode(inode);
|
2014-04-04 05:47:49 +08:00
|
|
|
truncate_inode_pages_final(&inode->i_data);
|
2012-11-02 16:10:40 +08:00
|
|
|
|
|
|
|
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
|
|
|
|
inode->i_ino == F2FS_META_INO(sbi))
|
f2fs: avoid use invalid mapping of node_inode when evict meta inode
Andrey Tsyvarev reported:
"Using memory error detector reveals the following use-after-free error
in 3.15.0:
AddressSanitizer: heap-use-after-free in f2fs_evict_inode
Read of size 8 by thread T22279:
[<ffffffffa02d8702>] f2fs_evict_inode+0x102/0x2e0 [f2fs]
[<ffffffff812359af>] evict+0x15f/0x290
[< inlined >] iput+0x196/0x280 iput_final
[<ffffffff812369a6>] iput+0x196/0x280
[<ffffffffa02dc416>] f2fs_put_super+0xd6/0x170 [f2fs]
[<ffffffff81210095>] generic_shutdown_super+0xc5/0x1b0
[<ffffffff812105fd>] kill_block_super+0x4d/0xb0
[<ffffffff81210a86>] deactivate_locked_super+0x66/0x80
[<ffffffff81211c98>] deactivate_super+0x68/0x80
[<ffffffff8123cc88>] mntput_no_expire+0x198/0x250
[< inlined >] SyS_umount+0xe9/0x1a0 SYSC_umount
[<ffffffff8123f1c9>] SyS_umount+0xe9/0x1a0
[<ffffffff81cc8df9>] system_call_fastpath+0x16/0x1b
Freed by thread T3:
[<ffffffffa02dc337>] f2fs_i_callback+0x27/0x30 [f2fs]
[< inlined >] rcu_process_callbacks+0x2d6/0x930 __rcu_reclaim
[< inlined >] rcu_process_callbacks+0x2d6/0x930 rcu_do_batch
[< inlined >] rcu_process_callbacks+0x2d6/0x930 invoke_rcu_callbacks
[< inlined >] rcu_process_callbacks+0x2d6/0x930 __rcu_process_callbacks
[<ffffffff810fd266>] rcu_process_callbacks+0x2d6/0x930
[<ffffffff8107cce2>] __do_softirq+0x142/0x380
[<ffffffff8107cf50>] run_ksoftirqd+0x30/0x50
[<ffffffff810b2a87>] smpboot_thread_fn+0x197/0x280
[<ffffffff810a8238>] kthread+0x148/0x160
[<ffffffff81cc8d4c>] ret_from_fork+0x7c/0xb0
Allocated by thread T22276:
[<ffffffffa02dc7dd>] f2fs_alloc_inode+0x2d/0x170 [f2fs]
[<ffffffff81235e2a>] iget_locked+0x10a/0x230
[<ffffffffa02d7495>] f2fs_iget+0x35/0xa80 [f2fs]
[<ffffffffa02e2393>] f2fs_fill_super+0xb53/0xff0 [f2fs]
[<ffffffff81211bce>] mount_bdev+0x1de/0x240
[<ffffffffa02dbce0>] f2fs_mount+0x10/0x20 [f2fs]
[<ffffffff81212a85>] mount_fs+0x55/0x220
[<ffffffff8123c026>] vfs_kern_mount+0x66/0x200
[< inlined >] do_mount+0x2b4/0x1120 do_new_mount
[<ffffffff812400d4>] do_mount+0x2b4/0x1120
[< inlined >] SyS_mount+0xb2/0x110 SYSC_mount
[<ffffffff812414a2>] SyS_mount+0xb2/0x110
[<ffffffff81cc8df9>] system_call_fastpath+0x16/0x1b
The buggy address ffff8800587866c8 is located 48 bytes inside
of 680-byte region [ffff880058786698, ffff880058786940)
Memory state around the buggy address:
ffff880058786100: ffffffff ffffffff ffffffff ffffffff
ffff880058786200: ffffffff ffffffff ffffffrr rrrrrrrr
ffff880058786300: rrrrrrrr rrffffff ffffffff ffffffff
ffff880058786400: ffffffff ffffffff ffffffff ffffffff
ffff880058786500: ffffffff ffffffff ffffffff fffffffr
>ffff880058786600: rrrrrrrr rrrrrrrr rrrfffff ffffffff
^
ffff880058786700: ffffffff ffffffff ffffffff ffffffff
ffff880058786800: ffffffff ffffffff ffffffff ffffffff
ffff880058786900: ffffffff rrrrrrrr rrrrrrrr rrrr....
ffff880058786a00: ........ ........ ........ ........
ffff880058786b00: ........ ........ ........ ........
Legend:
f - 8 freed bytes
r - 8 redzone bytes
. - 8 allocated bytes
x=1..7 - x allocated bytes + (8-x) redzone bytes
Investigation shows, that f2fs_evict_inode, when called for
'meta_inode', uses invalidate_mapping_pages() for 'node_inode'.
But 'node_inode' is deleted before 'meta_inode' in f2fs_put_super via
iput().
It seems that in common usage scenario this use-after-free is benign,
because 'node_inode' remains partially valid data even after
kmem_cache_free().
But things may change if, while 'meta_inode' is evicted in one f2fs
filesystem, another (mounted) f2fs filesystem requests inode from cache,
and formely
'node_inode' of the first filesystem is returned."
Nids for both meta_inode and node_inode are reservation, so it's not necessary
for us to invalidate pages which will never be allocated.
To fix this issue, let's skipping needlessly invalidating pages for
{meta,node}_inode in f2fs_evict_inode.
Reported-by: Andrey Tsyvarev <tsyvarev@ispras.ru>
Tested-by: Andrey Tsyvarev <tsyvarev@ispras.ru>
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2014-07-25 12:00:57 +08:00
|
|
|
goto out_clear;
|
2012-11-02 16:10:40 +08:00
|
|
|
|
2014-09-13 06:53:45 +08:00
|
|
|
f2fs_bug_on(sbi, get_dirty_pages(inode));
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_remove_dirty_inode(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
|
2015-06-20 08:53:26 +08:00
|
|
|
f2fs_destroy_extent_tree(inode);
|
|
|
|
|
2012-11-02 16:10:40 +08:00
|
|
|
if (inode->i_nlink || is_bad_inode(inode))
|
|
|
|
goto no_delete;
|
|
|
|
|
2017-07-09 00:13:07 +08:00
|
|
|
dquot_initialize(inode);
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
|
|
|
|
f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
|
|
|
|
f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
|
2016-11-02 20:43:21 +08:00
|
|
|
|
2013-01-29 17:30:07 +08:00
|
|
|
sb_start_intwrite(inode->i_sb);
|
2016-05-21 01:13:22 +08:00
|
|
|
set_inode_flag(inode, FI_NO_ALLOC);
|
2012-11-02 16:10:40 +08:00
|
|
|
i_size_write(inode, 0);
|
2016-05-04 00:22:18 +08:00
|
|
|
retry:
|
2012-11-02 16:10:40 +08:00
|
|
|
if (F2FS_HAS_BLOCKS(inode))
|
2016-06-03 04:49:38 +08:00
|
|
|
err = f2fs_truncate(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
|
2017-03-08 05:32:20 +08:00
|
|
|
#ifdef CONFIG_F2FS_FAULT_INJECTION
|
|
|
|
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
|
|
|
|
f2fs_show_injection_info(FAULT_EVICT_INODE);
|
|
|
|
err = -EIO;
|
|
|
|
}
|
|
|
|
#endif
|
2015-08-24 17:40:45 +08:00
|
|
|
if (!err) {
|
|
|
|
f2fs_lock_op(sbi);
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
err = f2fs_remove_inode_page(inode);
|
2015-08-24 17:40:45 +08:00
|
|
|
f2fs_unlock_op(sbi);
|
2016-10-11 22:56:59 +08:00
|
|
|
if (err == -ENOENT)
|
|
|
|
err = 0;
|
2015-08-24 17:40:45 +08:00
|
|
|
}
|
f2fs: introduce a new global lock scheme
In the previous version, f2fs uses global locks according to the usage types,
such as directory operations, block allocation, block write, and so on.
Reference the following lock types in f2fs.h.
enum lock_type {
RENAME, /* for renaming operations */
DENTRY_OPS, /* for directory operations */
DATA_WRITE, /* for data write */
DATA_NEW, /* for data allocation */
DATA_TRUNC, /* for data truncate */
NODE_NEW, /* for node allocation */
NODE_TRUNC, /* for node truncate */
NODE_WRITE, /* for node write */
NR_LOCK_TYPE,
};
In that case, we lose the performance under the multi-threading environment,
since every types of operations must be conducted one at a time.
In order to address the problem, let's share the locks globally with a mutex
array regardless of any types.
So, let users grab a mutex and perform their jobs in parallel as much as
possbile.
For this, I propose a new global lock scheme as follows.
0. Data structure
- f2fs_sb_info -> mutex_lock[NR_GLOBAL_LOCKS]
- f2fs_sb_info -> node_write
1. mutex_lock_op(sbi)
- try to get an avaiable lock from the array.
- returns the index of the gottern lock variable.
2. mutex_unlock_op(sbi, index of the lock)
- unlock the given index of the lock.
3. mutex_lock_all(sbi)
- grab all the locks in the array before the checkpoint.
4. mutex_unlock_all(sbi)
- release all the locks in the array after checkpoint.
5. block_operations()
- call mutex_lock_all()
- sync_dirty_dir_inodes()
- grab node_write
- sync_node_pages()
Note that,
the pairs of mutex_lock_op()/mutex_unlock_op() and
mutex_lock_all()/mutex_unlock_all() should be used together.
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2012-11-22 15:21:29 +08:00
|
|
|
|
2016-05-04 00:22:18 +08:00
|
|
|
/* give more chances, if ENOMEM case */
|
|
|
|
if (err == -ENOMEM) {
|
|
|
|
err = 0;
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
2016-05-21 02:10:10 +08:00
|
|
|
if (err)
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_update_inode_page(inode);
|
2017-07-09 00:13:07 +08:00
|
|
|
dquot_free_inode(inode);
|
2013-01-29 17:30:07 +08:00
|
|
|
sb_end_intwrite(inode->i_sb);
|
2012-11-02 16:10:40 +08:00
|
|
|
no_delete:
|
2017-07-09 00:13:07 +08:00
|
|
|
dquot_drop(inode);
|
|
|
|
|
2015-07-15 17:28:53 +08:00
|
|
|
stat_dec_inline_xattr(inode);
|
2014-10-14 11:00:16 +08:00
|
|
|
stat_dec_inline_dir(inode);
|
2014-10-15 01:29:50 +08:00
|
|
|
stat_dec_inline_inode(inode);
|
2015-03-19 19:27:51 +08:00
|
|
|
|
2017-10-13 10:12:53 +08:00
|
|
|
if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
|
2017-09-12 14:04:05 +08:00
|
|
|
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
|
2017-10-13 10:12:53 +08:00
|
|
|
else
|
|
|
|
f2fs_inode_synced(inode);
|
2017-09-12 14:04:05 +08:00
|
|
|
|
2017-03-05 05:56:10 +08:00
|
|
|
/* ino == 0, if f2fs_new_inode() was failed t*/
|
|
|
|
if (inode->i_ino)
|
|
|
|
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino,
|
|
|
|
inode->i_ino);
|
2014-08-04 09:54:58 +08:00
|
|
|
if (xnid)
|
|
|
|
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
|
2016-11-02 20:43:21 +08:00
|
|
|
if (inode->i_nlink) {
|
|
|
|
if (is_inode_flag_set(inode, FI_APPEND_WRITE))
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_add_ino_entry(sbi, inode->i_ino, APPEND_INO);
|
2016-11-02 20:43:21 +08:00
|
|
|
if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
|
2016-11-02 20:43:21 +08:00
|
|
|
}
|
2016-05-21 01:13:22 +08:00
|
|
|
if (is_inode_flag_set(inode, FI_FREE_NID)) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_alloc_nid_failed(sbi, inode->i_ino);
|
2016-05-21 01:13:22 +08:00
|
|
|
clear_inode_flag(inode, FI_FREE_NID);
|
2017-06-02 06:39:27 +08:00
|
|
|
} else {
|
2018-04-24 13:02:31 +08:00
|
|
|
/*
|
|
|
|
* If xattr nid is corrupted, we can reach out error condition,
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
* err & !f2fs_exist_written_data(sbi, inode->i_ino, ORPHAN_INO)).
|
|
|
|
* In that case, f2fs_check_nid_range() is enough to give a clue.
|
2018-04-24 13:02:31 +08:00
|
|
|
*/
|
2015-06-24 01:36:08 +08:00
|
|
|
}
|
f2fs: avoid use invalid mapping of node_inode when evict meta inode
Andrey Tsyvarev reported:
"Using memory error detector reveals the following use-after-free error
in 3.15.0:
AddressSanitizer: heap-use-after-free in f2fs_evict_inode
Read of size 8 by thread T22279:
[<ffffffffa02d8702>] f2fs_evict_inode+0x102/0x2e0 [f2fs]
[<ffffffff812359af>] evict+0x15f/0x290
[< inlined >] iput+0x196/0x280 iput_final
[<ffffffff812369a6>] iput+0x196/0x280
[<ffffffffa02dc416>] f2fs_put_super+0xd6/0x170 [f2fs]
[<ffffffff81210095>] generic_shutdown_super+0xc5/0x1b0
[<ffffffff812105fd>] kill_block_super+0x4d/0xb0
[<ffffffff81210a86>] deactivate_locked_super+0x66/0x80
[<ffffffff81211c98>] deactivate_super+0x68/0x80
[<ffffffff8123cc88>] mntput_no_expire+0x198/0x250
[< inlined >] SyS_umount+0xe9/0x1a0 SYSC_umount
[<ffffffff8123f1c9>] SyS_umount+0xe9/0x1a0
[<ffffffff81cc8df9>] system_call_fastpath+0x16/0x1b
Freed by thread T3:
[<ffffffffa02dc337>] f2fs_i_callback+0x27/0x30 [f2fs]
[< inlined >] rcu_process_callbacks+0x2d6/0x930 __rcu_reclaim
[< inlined >] rcu_process_callbacks+0x2d6/0x930 rcu_do_batch
[< inlined >] rcu_process_callbacks+0x2d6/0x930 invoke_rcu_callbacks
[< inlined >] rcu_process_callbacks+0x2d6/0x930 __rcu_process_callbacks
[<ffffffff810fd266>] rcu_process_callbacks+0x2d6/0x930
[<ffffffff8107cce2>] __do_softirq+0x142/0x380
[<ffffffff8107cf50>] run_ksoftirqd+0x30/0x50
[<ffffffff810b2a87>] smpboot_thread_fn+0x197/0x280
[<ffffffff810a8238>] kthread+0x148/0x160
[<ffffffff81cc8d4c>] ret_from_fork+0x7c/0xb0
Allocated by thread T22276:
[<ffffffffa02dc7dd>] f2fs_alloc_inode+0x2d/0x170 [f2fs]
[<ffffffff81235e2a>] iget_locked+0x10a/0x230
[<ffffffffa02d7495>] f2fs_iget+0x35/0xa80 [f2fs]
[<ffffffffa02e2393>] f2fs_fill_super+0xb53/0xff0 [f2fs]
[<ffffffff81211bce>] mount_bdev+0x1de/0x240
[<ffffffffa02dbce0>] f2fs_mount+0x10/0x20 [f2fs]
[<ffffffff81212a85>] mount_fs+0x55/0x220
[<ffffffff8123c026>] vfs_kern_mount+0x66/0x200
[< inlined >] do_mount+0x2b4/0x1120 do_new_mount
[<ffffffff812400d4>] do_mount+0x2b4/0x1120
[< inlined >] SyS_mount+0xb2/0x110 SYSC_mount
[<ffffffff812414a2>] SyS_mount+0xb2/0x110
[<ffffffff81cc8df9>] system_call_fastpath+0x16/0x1b
The buggy address ffff8800587866c8 is located 48 bytes inside
of 680-byte region [ffff880058786698, ffff880058786940)
Memory state around the buggy address:
ffff880058786100: ffffffff ffffffff ffffffff ffffffff
ffff880058786200: ffffffff ffffffff ffffffrr rrrrrrrr
ffff880058786300: rrrrrrrr rrffffff ffffffff ffffffff
ffff880058786400: ffffffff ffffffff ffffffff ffffffff
ffff880058786500: ffffffff ffffffff ffffffff fffffffr
>ffff880058786600: rrrrrrrr rrrrrrrr rrrfffff ffffffff
^
ffff880058786700: ffffffff ffffffff ffffffff ffffffff
ffff880058786800: ffffffff ffffffff ffffffff ffffffff
ffff880058786900: ffffffff rrrrrrrr rrrrrrrr rrrr....
ffff880058786a00: ........ ........ ........ ........
ffff880058786b00: ........ ........ ........ ........
Legend:
f - 8 freed bytes
r - 8 redzone bytes
. - 8 allocated bytes
x=1..7 - x allocated bytes + (8-x) redzone bytes
Investigation shows, that f2fs_evict_inode, when called for
'meta_inode', uses invalidate_mapping_pages() for 'node_inode'.
But 'node_inode' is deleted before 'meta_inode' in f2fs_put_super via
iput().
It seems that in common usage scenario this use-after-free is benign,
because 'node_inode' remains partially valid data even after
kmem_cache_free().
But things may change if, while 'meta_inode' is evicted in one f2fs
filesystem, another (mounted) f2fs filesystem requests inode from cache,
and formely
'node_inode' of the first filesystem is returned."
Nids for both meta_inode and node_inode are reservation, so it's not necessary
for us to invalidate pages which will never be allocated.
To fix this issue, let's skipping needlessly invalidating pages for
{meta,node}_inode in f2fs_evict_inode.
Reported-by: Andrey Tsyvarev <tsyvarev@ispras.ru>
Tested-by: Andrey Tsyvarev <tsyvarev@ispras.ru>
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2014-07-25 12:00:57 +08:00
|
|
|
out_clear:
|
2018-01-12 12:30:13 +08:00
|
|
|
fscrypt_put_encryption_info(inode);
|
f2fs: avoid use invalid mapping of node_inode when evict meta inode
Andrey Tsyvarev reported:
"Using memory error detector reveals the following use-after-free error
in 3.15.0:
AddressSanitizer: heap-use-after-free in f2fs_evict_inode
Read of size 8 by thread T22279:
[<ffffffffa02d8702>] f2fs_evict_inode+0x102/0x2e0 [f2fs]
[<ffffffff812359af>] evict+0x15f/0x290
[< inlined >] iput+0x196/0x280 iput_final
[<ffffffff812369a6>] iput+0x196/0x280
[<ffffffffa02dc416>] f2fs_put_super+0xd6/0x170 [f2fs]
[<ffffffff81210095>] generic_shutdown_super+0xc5/0x1b0
[<ffffffff812105fd>] kill_block_super+0x4d/0xb0
[<ffffffff81210a86>] deactivate_locked_super+0x66/0x80
[<ffffffff81211c98>] deactivate_super+0x68/0x80
[<ffffffff8123cc88>] mntput_no_expire+0x198/0x250
[< inlined >] SyS_umount+0xe9/0x1a0 SYSC_umount
[<ffffffff8123f1c9>] SyS_umount+0xe9/0x1a0
[<ffffffff81cc8df9>] system_call_fastpath+0x16/0x1b
Freed by thread T3:
[<ffffffffa02dc337>] f2fs_i_callback+0x27/0x30 [f2fs]
[< inlined >] rcu_process_callbacks+0x2d6/0x930 __rcu_reclaim
[< inlined >] rcu_process_callbacks+0x2d6/0x930 rcu_do_batch
[< inlined >] rcu_process_callbacks+0x2d6/0x930 invoke_rcu_callbacks
[< inlined >] rcu_process_callbacks+0x2d6/0x930 __rcu_process_callbacks
[<ffffffff810fd266>] rcu_process_callbacks+0x2d6/0x930
[<ffffffff8107cce2>] __do_softirq+0x142/0x380
[<ffffffff8107cf50>] run_ksoftirqd+0x30/0x50
[<ffffffff810b2a87>] smpboot_thread_fn+0x197/0x280
[<ffffffff810a8238>] kthread+0x148/0x160
[<ffffffff81cc8d4c>] ret_from_fork+0x7c/0xb0
Allocated by thread T22276:
[<ffffffffa02dc7dd>] f2fs_alloc_inode+0x2d/0x170 [f2fs]
[<ffffffff81235e2a>] iget_locked+0x10a/0x230
[<ffffffffa02d7495>] f2fs_iget+0x35/0xa80 [f2fs]
[<ffffffffa02e2393>] f2fs_fill_super+0xb53/0xff0 [f2fs]
[<ffffffff81211bce>] mount_bdev+0x1de/0x240
[<ffffffffa02dbce0>] f2fs_mount+0x10/0x20 [f2fs]
[<ffffffff81212a85>] mount_fs+0x55/0x220
[<ffffffff8123c026>] vfs_kern_mount+0x66/0x200
[< inlined >] do_mount+0x2b4/0x1120 do_new_mount
[<ffffffff812400d4>] do_mount+0x2b4/0x1120
[< inlined >] SyS_mount+0xb2/0x110 SYSC_mount
[<ffffffff812414a2>] SyS_mount+0xb2/0x110
[<ffffffff81cc8df9>] system_call_fastpath+0x16/0x1b
The buggy address ffff8800587866c8 is located 48 bytes inside
of 680-byte region [ffff880058786698, ffff880058786940)
Memory state around the buggy address:
ffff880058786100: ffffffff ffffffff ffffffff ffffffff
ffff880058786200: ffffffff ffffffff ffffffrr rrrrrrrr
ffff880058786300: rrrrrrrr rrffffff ffffffff ffffffff
ffff880058786400: ffffffff ffffffff ffffffff ffffffff
ffff880058786500: ffffffff ffffffff ffffffff fffffffr
>ffff880058786600: rrrrrrrr rrrrrrrr rrrfffff ffffffff
^
ffff880058786700: ffffffff ffffffff ffffffff ffffffff
ffff880058786800: ffffffff ffffffff ffffffff ffffffff
ffff880058786900: ffffffff rrrrrrrr rrrrrrrr rrrr....
ffff880058786a00: ........ ........ ........ ........
ffff880058786b00: ........ ........ ........ ........
Legend:
f - 8 freed bytes
r - 8 redzone bytes
. - 8 allocated bytes
x=1..7 - x allocated bytes + (8-x) redzone bytes
Investigation shows, that f2fs_evict_inode, when called for
'meta_inode', uses invalidate_mapping_pages() for 'node_inode'.
But 'node_inode' is deleted before 'meta_inode' in f2fs_put_super via
iput().
It seems that in common usage scenario this use-after-free is benign,
because 'node_inode' remains partially valid data even after
kmem_cache_free().
But things may change if, while 'meta_inode' is evicted in one f2fs
filesystem, another (mounted) f2fs filesystem requests inode from cache,
and formely
'node_inode' of the first filesystem is returned."
Nids for both meta_inode and node_inode are reservation, so it's not necessary
for us to invalidate pages which will never be allocated.
To fix this issue, let's skipping needlessly invalidating pages for
{meta,node}_inode in f2fs_evict_inode.
Reported-by: Andrey Tsyvarev <tsyvarev@ispras.ru>
Tested-by: Andrey Tsyvarev <tsyvarev@ispras.ru>
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2014-07-25 12:00:57 +08:00
|
|
|
clear_inode(inode);
|
2012-11-02 16:10:40 +08:00
|
|
|
}
|
2014-09-26 02:55:53 +08:00
|
|
|
|
|
|
|
/* caller should call f2fs_lock_op() */
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
void f2fs_handle_failed_inode(struct inode *inode)
|
2014-09-26 02:55:53 +08:00
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2016-05-03 03:34:48 +08:00
|
|
|
struct node_info ni;
|
2014-09-26 02:55:53 +08:00
|
|
|
|
2016-10-11 22:56:59 +08:00
|
|
|
/*
|
|
|
|
* clear nlink of inode in order to release resource of inode
|
|
|
|
* immediately.
|
|
|
|
*/
|
|
|
|
clear_nlink(inode);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* we must call this to avoid inode being remained as dirty, resulting
|
|
|
|
* in a panic when flushing dirty inodes in gdirty_list.
|
|
|
|
*/
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_update_inode_page(inode);
|
2017-04-12 10:01:26 +08:00
|
|
|
f2fs_inode_synced(inode);
|
2016-10-11 22:56:59 +08:00
|
|
|
|
2016-05-03 03:34:48 +08:00
|
|
|
/* don't make bad inode, since it becomes a regular file. */
|
2014-09-26 02:55:53 +08:00
|
|
|
unlock_new_inode(inode);
|
|
|
|
|
2015-08-24 17:40:45 +08:00
|
|
|
/*
|
|
|
|
* Note: we should add inode to orphan list before f2fs_unlock_op()
|
|
|
|
* so we can prevent losing this orphan when encoutering checkpoint
|
|
|
|
* and following suddenly power-off.
|
|
|
|
*/
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_get_node_info(sbi, inode->i_ino, &ni);
|
2016-05-03 03:34:48 +08:00
|
|
|
|
|
|
|
if (ni.blk_addr != NULL_ADDR) {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
int err = f2fs_acquire_orphan_inode(sbi);
|
2016-05-03 03:34:48 +08:00
|
|
|
if (err) {
|
|
|
|
set_sbi_flag(sbi, SBI_NEED_FSCK);
|
|
|
|
f2fs_msg(sbi->sb, KERN_WARNING,
|
|
|
|
"Too many orphan inodes, run fsck to fix.");
|
|
|
|
} else {
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_add_orphan_inode(inode);
|
2016-05-03 03:34:48 +08:00
|
|
|
}
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_alloc_nid_done(sbi, inode->i_ino);
|
2016-05-03 03:34:48 +08:00
|
|
|
} else {
|
2016-05-21 01:13:22 +08:00
|
|
|
set_inode_flag(inode, FI_FREE_NID);
|
2015-08-24 17:40:45 +08:00
|
|
|
}
|
2014-09-26 02:55:53 +08:00
|
|
|
|
|
|
|
f2fs_unlock_op(sbi);
|
|
|
|
|
|
|
|
/* iput will drop the inode object */
|
|
|
|
iput(inode);
|
|
|
|
}
|