2018-09-12 09:16:07 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2017-06-14 17:39:47 +08:00
|
|
|
/*
|
|
|
|
* f2fs sysfs interface
|
|
|
|
*
|
|
|
|
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
|
|
|
|
* http://www.samsung.com/
|
|
|
|
* Copyright (c) 2017 Chao Yu <chao@kernel.org>
|
|
|
|
*/
|
2018-07-07 11:50:57 +08:00
|
|
|
#include <linux/compiler.h>
|
2017-06-14 17:39:47 +08:00
|
|
|
#include <linux/proc_fs.h>
|
|
|
|
#include <linux/f2fs_fs.h>
|
2017-07-14 08:45:21 +08:00
|
|
|
#include <linux/seq_file.h>
|
2019-07-24 07:05:28 +08:00
|
|
|
#include <linux/unicode.h>
|
2017-06-14 17:39:47 +08:00
|
|
|
|
|
|
|
#include "f2fs.h"
|
|
|
|
#include "segment.h"
|
|
|
|
#include "gc.h"
|
2020-03-30 11:30:59 +08:00
|
|
|
#include <trace/events/f2fs.h>
|
2017-06-14 17:39:47 +08:00
|
|
|
|
|
|
|
/*
 * procfs directory entry kept at file scope.
 * NOTE(review): presumably the f2fs root directory under procfs; it is
 * assigned outside this excerpt -- confirm at the point of creation.
 */
static struct proc_dir_entry *f2fs_proc_root;
|
|
|
|
|
|
|
|
/* Sysfs support for f2fs */
|
|
|
|
/*
 * Selector stored in f2fs_attr.struct_type.  __struct_ptr() maps each value
 * to the base address of the structure named in the comment beside it.
 * The order of the enumerators is preserved exactly.
 */
enum {
	GC_THREAD,		/* base: struct f2fs_gc_thread */
	SM_INFO,		/* base: struct f2fs_sm_info */
	DCC_INFO,		/* base: struct discard_cmd_control */
	NM_INFO,		/* base: struct f2fs_nm_info */
	F2FS_SBI,		/* base: struct f2fs_sb_info */
#ifdef CONFIG_F2FS_STAT_FS
	STAT_INFO,		/* base: struct f2fs_stat_info */
#endif
#ifdef CONFIG_F2FS_FAULT_INJECTION
	FAULT_INFO_RATE,	/* base: struct f2fs_fault_info */
	FAULT_INFO_TYPE,	/* base: struct f2fs_fault_info */
#endif
	RESERVED_BLOCKS,	/* base: struct f2fs_sb_info */
};
|
|
|
|
|
|
|
|
struct f2fs_attr {
|
|
|
|
struct attribute attr;
|
|
|
|
ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
|
|
|
|
ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
|
|
|
|
const char *, size_t);
|
|
|
|
int struct_type;
|
|
|
|
int offset;
|
2017-07-22 08:14:09 +08:00
|
|
|
int id;
|
2017-06-14 17:39:47 +08:00
|
|
|
};
|
|
|
|
|
2020-01-23 02:51:16 +08:00
|
|
|
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf);
|
|
|
|
|
2017-06-14 17:39:47 +08:00
|
|
|
static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
|
|
|
|
{
|
|
|
|
if (struct_type == GC_THREAD)
|
|
|
|
return (unsigned char *)sbi->gc_thread;
|
|
|
|
else if (struct_type == SM_INFO)
|
|
|
|
return (unsigned char *)SM_I(sbi);
|
|
|
|
else if (struct_type == DCC_INFO)
|
|
|
|
return (unsigned char *)SM_I(sbi)->dcc_info;
|
|
|
|
else if (struct_type == NM_INFO)
|
|
|
|
return (unsigned char *)NM_I(sbi);
|
2017-06-26 16:24:41 +08:00
|
|
|
else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS)
|
2017-06-14 17:39:47 +08:00
|
|
|
return (unsigned char *)sbi;
|
|
|
|
#ifdef CONFIG_F2FS_FAULT_INJECTION
|
|
|
|
else if (struct_type == FAULT_INFO_RATE ||
|
|
|
|
struct_type == FAULT_INFO_TYPE)
|
2018-03-08 14:22:56 +08:00
|
|
|
return (unsigned char *)&F2FS_OPTION(sbi).fault_info;
|
2020-01-23 02:51:16 +08:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_F2FS_STAT_FS
|
|
|
|
else if (struct_type == STAT_INFO)
|
|
|
|
return (unsigned char *)F2FS_STAT(sbi);
|
2017-06-14 17:39:47 +08:00
|
|
|
#endif
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2017-10-24 15:46:54 +08:00
|
|
|
/*
 * Show handler: print the value of dirty_segments(sbi) as an unsigned
 * decimal number followed by a newline.  Returns the sprintf() byte count.
 */
static ssize_t dirty_segments_show(struct f2fs_attr *a,
		struct f2fs_sb_info *sbi, char *buf)
{
	unsigned long long nr_dirty = (unsigned long long)(dirty_segments(sbi));

	return sprintf(buf, "%llu\n", nr_dirty);
}
|
|
|
|
|
2020-01-23 02:51:16 +08:00
|
|
|
/*
 * Show handler: print the value of free_segments(sbi) as an unsigned
 * decimal number followed by a newline.  Returns the sprintf() byte count.
 */
static ssize_t free_segments_show(struct f2fs_attr *a,
		struct f2fs_sb_info *sbi, char *buf)
{
	unsigned long long nr_free = (unsigned long long)(free_segments(sbi));

	return sprintf(buf, "%llu\n", nr_free);
}
|
2019-05-30 08:49:06 +08:00
|
|
|
|
2017-06-14 17:39:47 +08:00
|
|
|
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
|
|
|
struct super_block *sb = sbi->sb;
|
|
|
|
|
|
|
|
if (!sb->s_bdev->bd_part)
|
2020-01-23 02:51:16 +08:00
|
|
|
return sprintf(buf, "0\n");
|
2017-06-14 17:39:47 +08:00
|
|
|
|
2020-01-23 02:51:16 +08:00
|
|
|
return sprintf(buf, "%llu\n",
|
|
|
|
(unsigned long long)(sbi->kbytes_written +
|
2020-11-27 21:20:06 +08:00
|
|
|
((f2fs_get_sectors_written(sbi) -
|
|
|
|
sbi->sectors_written_start) >> 1)));
|
2017-06-14 17:39:47 +08:00
|
|
|
}
|
|
|
|
|
2017-07-22 08:14:09 +08:00
|
|
|
static ssize_t features_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
|
|
|
struct super_block *sb = sbi->sb;
|
|
|
|
int len = 0;
|
|
|
|
|
|
|
|
if (!sb->s_bdev->bd_part)
|
2020-01-23 02:51:16 +08:00
|
|
|
return sprintf(buf, "0\n");
|
2017-07-22 08:14:09 +08:00
|
|
|
|
2018-10-24 18:34:26 +08:00
|
|
|
if (f2fs_sb_has_encrypt(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf, PAGE_SIZE - len, "%s",
|
2017-07-22 08:14:09 +08:00
|
|
|
"encryption");
|
2018-10-24 18:34:26 +08:00
|
|
|
if (f2fs_sb_has_blkzoned(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
2017-07-22 08:14:09 +08:00
|
|
|
len ? ", " : "", "blkzoned");
|
2018-10-24 18:34:26 +08:00
|
|
|
if (f2fs_sb_has_extra_attr(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
2017-07-22 08:14:09 +08:00
|
|
|
len ? ", " : "", "extra_attr");
|
2018-10-24 18:34:26 +08:00
|
|
|
if (f2fs_sb_has_project_quota(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
2017-07-22 08:14:09 +08:00
|
|
|
len ? ", " : "", "projquota");
|
2018-10-24 18:34:26 +08:00
|
|
|
if (f2fs_sb_has_inode_chksum(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
2017-07-22 08:14:09 +08:00
|
|
|
len ? ", " : "", "inode_checksum");
|
2018-10-24 18:34:26 +08:00
|
|
|
if (f2fs_sb_has_flexible_inline_xattr(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to cnosider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 21:59:50 +08:00
|
|
|
len ? ", " : "", "flexible_inline_xattr");
|
2018-10-24 18:34:26 +08:00
|
|
|
if (f2fs_sb_has_quota_ino(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
2017-10-06 12:03:06 +08:00
|
|
|
len ? ", " : "", "quota_ino");
|
2018-10-24 18:34:26 +08:00
|
|
|
if (f2fs_sb_has_inode_crtime(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
2018-01-25 14:54:42 +08:00
|
|
|
len ? ", " : "", "inode_crtime");
|
2018-10-24 18:34:26 +08:00
|
|
|
if (f2fs_sb_has_lost_found(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
2018-03-15 18:51:41 +08:00
|
|
|
len ? ", " : "", "lost_found");
|
f2fs: add fs-verity support
Add fs-verity support to f2fs. fs-verity is a filesystem feature that
enables transparent integrity protection and authentication of read-only
files. It uses a dm-verity like mechanism at the file level: a Merkle
tree is used to verify any block in the file in log(filesize) time. It
is implemented mainly by helper functions in fs/verity/. See
Documentation/filesystems/fsverity.rst for the full documentation.
The f2fs support for fs-verity consists of:
- Adding a filesystem feature flag and an inode flag for fs-verity.
- Implementing the fsverity_operations to support enabling verity on an
inode and reading/writing the verity metadata.
- Updating ->readpages() to verify data as it's read from verity files
and to support reading verity metadata pages.
- Updating ->write_begin(), ->write_end(), and ->writepages() to support
writing verity metadata pages.
- Calling the fs-verity hooks for ->open(), ->setattr(), and ->ioctl().
Like ext4, f2fs stores the verity metadata (Merkle tree and
fsverity_descriptor) past the end of the file, starting at the first 64K
boundary beyond i_size. This approach works because (a) verity files
are readonly, and (b) pages fully beyond i_size aren't visible to
userspace but can be read/written internally by f2fs with only some
relatively small changes to f2fs. Extended attributes cannot be used
because (a) f2fs limits the total size of an inode's xattr entries to
4096 bytes, which wouldn't be enough for even a single Merkle tree
block, and (b) f2fs encryption doesn't encrypt xattrs, yet the verity
metadata *must* be encrypted when the file is because it contains hashes
of the plaintext data.
Acked-by: Jaegeuk Kim <jaegeuk@kernel.org>
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2019-07-23 00:26:24 +08:00
|
|
|
if (f2fs_sb_has_verity(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
f2fs: add fs-verity support
Add fs-verity support to f2fs. fs-verity is a filesystem feature that
enables transparent integrity protection and authentication of read-only
files. It uses a dm-verity like mechanism at the file level: a Merkle
tree is used to verify any block in the file in log(filesize) time. It
is implemented mainly by helper functions in fs/verity/. See
Documentation/filesystems/fsverity.rst for the full documentation.
The f2fs support for fs-verity consists of:
- Adding a filesystem feature flag and an inode flag for fs-verity.
- Implementing the fsverity_operations to support enabling verity on an
inode and reading/writing the verity metadata.
- Updating ->readpages() to verify data as it's read from verity files
and to support reading verity metadata pages.
- Updating ->write_begin(), ->write_end(), and ->writepages() to support
writing verity metadata pages.
- Calling the fs-verity hooks for ->open(), ->setattr(), and ->ioctl().
Like ext4, f2fs stores the verity metadata (Merkle tree and
fsverity_descriptor) past the end of the file, starting at the first 64K
boundary beyond i_size. This approach works because (a) verity files
are readonly, and (b) pages fully beyond i_size aren't visible to
userspace but can be read/written internally by f2fs with only some
relatively small changes to f2fs. Extended attributes cannot be used
because (a) f2fs limits the total size of an inode's xattr entries to
4096 bytes, which wouldn't be enough for even a single Merkle tree
block, and (b) f2fs encryption doesn't encrypt xattrs, yet the verity
metadata *must* be encrypted when the file is because it contains hashes
of the plaintext data.
Acked-by: Jaegeuk Kim <jaegeuk@kernel.org>
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2019-07-23 00:26:24 +08:00
|
|
|
len ? ", " : "", "verity");
|
2018-10-24 18:34:26 +08:00
|
|
|
if (f2fs_sb_has_sb_chksum(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
2018-09-28 20:25:56 +08:00
|
|
|
len ? ", " : "", "sb_checksum");
|
2019-07-24 07:05:28 +08:00
|
|
|
if (f2fs_sb_has_casefold(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
2019-07-24 07:05:28 +08:00
|
|
|
len ? ", " : "", "casefold");
|
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 18:07:14 +08:00
|
|
|
if (f2fs_sb_has_compression(sbi))
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 18:07:14 +08:00
|
|
|
len ? ", " : "", "compression");
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
|
2019-10-19 01:06:40 +08:00
|
|
|
len ? ", " : "", "pin_file");
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
|
2017-07-22 08:14:09 +08:00
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2017-10-27 20:45:05 +08:00
|
|
|
static ssize_t current_reserved_blocks_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
2020-01-23 02:51:16 +08:00
|
|
|
return sprintf(buf, "%u\n", sbi->current_reserved_blocks);
|
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t unusable_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
|
|
|
block_t unusable;
|
|
|
|
|
|
|
|
if (test_opt(sbi, DISABLE_CHECKPOINT))
|
|
|
|
unusable = sbi->unusable_block_count;
|
|
|
|
else
|
|
|
|
unusable = f2fs_get_unusable_blocks(sbi);
|
|
|
|
return sprintf(buf, "%llu\n", (unsigned long long)unusable);
|
2017-10-27 20:45:05 +08:00
|
|
|
}
|
|
|
|
|
2020-01-23 02:51:16 +08:00
|
|
|
/*
 * Show handler: print the filename encoding in use.
 *
 * With CONFIG_UNICODE and the casefold feature enabled the output is
 * "<charset> (<major>.<minor>.<rev>)", where the three numbers are the
 * bytes at bits 23..16, 15..8 and 7..0 of sb->s_encoding->version.
 * In every other case the output is "(none)".
 *
 * Both forms end in a newline, so the attribute reads the same way whichever
 * branch is taken.  scnprintf() is used so that the return value is the
 * number of bytes actually stored in @buf, never a would-be length.
 */
static ssize_t encoding_show(struct f2fs_attr *a,
		struct f2fs_sb_info *sbi, char *buf)
{
#ifdef CONFIG_UNICODE
	struct super_block *sb = sbi->sb;

	if (f2fs_sb_has_casefold(sbi))
		return scnprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
			sb->s_encoding->charset,
			(sb->s_encoding->version >> 16) & 0xff,
			(sb->s_encoding->version >> 8) & 0xff,
			sb->s_encoding->version & 0xff);
#endif
	return sprintf(buf, "(none)\n");
}
|
|
|
|
|
2020-02-26 11:08:16 +08:00
|
|
|
static ssize_t mounted_time_sec_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
|
|
|
return sprintf(buf, "%llu", SIT_I(sbi)->mounted_time);
|
|
|
|
}
|
|
|
|
|
2020-01-23 02:51:16 +08:00
|
|
|
#ifdef CONFIG_F2FS_STAT_FS
|
|
|
|
static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
|
|
|
struct f2fs_stat_info *si = F2FS_STAT(sbi);
|
|
|
|
|
|
|
|
return sprintf(buf, "%llu\n",
|
|
|
|
(unsigned long long)(si->tot_blks -
|
|
|
|
(si->bg_data_blks + si->bg_node_blks)));
|
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t moved_blocks_background_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
|
|
|
struct f2fs_stat_info *si = F2FS_STAT(sbi);
|
|
|
|
|
|
|
|
return sprintf(buf, "%llu\n",
|
|
|
|
(unsigned long long)(si->bg_data_blks + si->bg_node_blks));
|
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t avg_vblocks_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
|
|
|
struct f2fs_stat_info *si = F2FS_STAT(sbi);
|
|
|
|
|
|
|
|
si->dirty_count = dirty_segments(sbi);
|
|
|
|
f2fs_update_sit_info(sbi);
|
|
|
|
return sprintf(buf, "%llu\n", (unsigned long long)(si->avg_vblocks));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2020-07-03 17:51:29 +08:00
|
|
|
static ssize_t main_blkaddr_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
|
|
|
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
|
|
|
(unsigned long long)MAIN_BLKADDR(sbi));
|
|
|
|
}
|
|
|
|
|
2017-06-14 17:39:47 +08:00
|
|
|
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
|
|
|
unsigned char *ptr = NULL;
|
|
|
|
unsigned int *ui;
|
|
|
|
|
|
|
|
ptr = __struct_ptr(sbi, a->struct_type);
|
|
|
|
if (!ptr)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2018-02-26 22:04:13 +08:00
|
|
|
if (!strcmp(a->attr.name, "extension_list")) {
|
|
|
|
__u8 (*extlist)[F2FS_EXTENSION_LEN] =
|
|
|
|
sbi->raw_super->extension_list;
|
2018-02-28 17:07:27 +08:00
|
|
|
int cold_count = le32_to_cpu(sbi->raw_super->extension_count);
|
|
|
|
int hot_count = sbi->raw_super->hot_ext_count;
|
2018-02-26 22:04:13 +08:00
|
|
|
int len = 0, i;
|
|
|
|
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len,
|
2018-04-30 23:27:44 +08:00
|
|
|
"cold file extension:\n");
|
2018-02-28 17:07:27 +08:00
|
|
|
for (i = 0; i < cold_count; i++)
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n",
|
2018-02-28 17:07:27 +08:00
|
|
|
extlist[i]);
|
|
|
|
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len,
|
2018-04-30 23:27:44 +08:00
|
|
|
"hot file extension:\n");
|
2018-02-28 17:07:27 +08:00
|
|
|
for (i = cold_count; i < cold_count + hot_count; i++)
|
2020-03-11 17:33:53 +08:00
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n",
|
2018-02-26 22:04:13 +08:00
|
|
|
extlist[i]);
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2017-06-14 17:39:47 +08:00
|
|
|
ui = (unsigned int *)(ptr + a->offset);
|
|
|
|
|
2020-01-23 02:51:16 +08:00
|
|
|
return sprintf(buf, "%u\n", *ui);
|
2017-06-14 17:39:47 +08:00
|
|
|
}
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
static ssize_t __sbi_store(struct f2fs_attr *a,
|
2017-06-14 17:39:47 +08:00
|
|
|
struct f2fs_sb_info *sbi,
|
|
|
|
const char *buf, size_t count)
|
|
|
|
{
|
|
|
|
unsigned char *ptr;
|
|
|
|
unsigned long t;
|
|
|
|
unsigned int *ui;
|
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
ptr = __struct_ptr(sbi, a->struct_type);
|
|
|
|
if (!ptr)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2018-02-26 22:04:13 +08:00
|
|
|
if (!strcmp(a->attr.name, "extension_list")) {
|
|
|
|
const char *name = strim((char *)buf);
|
2018-02-28 17:07:27 +08:00
|
|
|
bool set = true, hot;
|
|
|
|
|
|
|
|
if (!strncmp(name, "[h]", 3))
|
|
|
|
hot = true;
|
|
|
|
else if (!strncmp(name, "[c]", 3))
|
|
|
|
hot = false;
|
|
|
|
else
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
name += 3;
|
2018-02-26 22:04:13 +08:00
|
|
|
|
2018-02-28 17:07:27 +08:00
|
|
|
if (*name == '!') {
|
2018-02-26 22:04:13 +08:00
|
|
|
name++;
|
|
|
|
set = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strlen(name) >= F2FS_EXTENSION_LEN)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
down_write(&sbi->sb_lock);
|
|
|
|
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using generically named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
ret = f2fs_update_extension_list(sbi, name, hot, set);
|
2018-02-26 22:04:13 +08:00
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
ret = f2fs_commit_super(sbi, false);
|
|
|
|
if (ret)
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
f2fs_update_extension_list(sbi, name, hot, !set);
|
2018-02-26 22:04:13 +08:00
|
|
|
out:
|
|
|
|
up_write(&sbi->sb_lock);
|
|
|
|
return ret ? ret : count;
|
|
|
|
}
|
|
|
|
|
2017-06-14 17:39:47 +08:00
|
|
|
ui = (unsigned int *)(ptr + a->offset);
|
|
|
|
|
|
|
|
ret = kstrtoul(skip_spaces(buf), 0, &t);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
#ifdef CONFIG_F2FS_FAULT_INJECTION
|
|
|
|
if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
|
|
|
|
return -EINVAL;
|
2019-01-04 17:39:53 +08:00
|
|
|
if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX)
|
|
|
|
return -EINVAL;
|
2017-06-14 17:39:47 +08:00
|
|
|
#endif
|
2017-06-26 16:24:41 +08:00
|
|
|
if (a->struct_type == RESERVED_BLOCKS) {
|
|
|
|
spin_lock(&sbi->stat_lock);
|
2017-12-28 07:05:52 +08:00
|
|
|
if (t > (unsigned long)(sbi->user_block_count -
|
2018-03-08 14:22:56 +08:00
|
|
|
F2FS_OPTION(sbi).root_reserved_blocks)) {
|
2017-06-26 16:24:41 +08:00
|
|
|
spin_unlock(&sbi->stat_lock);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
*ui = t;
|
2017-10-27 20:45:05 +08:00
|
|
|
sbi->current_reserved_blocks = min(sbi->reserved_blocks,
|
|
|
|
sbi->user_block_count - valid_user_blocks(sbi));
|
2017-06-26 16:24:41 +08:00
|
|
|
spin_unlock(&sbi->stat_lock);
|
|
|
|
return count;
|
|
|
|
}
|
f2fs: introduce discard_granularity sysfs entry
Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables
f2fs to issue 4K size discard in real-time discard mode. However, issuing
smaller discard may cost more lifetime but releasing less free space in
flash device. Since f2fs has ability of separating hot/cold data and
garbage collection, we can expect that small-sized invalid region would
expand soon with OPU, deletion or garbage collection on valid datas, so
it's better to delay or skip issuing smaller size discards, it could help
to reduce overmuch consumption of IO bandwidth and lifetime of flash
storage.
This patch makes f2fs select 64K as its default minimal discard
granularity, and issue discards only with a size that is not smaller than
the minimal granularity. It also exposes the discard granularity as a sysfs
entry so it can be configured for different scenarios.
Jaegeuk Kim:
We must issue all the accumulated discard commands when fstrim is called.
So, I've added pend_list_tag[] to indicate whether we should issue the
commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them.
P_TRIM is set once at a time, given fstrim trigger.
In addition, issue_discard_thread is calling too much due to the number of
discard commands remaining in the pending list. I added a timer to control
it likewise gc_thread.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-07 23:09:56 +08:00
|
|
|
|
|
|
|
if (!strcmp(a->attr.name, "discard_granularity")) {
|
|
|
|
if (t == 0 || t > MAX_PLIST_NUM)
|
|
|
|
return -EINVAL;
|
|
|
|
if (t == *ui)
|
|
|
|
return count;
|
2017-09-12 14:25:35 +08:00
|
|
|
*ui = t;
|
f2fs: introduce discard_granularity sysfs entry
Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables
f2fs to issue 4K size discard in real-time discard mode. However, issuing
smaller discard may cost more lifetime but releasing less free space in
flash device. Since f2fs has ability of separating hot/cold data and
garbage collection, we can expect that small-sized invalid region would
expand soon with OPU, deletion or garbage collection on valid datas, so
it's better to delay or skip issuing smaller size discards, it could help
to reduce overmuch consumption of IO bandwidth and lifetime of flash
storage.
This patch makes f2fs select 64K as its default minimal discard
granularity, and issue discards only with a size that is not smaller than
the minimal granularity. It also exposes the discard granularity as a sysfs
entry so it can be configured for different scenarios.
Jaegeuk Kim:
We must issue all the accumulated discard commands when fstrim is called.
So, I've added pend_list_tag[] to indicate whether we should issue the
commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them.
P_TRIM is set once at a time, given fstrim trigger.
In addition, issue_discard_thread is calling too much due to the number of
discard commands remaining in the pending list. I added a timer to control
it likewise gc_thread.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-07 23:09:56 +08:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2018-10-25 16:19:28 +08:00
|
|
|
if (!strcmp(a->attr.name, "migration_granularity")) {
|
|
|
|
if (t == 0 || t > sbi->segs_per_sec)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-09 10:25:23 +08:00
|
|
|
if (!strcmp(a->attr.name, "trim_sections"))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2018-05-08 05:22:40 +08:00
|
|
|
if (!strcmp(a->attr.name, "gc_urgent")) {
|
2020-07-02 12:14:14 +08:00
|
|
|
if (t == 0) {
|
|
|
|
sbi->gc_mode = GC_NORMAL;
|
|
|
|
} else if (t == 1) {
|
|
|
|
sbi->gc_mode = GC_URGENT_HIGH;
|
2018-05-08 05:22:40 +08:00
|
|
|
if (sbi->gc_thread) {
|
2018-08-05 12:45:35 +08:00
|
|
|
sbi->gc_thread->gc_wake = 1;
|
2018-05-08 05:22:40 +08:00
|
|
|
wake_up_interruptible_all(
|
|
|
|
&sbi->gc_thread->gc_wait_queue_head);
|
|
|
|
wake_up_discard_thread(sbi, true);
|
|
|
|
}
|
2020-07-02 12:14:14 +08:00
|
|
|
} else if (t == 2) {
|
|
|
|
sbi->gc_mode = GC_URGENT_LOW;
|
2018-05-08 05:22:40 +08:00
|
|
|
} else {
|
2020-07-02 12:14:14 +08:00
|
|
|
return -EINVAL;
|
2018-05-08 05:22:40 +08:00
|
|
|
}
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
if (!strcmp(a->attr.name, "gc_idle")) {
|
f2fs: support age threshold based garbage collection
There are several issues in current background GC algorithm:
- valid blocks is one of key factors during cost overhead calculation,
so if segment has less valid block, however even its age is young or
it locates hot segment, CB algorithm will still choose the segment as
victim, it's not appropriate.
- GCed data/node will go to existing logs, no matter in-there datas'
update frequency is the same or not, it may mix hot and cold data
again.
- GC alloctor mainly use LFS type segment, it will cost free segment
more quickly.
This patch introduces a new algorithm named age threshold based
garbage collection to solve above issues, there are three steps
mainly:
1. select a source victim:
- set an age threshold, and select candidates based on the threshold:
e.g.
0 means youngest, 100 means oldest, if we set age threshold to 80
then select dirty segments which have an age in the range [80, 100] as
candidates;
- set candidate_ratio threshold, and select candidates based the
ratio, so that we can shrink candidates to those oldest segments;
- select target segment with fewest valid blocks in order to
migrate blocks with minimum cost;
2. select a target victim:
- select candidates beased age threshold;
- set candidate_radius threshold, search candidates whose age is
around source victims, searching radius should less than the
radius threshold.
- select target segment with most valid blocks in order to avoid
migrating current target segment.
3. merge valid blocks from source victim into target victim with
SSR alloctor.
Test steps:
- create 160 dirty segments:
* half of them have 128 valid blocks per segment
* left of them have 384 valid blocks per segment
- run background GC
Benefit: GC count and block movement count both decrease obviously:
- Before:
- Valid: 86
- Dirty: 1
- Prefree: 11
- Free: 6001 (6001)
GC calls: 162 (BG: 220)
- data segments : 160 (160)
- node segments : 2 (2)
Try to move 41454 blocks (BG: 41454)
- data blocks : 40960 (40960)
- node blocks : 494 (494)
IPU: 0 blocks
SSR: 0 blocks in 0 segments
LFS: 41364 blocks in 81 segments
- After:
- Valid: 87
- Dirty: 0
- Prefree: 4
- Free: 6008 (6008)
GC calls: 75 (BG: 76)
- data segments : 74 (74)
- node segments : 1 (1)
Try to move 12813 blocks (BG: 12813)
- data blocks : 12544 (12544)
- node blocks : 269 (269)
IPU: 0 blocks
SSR: 12032 blocks in 77 segments
LFS: 855 blocks in 2 segments
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix a bug along with pinfile in-mem segment & clean up]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-08-04 21:14:49 +08:00
|
|
|
if (t == GC_IDLE_CB) {
|
2018-05-08 05:22:40 +08:00
|
|
|
sbi->gc_mode = GC_IDLE_CB;
|
f2fs: support age threshold based garbage collection
There are several issues in current background GC algorithm:
- valid blocks is one of key factors during cost overhead calculation,
so if segment has less valid block, however even its age is young or
it locates hot segment, CB algorithm will still choose the segment as
victim, it's not appropriate.
- GCed data/node will go to existing logs, no matter in-there datas'
update frequency is the same or not, it may mix hot and cold data
again.
- GC alloctor mainly use LFS type segment, it will cost free segment
more quickly.
This patch introduces a new algorithm named age threshold based
garbage collection to solve above issues, there are three steps
mainly:
1. select a source victim:
- set an age threshold, and select candidates based on the threshold:
e.g.
0 means youngest, 100 means oldest, if we set age threshold to 80
then select dirty segments which have an age in the range [80, 100] as
candidates;
- set candidate_ratio threshold, and select candidates based the
ratio, so that we can shrink candidates to those oldest segments;
- select target segment with fewest valid blocks in order to
migrate blocks with minimum cost;
2. select a target victim:
- select candidates beased age threshold;
- set candidate_radius threshold, search candidates whose age is
around source victims, searching radius should less than the
radius threshold.
- select target segment with most valid blocks in order to avoid
migrating current target segment.
3. merge valid blocks from source victim into target victim with
SSR alloctor.
Test steps:
- create 160 dirty segments:
* half of them have 128 valid blocks per segment
* left of them have 384 valid blocks per segment
- run background GC
Benefit: GC count and block movement count both decrease obviously:
- Before:
- Valid: 86
- Dirty: 1
- Prefree: 11
- Free: 6001 (6001)
GC calls: 162 (BG: 220)
- data segments : 160 (160)
- node segments : 2 (2)
Try to move 41454 blocks (BG: 41454)
- data blocks : 40960 (40960)
- node blocks : 494 (494)
IPU: 0 blocks
SSR: 0 blocks in 0 segments
LFS: 41364 blocks in 81 segments
- After:
- Valid: 87
- Dirty: 0
- Prefree: 4
- Free: 6008 (6008)
GC calls: 75 (BG: 76)
- data segments : 74 (74)
- node segments : 1 (1)
Try to move 12813 blocks (BG: 12813)
- data blocks : 12544 (12544)
- node blocks : 269 (269)
IPU: 0 blocks
SSR: 12032 blocks in 77 segments
LFS: 855 blocks in 2 segments
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix a bug along with pinfile in-mem segment & clean up]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-08-04 21:14:49 +08:00
|
|
|
} else if (t == GC_IDLE_GREEDY) {
|
2018-05-08 05:22:40 +08:00
|
|
|
sbi->gc_mode = GC_IDLE_GREEDY;
|
f2fs: support age threshold based garbage collection
There are several issues in current background GC algorithm:
- valid blocks is one of key factors during cost overhead calculation,
so if segment has less valid block, however even its age is young or
it locates hot segment, CB algorithm will still choose the segment as
victim, it's not appropriate.
- GCed data/node will go to existing logs, no matter in-there datas'
update frequency is the same or not, it may mix hot and cold data
again.
- GC alloctor mainly use LFS type segment, it will cost free segment
more quickly.
This patch introduces a new algorithm named age threshold based
garbage collection to solve above issues, there are three steps
mainly:
1. select a source victim:
- set an age threshold, and select candidates based on the threshold:
e.g.
0 means youngest, 100 means oldest, if we set age threshold to 80
then select dirty segments which have an age in the range [80, 100] as
candidates;
- set candidate_ratio threshold, and select candidates based the
ratio, so that we can shrink candidates to those oldest segments;
- select target segment with fewest valid blocks in order to
migrate blocks with minimum cost;
2. select a target victim:
- select candidates beased age threshold;
- set candidate_radius threshold, search candidates whose age is
around source victims, searching radius should less than the
radius threshold.
- select target segment with most valid blocks in order to avoid
migrating current target segment.
3. merge valid blocks from source victim into target victim with
SSR alloctor.
Test steps:
- create 160 dirty segments:
* half of them have 128 valid blocks per segment
* left of them have 384 valid blocks per segment
- run background GC
Benefit: GC count and block movement count both decrease obviously:
- Before:
- Valid: 86
- Dirty: 1
- Prefree: 11
- Free: 6001 (6001)
GC calls: 162 (BG: 220)
- data segments : 160 (160)
- node segments : 2 (2)
Try to move 41454 blocks (BG: 41454)
- data blocks : 40960 (40960)
- node blocks : 494 (494)
IPU: 0 blocks
SSR: 0 blocks in 0 segments
LFS: 41364 blocks in 81 segments
- After:
- Valid: 87
- Dirty: 0
- Prefree: 4
- Free: 6008 (6008)
GC calls: 75 (BG: 76)
- data segments : 74 (74)
- node segments : 1 (1)
Try to move 12813 blocks (BG: 12813)
- data blocks : 12544 (12544)
- node blocks : 269 (269)
IPU: 0 blocks
SSR: 12032 blocks in 77 segments
LFS: 855 blocks in 2 segments
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix a bug along with pinfile in-mem segment & clean up]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-08-04 21:14:49 +08:00
|
|
|
} else if (t == GC_IDLE_AT) {
|
|
|
|
if (!sbi->am.atgc_enabled)
|
|
|
|
return -EINVAL;
|
|
|
|
sbi->gc_mode = GC_AT;
|
|
|
|
} else {
|
2018-05-08 05:22:40 +08:00
|
|
|
sbi->gc_mode = GC_NORMAL;
|
f2fs: support age threshold based garbage collection
There are several issues in current background GC algorithm:
- valid blocks is one of key factors during cost overhead calculation,
so if segment has less valid block, however even its age is young or
it locates hot segment, CB algorithm will still choose the segment as
victim, it's not appropriate.
- GCed data/node will go to existing logs, no matter in-there datas'
update frequency is the same or not, it may mix hot and cold data
again.
- GC alloctor mainly use LFS type segment, it will cost free segment
more quickly.
This patch introduces a new algorithm named age threshold based
garbage collection to solve above issues, there are three steps
mainly:
1. select a source victim:
- set an age threshold, and select candidates based on the threshold:
e.g.
0 means youngest, 100 means oldest, if we set age threshold to 80
then select dirty segments which have an age in the range [80, 100] as
candidates;
- set candidate_ratio threshold, and select candidates based the
ratio, so that we can shrink candidates to those oldest segments;
- select target segment with fewest valid blocks in order to
migrate blocks with minimum cost;
2. select a target victim:
- select candidates beased age threshold;
- set candidate_radius threshold, search candidates whose age is
around source victims, searching radius should less than the
radius threshold.
- select target segment with most valid blocks in order to avoid
migrating current target segment.
3. merge valid blocks from source victim into target victim with
SSR alloctor.
Test steps:
- create 160 dirty segments:
* half of them have 128 valid blocks per segment
* left of them have 384 valid blocks per segment
- run background GC
Benefit: GC count and block movement count both decrease obviously:
- Before:
- Valid: 86
- Dirty: 1
- Prefree: 11
- Free: 6001 (6001)
GC calls: 162 (BG: 220)
- data segments : 160 (160)
- node segments : 2 (2)
Try to move 41454 blocks (BG: 41454)
- data blocks : 40960 (40960)
- node blocks : 494 (494)
IPU: 0 blocks
SSR: 0 blocks in 0 segments
LFS: 41364 blocks in 81 segments
- After:
- Valid: 87
- Dirty: 0
- Prefree: 4
- Free: 6008 (6008)
GC calls: 75 (BG: 76)
- data segments : 74 (74)
- node segments : 1 (1)
Try to move 12813 blocks (BG: 12813)
- data blocks : 12544 (12544)
- node blocks : 269 (269)
IPU: 0 blocks
SSR: 12032 blocks in 77 segments
LFS: 855 blocks in 2 segments
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix a bug along with pinfile in-mem segment & clean up]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2020-08-04 21:14:49 +08:00
|
|
|
}
|
2018-05-08 05:22:40 +08:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2019-01-16 04:02:15 +08:00
|
|
|
if (!strcmp(a->attr.name, "iostat_enable")) {
|
|
|
|
sbi->iostat_enable = !!t;
|
|
|
|
if (!sbi->iostat_enable)
|
|
|
|
f2fs_reset_iostat(sbi);
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2020-03-30 11:30:59 +08:00
|
|
|
if (!strcmp(a->attr.name, "iostat_period_ms")) {
|
|
|
|
if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS)
|
|
|
|
return -EINVAL;
|
|
|
|
spin_lock(&sbi->iostat_lock);
|
|
|
|
sbi->iostat_period_ms = (unsigned int)t;
|
|
|
|
spin_unlock(&sbi->iostat_lock);
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2019-01-16 04:02:15 +08:00
|
|
|
*ui = (unsigned int)t;
|
|
|
|
|
2017-06-14 17:39:47 +08:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2018-05-28 16:57:32 +08:00
|
|
|
static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi,
|
|
|
|
const char *buf, size_t count)
|
|
|
|
{
|
|
|
|
ssize_t ret;
|
|
|
|
bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") ||
|
|
|
|
a->struct_type == GC_THREAD);
|
|
|
|
|
2018-07-15 08:58:08 +08:00
|
|
|
if (gc_entry) {
|
|
|
|
if (!down_read_trylock(&sbi->sb->s_umount))
|
|
|
|
return -EAGAIN;
|
|
|
|
}
|
f2fs: clean up symbol namespace
As Ted reported:
"Hi, I was looking at f2fs's sources recently, and I noticed that there
is a very large number of non-static symbols which don't have a f2fs
prefix. There's well over a hundred (see attached below).
As one example, in fs/f2fs/dir.c there is:
unsigned char get_de_type(struct f2fs_dir_entry *de)
This function is clearly only useful for f2fs, but it has a generic
name. This means that if any other file system tries to have the same
symbol name, there will be a symbol conflict and the kernel would not
successfully build. It also means that when someone is looking f2fs
sources, it's not at all obvious whether a function such as
read_data_page(), invalidate_blocks(), is a generic kernel function
found in the fs, mm, or block layers, or a f2fs specific function.
You might want to fix this at some point. Hopefully Kent's bcachefs
isn't similarly using genericly named functions, since that might
cause conflicts with f2fs's functions --- but just as this would be a
problem that we would rightly insist that Kent fix, this is something
that we should have rightly insisted that f2fs should have fixed
before it was integrated into the mainline kernel.
acquire_orphan_inode
add_ino_entry
add_orphan_inode
allocate_data_block
allocate_new_segments
alloc_nid
alloc_nid_done
alloc_nid_failed
available_free_memory
...."
This patch adds "f2fs_" prefix for all non-static symbols in order to:
a) avoid conflict with other kernel generic symbols;
b) to indicate the function is f2fs specific one instead of generic
one;
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2018-05-30 00:20:41 +08:00
|
|
|
ret = __sbi_store(a, sbi, buf, count);
|
2018-05-28 16:57:32 +08:00
|
|
|
if (gc_entry)
|
|
|
|
up_read(&sbi->sb->s_umount);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-06-14 17:39:47 +08:00
|
|
|
static ssize_t f2fs_attr_show(struct kobject *kobj,
|
|
|
|
struct attribute *attr, char *buf)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
|
|
|
|
s_kobj);
|
|
|
|
struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
|
|
|
|
|
|
|
|
return a->show ? a->show(a, sbi, buf) : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
|
|
|
|
const char *buf, size_t len)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
|
|
|
|
s_kobj);
|
|
|
|
struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
|
|
|
|
|
|
|
|
return a->store ? a->store(a, sbi, buf, len) : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void f2fs_sb_release(struct kobject *kobj)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
|
|
|
|
s_kobj);
|
|
|
|
complete(&sbi->s_kobj_unregister);
|
|
|
|
}
|
|
|
|
|
2017-07-22 08:14:09 +08:00
|
|
|
/*
 * Identifiers for the per-feature attributes.  The value is kept in
 * f2fs_attr.id and is what f2fs_feature_show() switches on.  Numbering is
 * implicit and starts at 0, so new entries should be appended at the end.
 */
enum feat_id {
	FEAT_CRYPTO = 0,
	FEAT_BLKZONED,
	FEAT_ATOMIC_WRITE,
	FEAT_EXTRA_ATTR,
	FEAT_PROJECT_QUOTA,
	FEAT_INODE_CHECKSUM,
	FEAT_FLEXIBLE_INLINE_XATTR,
	FEAT_QUOTA_INO,
	FEAT_INODE_CRTIME,
	FEAT_LOST_FOUND,
	FEAT_VERITY,
	FEAT_SB_CHECKSUM,
	FEAT_CASEFOLD,
	FEAT_COMPRESSION,
	FEAT_TEST_DUMMY_ENCRYPTION_V2,
};
|
|
|
|
|
|
|
|
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
|
|
|
|
struct f2fs_sb_info *sbi, char *buf)
|
|
|
|
{
|
|
|
|
switch (a->id) {
|
|
|
|
case FEAT_CRYPTO:
|
|
|
|
case FEAT_BLKZONED:
|
|
|
|
case FEAT_ATOMIC_WRITE:
|
|
|
|
case FEAT_EXTRA_ATTR:
|
|
|
|
case FEAT_PROJECT_QUOTA:
|
|
|
|
case FEAT_INODE_CHECKSUM:
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add extra attribute i_inline_xattr_size in
inode layout, indicating that how many space inline xattr borrows from
block address mapping space in inode layout, by this, we can easily
locate and store flexible-sized inline xattr data in inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that, we have to cnosider backward compatibility which reserved
inline_data space, 200 bytes, all the time, reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 21:59:50 +08:00
|
|
|
case FEAT_FLEXIBLE_INLINE_XATTR:
|
2017-10-06 12:03:06 +08:00
|
|
|
case FEAT_QUOTA_INO:
|
2018-01-25 14:54:42 +08:00
|
|
|
case FEAT_INODE_CRTIME:
|
2018-03-15 18:51:41 +08:00
|
|
|
case FEAT_LOST_FOUND:
|
f2fs: add fs-verity support
Add fs-verity support to f2fs. fs-verity is a filesystem feature that
enables transparent integrity protection and authentication of read-only
files. It uses a dm-verity like mechanism at the file level: a Merkle
tree is used to verify any block in the file in log(filesize) time. It
is implemented mainly by helper functions in fs/verity/. See
Documentation/filesystems/fsverity.rst for the full documentation.
The f2fs support for fs-verity consists of:
- Adding a filesystem feature flag and an inode flag for fs-verity.
- Implementing the fsverity_operations to support enabling verity on an
inode and reading/writing the verity metadata.
- Updating ->readpages() to verify data as it's read from verity files
and to support reading verity metadata pages.
- Updating ->write_begin(), ->write_end(), and ->writepages() to support
writing verity metadata pages.
- Calling the fs-verity hooks for ->open(), ->setattr(), and ->ioctl().
Like ext4, f2fs stores the verity metadata (Merkle tree and
fsverity_descriptor) past the end of the file, starting at the first 64K
boundary beyond i_size. This approach works because (a) verity files
are readonly, and (b) pages fully beyond i_size aren't visible to
userspace but can be read/written internally by f2fs with only some
relatively small changes to f2fs. Extended attributes cannot be used
because (a) f2fs limits the total size of an inode's xattr entries to
4096 bytes, which wouldn't be enough for even a single Merkle tree
block, and (b) f2fs encryption doesn't encrypt xattrs, yet the verity
metadata *must* be encrypted when the file is because it contains hashes
of the plaintext data.
Acked-by: Jaegeuk Kim <jaegeuk@kernel.org>
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2019-07-23 00:26:24 +08:00
|
|
|
case FEAT_VERITY:
|
2018-09-28 20:25:56 +08:00
|
|
|
case FEAT_SB_CHECKSUM:
|
2019-07-24 07:05:28 +08:00
|
|
|
case FEAT_CASEFOLD:
|
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 18:07:14 +08:00
|
|
|
case FEAT_COMPRESSION:
|
fscrypt: support test_dummy_encryption=v2
v1 encryption policies are deprecated in favor of v2, and some new
features (e.g. encryption+casefolding) are only being added for v2.
Therefore, the "test_dummy_encryption" mount option (which is used for
encryption I/O testing with xfstests) needs to support v2 policies.
To do this, extend its syntax to be "test_dummy_encryption=v1" or
"test_dummy_encryption=v2". The existing "test_dummy_encryption" (no
argument) also continues to be accepted, to specify the default setting
-- currently v1, but the next patch changes it to v2.
To cleanly support both v1 and v2 while also making it easy to support
specifying other encryption settings in the future (say, accepting
"$contents_mode:$filenames_mode:v2"), make ext4 and f2fs maintain a
pointer to the dummy fscrypt_context rather than using mount flags.
To avoid concurrency issues, don't allow test_dummy_encryption to be set
or changed during a remount. (The former restriction is new, but
xfstests doesn't run into it, so no one should notice.)
Tested with 'gce-xfstests -c {ext4,f2fs}/encrypt -g auto'. On ext4,
there are two regressions, both of which are test bugs: ext4/023 and
ext4/028 fail because they set an xattr and expect it to be stored
inline, but the increase in size of the fscrypt_context from
24 to 40 bytes causes this xattr to be spilled into an external block.
Link: https://lore.kernel.org/r/20200512233251.118314-4-ebiggers@kernel.org
Acked-by: Jaegeuk Kim <jaegeuk@kernel.org>
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2020-05-13 07:32:50 +08:00
|
|
|
case FEAT_TEST_DUMMY_ENCRYPTION_V2:
|
2020-01-23 02:51:16 +08:00
|
|
|
return sprintf(buf, "supported\n");
|
2017-07-22 08:14:09 +08:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-06-14 17:39:47 +08:00
|
|
|
/*
 * Define a static struct f2fs_attr called f2fs_attr_<_name>.
 *
 * The attribute's file name is the stringified _name, its mode is _mode,
 * and _show/_store, _struct_type and _offset are stored verbatim in the
 * corresponding fields.
 */
#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
static struct f2fs_attr f2fs_attr_##_name = {				\
	.attr		= {						\
		.name	= __stringify(_name),				\
		.mode	= _mode						\
	},								\
	.show		= _show,					\
	.store		= _store,					\
	.struct_type	= _struct_type,					\
	.offset		= _offset					\
}
|
|
|
|
|
|
|
|
/*
 * Define a mode-0644 attribute f2fs_attr_<_name> handled by
 * f2fs_sbi_show()/f2fs_sbi_store(), whose offset is that of member
 * _elname inside struct _struct_name.
 */
#define F2FS_RW_ATTR(_struct_type, _struct_name, _name, _elname)	\
	F2FS_ATTR_OFFSET(_struct_type, _name, 0644,			\
			 f2fs_sbi_show, f2fs_sbi_store,			\
			 offsetof(struct _struct_name, _elname))
|
|
|
|
|
|
|
|
/*
 * Define a mode-0444 attribute f2fs_attr_<_name> whose show handler is
 * the function <_name>_show and which has no store handler.
 */
#define F2FS_GENERAL_RO_ATTR(_name)					\
static struct f2fs_attr f2fs_attr_##_name =				\
	__ATTR(_name, 0444, _name##_show, NULL)
|
|
|
|
|
2017-07-22 08:14:09 +08:00
|
|
|
/*
 * Define a mode-0444 feature attribute f2fs_attr_<_name> that is served
 * by f2fs_feature_show() and carries _id in its .id field.
 */
#define F2FS_FEATURE_RO_ATTR(_name, _id)				\
static struct f2fs_attr f2fs_attr_##_name = {				\
	.attr	= {							\
		.name	= __stringify(_name),				\
		.mode	= 0444						\
	},								\
	.show	= f2fs_feature_show,					\
	.id	= _id,							\
}
|
|
|
|
|
2020-01-23 02:51:16 +08:00
|
|
|
/*
 * Define a mode-0444 attribute f2fs_attr_<_name> served by
 * f2fs_sbi_show(), whose offset is that of member _elname inside
 * struct _struct_name.  No store handler is set.
 */
#define F2FS_STAT_ATTR(_struct_type, _struct_name, _name, _elname)	\
static struct f2fs_attr f2fs_attr_##_name = {				\
	.attr		= {						\
		.name	= __stringify(_name),				\
		.mode	= 0444						\
	},								\
	.show		= f2fs_sbi_show,				\
	.struct_type	= _struct_type,					\
	.offset		= offsetof(struct _struct_name, _elname),	\
}
|
|
|
|
|
2017-08-07 13:09:00 +08:00
|
|
|
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time,
|
|
|
|
urgent_sleep_time);
|
2017-06-14 17:39:47 +08:00
|
|
|
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
|
|
|
|
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
|
|
|
|
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
|
2018-05-08 05:22:40 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode);
|
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode);
|
2017-06-14 17:39:47 +08:00
|
|
|
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
|
|
|
|
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
|
f2fs: introduce discard_granularity sysfs entry
Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables
f2fs to issue 4K size discard in real-time discard mode. However, issuing
smaller discard may cost more lifetime but releasing less free space in
flash device. Since f2fs has ability of separating hot/cold data and
garbage collection, we can expect that small-sized invalid region would
expand soon with OPU, deletion or garbage collection on valid datas, so
it's better to delay or skip issuing smaller size discards, it could help
to reduce overmuch consumption of IO bandwidth and lifetime of flash
storage.
This patch makes f2fs selectng 64K size as its default minimal
granularity, and issue discard with the size which is not smaller than
minimal granularity. Also it exposes discard granularity as sysfs entry
for configuration in different scenario.
Jaegeuk Kim:
We must issue all the accumulated discard commands when fstrim is called.
So, I've added pend_list_tag[] to indicate whether we should issue the
commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them.
P_TRIM is set once at a time, given fstrim trigger.
In addition, issue_discard_thread is calling too much due to the number of
discard commands remaining in the pending list. I added a timer to control
it likewise gc_thread.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-07 23:09:56 +08:00
|
|
|
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
|
2017-06-26 16:24:41 +08:00
|
|
|
F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
|
2017-06-14 17:39:47 +08:00
|
|
|
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
|
|
|
|
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
|
|
|
|
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
|
|
|
|
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
|
2018-08-10 08:53:34 +08:00
|
|
|
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
|
2017-06-14 17:39:47 +08:00
|
|
|
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
|
2017-10-28 16:52:33 +08:00
|
|
|
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
|
2017-06-14 17:39:47 +08:00
|
|
|
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
|
|
|
|
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
|
|
|
|
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
|
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
|
2018-10-25 16:19:28 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity);
|
2017-06-14 17:39:47 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
|
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
|
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
|
2018-09-19 16:48:47 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
|
|
|
|
interval_time[DISCARD_TIME]);
|
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
|
2019-01-15 02:42:11 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
|
|
|
|
umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
|
2017-08-02 23:21:48 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
|
2020-03-30 11:30:59 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
|
2017-11-22 18:23:38 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
|
2017-12-08 08:25:39 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
|
2018-02-26 22:04:13 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
|
2017-06-14 17:39:47 +08:00
|
|
|
#ifdef CONFIG_F2FS_FAULT_INJECTION
|
|
|
|
F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
|
|
|
|
F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
|
|
|
|
#endif
|
2020-04-03 00:32:35 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
|
2020-06-05 02:49:43 +08:00
|
|
|
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
|
2017-10-24 15:46:54 +08:00
|
|
|
/*
 * Read-only attributes; each one is served by the function named
 * <attribute>_show.
 */
F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(free_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
F2FS_GENERAL_RO_ATTR(unusable);
F2FS_GENERAL_RO_ATTR(encoding);
F2FS_GENERAL_RO_ATTR(mounted_time_sec);
F2FS_GENERAL_RO_ATTR(main_blkaddr);
|
2020-01-23 02:51:16 +08:00
|
|
|
#ifdef CONFIG_F2FS_STAT_FS
/* Read-only counters taken from struct f2fs_stat_info members. */
F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info,
	       cp_foreground_calls, cp_count);
F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info,
	       cp_background_calls, bg_cp_count);
F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info,
	       gc_foreground_calls, call_count);
F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info,
	       gc_background_calls, bg_gc);

/* Read-only statistics served by their own <attribute>_show functions. */
F2FS_GENERAL_RO_ATTR(moved_blocks_background);
F2FS_GENERAL_RO_ATTR(moved_blocks_foreground);
F2FS_GENERAL_RO_ATTR(avg_vblocks);
#endif /* CONFIG_F2FS_STAT_FS */
|
2017-07-22 08:14:09 +08:00
|
|
|
|
2018-12-12 17:50:12 +08:00
|
|
|
/*
 * Feature attributes: one read-only entry per feature id, all served by
 * f2fs_feature_show().  Entries guarded by a CONFIG_* option are only
 * defined when that option is enabled.
 */
#ifdef CONFIG_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2,
		     FEAT_TEST_DUMMY_ENCRYPTION_V2);
#endif /* CONFIG_FS_ENCRYPTION */

#ifdef CONFIG_BLK_DEV_ZONED
F2FS_FEATURE_RO_ATTR(block_zoned, FEAT_BLKZONED);
#endif /* CONFIG_BLK_DEV_ZONED */

F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE);
F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR);
F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA);
F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM);
F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);

#ifdef CONFIG_FS_VERITY
F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY);
#endif /* CONFIG_FS_VERITY */

F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD);

#ifdef CONFIG_F2FS_FS_COMPRESSION
F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION);
#endif /* CONFIG_F2FS_FS_COMPRESSION */
|
2017-06-14 17:39:47 +08:00
|
|
|
|
|
|
|
/* Address of the struct attribute embedded in f2fs_attr_<_name>. */
#define ATTR_LIST(_name)	(&f2fs_attr_##_name.attr)
|
|
|
|
static struct attribute *f2fs_attrs[] = {
|
2017-08-07 13:09:00 +08:00
|
|
|
ATTR_LIST(gc_urgent_sleep_time),
|
2017-06-14 17:39:47 +08:00
|
|
|
ATTR_LIST(gc_min_sleep_time),
|
|
|
|
ATTR_LIST(gc_max_sleep_time),
|
|
|
|
ATTR_LIST(gc_no_gc_sleep_time),
|
|
|
|
ATTR_LIST(gc_idle),
|
2017-08-07 13:09:00 +08:00
|
|
|
ATTR_LIST(gc_urgent),
|
2017-06-14 17:39:47 +08:00
|
|
|
ATTR_LIST(reclaim_segments),
|
2019-11-23 03:53:10 +08:00
|
|
|
ATTR_LIST(main_blkaddr),
|
2017-06-14 17:39:47 +08:00
|
|
|
ATTR_LIST(max_small_discards),
|
f2fs: introduce discard_granularity sysfs entry
Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables
f2fs to issue 4K size discard in real-time discard mode. However, issuing
smaller discard may cost more lifetime but releasing less free space in
flash device. Since f2fs has ability of separating hot/cold data and
garbage collection, we can expect that small-sized invalid region would
expand soon with OPU, deletion or garbage collection on valid datas, so
it's better to delay or skip issuing smaller size discards, it could help
to reduce overmuch consumption of IO bandwidth and lifetime of flash
storage.
This patch makes f2fs selectng 64K size as its default minimal
granularity, and issue discard with the size which is not smaller than
minimal granularity. Also it exposes discard granularity as sysfs entry
for configuration in different scenario.
Jaegeuk Kim:
We must issue all the accumulated discard commands when fstrim is called.
So, I've added pend_list_tag[] to indicate whether we should issue the
commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them.
P_TRIM is set once at a time, given fstrim trigger.
In addition, issue_discard_thread is calling too much due to the number of
discard commands remaining in the pending list. I added a timer to control
it likewise gc_thread.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-07 23:09:56 +08:00
|
|
|
ATTR_LIST(discard_granularity),
|
2017-06-14 17:39:47 +08:00
|
|
|
ATTR_LIST(batched_trim_sections),
|
|
|
|
ATTR_LIST(ipu_policy),
|
|
|
|
ATTR_LIST(min_ipu_util),
|
|
|
|
ATTR_LIST(min_fsync_blocks),
|
2018-08-10 08:53:34 +08:00
|
|
|
ATTR_LIST(min_seq_blocks),
|
2017-06-14 17:39:47 +08:00
|
|
|
ATTR_LIST(min_hot_blocks),
|
2017-10-28 16:52:33 +08:00
|
|
|
ATTR_LIST(min_ssr_sections),
|
2017-06-14 17:39:47 +08:00
|
|
|
ATTR_LIST(max_victim_search),
|
2018-10-25 16:19:28 +08:00
|
|
|
ATTR_LIST(migration_granularity),
|
2017-06-14 17:39:47 +08:00
|
|
|
ATTR_LIST(dir_level),
|
|
|
|
ATTR_LIST(ram_thresh),
|
|
|
|
ATTR_LIST(ra_nid_pages),
|
|
|
|
ATTR_LIST(dirty_nats_ratio),
|
|
|
|
ATTR_LIST(cp_interval),
|
|
|
|
ATTR_LIST(idle_interval),
|
2018-09-19 16:48:47 +08:00
|
|
|
ATTR_LIST(discard_idle_interval),
|
|
|
|
ATTR_LIST(gc_idle_interval),
|
2019-01-15 02:42:11 +08:00
|
|
|
ATTR_LIST(umount_discard_timeout),
|
2017-08-02 23:21:48 +08:00
|
|
|
ATTR_LIST(iostat_enable),
|
2020-03-30 11:30:59 +08:00
|
|
|
ATTR_LIST(iostat_period_ms),
|
2017-11-22 18:23:38 +08:00
|
|
|
ATTR_LIST(readdir_ra),
|
2017-12-08 08:25:39 +08:00
|
|
|
ATTR_LIST(gc_pin_file_thresh),
|
2018-02-26 22:04:13 +08:00
|
|
|
ATTR_LIST(extension_list),
|
2017-06-14 17:39:47 +08:00
|
|
|
#ifdef CONFIG_F2FS_FAULT_INJECTION
|
|
|
|
ATTR_LIST(inject_rate),
|
|
|
|
ATTR_LIST(inject_type),
|
|
|
|
#endif
|
2020-04-03 00:32:35 +08:00
|
|
|
ATTR_LIST(data_io_flag),
|
2020-06-05 02:49:43 +08:00
|
|
|
ATTR_LIST(node_io_flag),
|
2017-10-24 15:46:54 +08:00
|
|
|
ATTR_LIST(dirty_segments),
|
2020-01-23 02:51:16 +08:00
|
|
|
ATTR_LIST(free_segments),
|
2019-05-30 08:49:06 +08:00
|
|
|
ATTR_LIST(unusable),
|
2017-06-14 17:39:47 +08:00
|
|
|
ATTR_LIST(lifetime_write_kbytes),
|
2017-07-22 08:14:09 +08:00
|
|
|
ATTR_LIST(features),
|
2017-06-26 16:24:41 +08:00
|
|
|
ATTR_LIST(reserved_blocks),
|
2017-10-27 20:45:05 +08:00
|
|
|
ATTR_LIST(current_reserved_blocks),
|
2019-07-24 07:05:28 +08:00
|
|
|
ATTR_LIST(encoding),
|
2020-02-26 11:08:16 +08:00
|
|
|
ATTR_LIST(mounted_time_sec),
|
2020-01-23 02:51:16 +08:00
|
|
|
#ifdef CONFIG_F2FS_STAT_FS
|
|
|
|
ATTR_LIST(cp_foreground_calls),
|
|
|
|
ATTR_LIST(cp_background_calls),
|
|
|
|
ATTR_LIST(gc_foreground_calls),
|
|
|
|
ATTR_LIST(gc_background_calls),
|
|
|
|
ATTR_LIST(moved_blocks_foreground),
|
|
|
|
ATTR_LIST(moved_blocks_background),
|
|
|
|
ATTR_LIST(avg_vblocks),
|
|
|
|
#endif
|
2017-06-14 17:39:47 +08:00
|
|
|
NULL,
|
|
|
|
};
|
2019-06-08 01:40:41 +08:00
|
|
|
/* Defines f2fs_groups, which f2fs_sb_ktype installs as its default_groups. */
ATTRIBUTE_GROUPS(f2fs);
|
2017-06-14 17:39:47 +08:00
|
|
|
|
2017-07-22 08:14:09 +08:00
|
|
|
static struct attribute *f2fs_feat_attrs[] = {
|
2018-12-12 17:50:12 +08:00
|
|
|
#ifdef CONFIG_FS_ENCRYPTION
|
2017-07-22 08:14:09 +08:00
|
|
|
ATTR_LIST(encryption),
|
fscrypt: support test_dummy_encryption=v2
v1 encryption policies are deprecated in favor of v2, and some new
features (e.g. encryption+casefolding) are only being added for v2.
Therefore, the "test_dummy_encryption" mount option (which is used for
encryption I/O testing with xfstests) needs to support v2 policies.
To do this, extend its syntax to be "test_dummy_encryption=v1" or
"test_dummy_encryption=v2". The existing "test_dummy_encryption" (no
argument) also continues to be accepted, to specify the default setting
-- currently v1, but the next patch changes it to v2.
To cleanly support both v1 and v2 while also making it easy to support
specifying other encryption settings in the future (say, accepting
"$contents_mode:$filenames_mode:v2"), make ext4 and f2fs maintain a
pointer to the dummy fscrypt_context rather than using mount flags.
To avoid concurrency issues, don't allow test_dummy_encryption to be set
or changed during a remount. (The former restriction is new, but
xfstests doesn't run into it, so no one should notice.)
Tested with 'gce-xfstests -c {ext4,f2fs}/encrypt -g auto'. On ext4,
there are two regressions, both of which are test bugs: ext4/023 and
ext4/028 fail because they set an xattr and expect it to be stored
inline, but the increase in size of the fscrypt_context from
24 to 40 bytes causes this xattr to be spilled into an external block.
Link: https://lore.kernel.org/r/20200512233251.118314-4-ebiggers@kernel.org
Acked-by: Jaegeuk Kim <jaegeuk@kernel.org>
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2020-05-13 07:32:50 +08:00
|
|
|
ATTR_LIST(test_dummy_encryption_v2),
|
2017-07-22 08:14:09 +08:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_BLK_DEV_ZONED
|
|
|
|
ATTR_LIST(block_zoned),
|
|
|
|
#endif
|
|
|
|
ATTR_LIST(atomic_write),
|
|
|
|
ATTR_LIST(extra_attr),
|
|
|
|
ATTR_LIST(project_quota),
|
|
|
|
ATTR_LIST(inode_checksum),
|
f2fs: support flexible inline xattr size
Now, in product, more and more features based on file encryption were
introduced, their demand of xattr space is increasing, however, inline
xattr has fixed-size of 200 bytes, once inline xattr space is full, new
increased xattr data would occupy additional xattr block which may bring
us more space usage and performance regression during persisting.
In order to resolve above issue, it's better to expand inline xattr size
flexibly according to user's requirement.
So this patch introduces new filesystem feature 'flexible inline xattr',
and new mount option 'inline_xattr_size=%u', once mkfs enables the
feature, we can use the option to make f2fs supporting flexible inline
xattr size.
To support this feature, we add an extra attribute, i_inline_xattr_size, to
the inode layout, indicating how much space the inline xattr borrows from the
block address mapping space in the inode layout. With this, we can easily
locate and store flexible-sized inline xattr data in the inode.
Inode disk layout:
+----------------------+
| .i_mode |
| ... |
| .i_ext |
+----------------------+
| .i_extra_isize |
| .i_inline_xattr_size |-----------+
| ... | |
+----------------------+ |
| .i_addr | |
| - block address or | |
| - inline data | |
+----------------------+<---+ v
| inline xattr | +---inline xattr range
+----------------------+<---+
| .i_nid |
+----------------------+
| node_footer |
| (nid, ino, offset) |
+----------------------+
Note that we have to consider backward compatibility: inline_data space of
200 bytes was always reserved, as reported by Sheng Yong.
Previous inline data or directory always reserved 200 bytes in inode layout,
even if inline_xattr is disabled. In order to keep inline_dentry's structure
for backward compatibility, we get the space back only from inline_data.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Reported-by: Sheng Yong <shengyong1@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06 21:59:50 +08:00
|
|
|
ATTR_LIST(flexible_inline_xattr),
|
2017-10-06 12:03:06 +08:00
|
|
|
ATTR_LIST(quota_ino),
|
2018-01-25 14:54:42 +08:00
|
|
|
ATTR_LIST(inode_crtime),
|
2018-03-15 18:51:41 +08:00
|
|
|
ATTR_LIST(lost_found),
|
f2fs: add fs-verity support
Add fs-verity support to f2fs. fs-verity is a filesystem feature that
enables transparent integrity protection and authentication of read-only
files. It uses a dm-verity like mechanism at the file level: a Merkle
tree is used to verify any block in the file in log(filesize) time. It
is implemented mainly by helper functions in fs/verity/. See
Documentation/filesystems/fsverity.rst for the full documentation.
The f2fs support for fs-verity consists of:
- Adding a filesystem feature flag and an inode flag for fs-verity.
- Implementing the fsverity_operations to support enabling verity on an
inode and reading/writing the verity metadata.
- Updating ->readpages() to verify data as it's read from verity files
and to support reading verity metadata pages.
- Updating ->write_begin(), ->write_end(), and ->writepages() to support
writing verity metadata pages.
- Calling the fs-verity hooks for ->open(), ->setattr(), and ->ioctl().
Like ext4, f2fs stores the verity metadata (Merkle tree and
fsverity_descriptor) past the end of the file, starting at the first 64K
boundary beyond i_size. This approach works because (a) verity files
are readonly, and (b) pages fully beyond i_size aren't visible to
userspace but can be read/written internally by f2fs with only some
relatively small changes to f2fs. Extended attributes cannot be used
because (a) f2fs limits the total size of an inode's xattr entries to
4096 bytes, which wouldn't be enough for even a single Merkle tree
block, and (b) f2fs encryption doesn't encrypt xattrs, yet the verity
metadata *must* be encrypted when the file is because it contains hashes
of the plaintext data.
Acked-by: Jaegeuk Kim <jaegeuk@kernel.org>
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2019-07-23 00:26:24 +08:00
|
|
|
#ifdef CONFIG_FS_VERITY
|
|
|
|
ATTR_LIST(verity),
|
|
|
|
#endif
|
2018-09-28 20:25:56 +08:00
|
|
|
ATTR_LIST(sb_checksum),
|
2019-07-24 07:05:28 +08:00
|
|
|
ATTR_LIST(casefold),
|
2020-03-27 18:29:00 +08:00
|
|
|
#ifdef CONFIG_F2FS_FS_COMPRESSION
|
f2fs: support data compression
This patch tries to support compression in f2fs.
- New term named cluster is defined as basic unit of compression, file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, compression size is also cluster size, each of
cluster can be compressed or not.
- In cluster metadata layout, one special flag is used to indicate cluster
is compressed one or normal one, for compressed cluster, following metadata
maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs stores
data including compress header and compressed data.
- In order to eliminate write amplification during overwrite, F2FS only
support compression on write-once file, data can be compressed only when
all logical blocks in file are valid and cluster compress ratio is lower
than specified threshold.
- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
Compress metadata layout:
[Dnode Structure]
+-----------------------------------------------+
| cluster 1 | cluster 2 | ......... | cluster N |
+-----------------------------------------------+
. . . .
. . . .
. Compressed Cluster . . Normal Cluster .
+----------+---------+---------+---------+ +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+ +---------+---------+---------+---------+
. .
. .
. .
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+
Changelog:
20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().
20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().
20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().
20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.
20190402
- don't preallocate blocks for compressed file.
- add lz4 compress algorithm
- process multiple post read works in one workqueue
Now f2fs supports processing post read work in multiple workqueue,
it shows low performance due to schedule overhead of multiple
workqueue executing orderly.
20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR
One cluster contain 4 blocks
before overwrite after overwrite
- VVVV -> CVNN
- CVNN -> VVVV
- CVNN -> CVNN
- CVNN -> CVVV
- CVVV -> CVNN
- CVVV -> CVVV
20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
codes, add kconfig F2FS_FS_{LZO,LZ4} to cover backend algorithm.
note that: will remove lzo backend if Jaegeuk agreed that too.
- update codes according to Eric's comments.
20191101
- apply fixes from Jaegeuk
20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity
20191216
- apply fixes from Jaegeuk
20200117
- fix to avoid NULL pointer dereference
[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats
- fix overwrite/mmap bugs
- address 32bit build error, reported by Geert.
- bug fixes when handling errors and i_compressed_blocks
Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2019-11-01 18:07:14 +08:00
|
|
|
ATTR_LIST(compression),
|
2020-03-27 18:29:00 +08:00
|
|
|
#endif
|
2017-07-22 08:14:09 +08:00
|
|
|
NULL,
|
|
|
|
};
|
2019-06-08 01:40:41 +08:00
|
|
|
/* Defines f2fs_feat_groups, which f2fs_feat_ktype installs as its default_groups. */
ATTRIBUTE_GROUPS(f2fs_feat);
|
2017-07-22 08:14:09 +08:00
|
|
|
|
2017-06-14 17:39:47 +08:00
|
|
|
static const struct sysfs_ops f2fs_attr_ops = {
|
|
|
|
.show = f2fs_attr_show,
|
|
|
|
.store = f2fs_attr_store,
|
|
|
|
};
|
|
|
|
|
2017-07-22 08:14:09 +08:00
|
|
|
static struct kobj_type f2fs_sb_ktype = {
|
2019-06-08 01:40:41 +08:00
|
|
|
.default_groups = f2fs_groups,
|
2017-06-14 17:39:47 +08:00
|
|
|
.sysfs_ops = &f2fs_attr_ops,
|
|
|
|
.release = f2fs_sb_release,
|
|
|
|
};
|
|
|
|
|
2017-07-22 08:14:09 +08:00
|
|
|
static struct kobj_type f2fs_ktype = {
|
|
|
|
.sysfs_ops = &f2fs_attr_ops,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct kset f2fs_kset = {
|
2020-07-24 16:55:28 +08:00
|
|
|
.kobj = {.ktype = &f2fs_ktype},
|
2017-07-22 08:14:09 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct kobj_type f2fs_feat_ktype = {
|
2019-06-08 01:40:41 +08:00
|
|
|
.default_groups = f2fs_feat_groups,
|
2017-07-22 08:14:09 +08:00
|
|
|
.sysfs_ops = &f2fs_attr_ops,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct kobject f2fs_feat = {
|
|
|
|
.kset = &f2fs_kset,
|
|
|
|
};
|
|
|
|
|
2018-07-07 11:50:57 +08:00
|
|
|
/*
 * seq_file show callback for the "segment_info" proc entry.
 *
 * After a two-line header, prints one "type|valid_blocks" entry for each of
 * the raw_super->segment_count_main segments, ten entries per row.  Every
 * row starts with the number of its first segment.
 *
 * Always returns 0.
 */
static int __maybe_unused segment_info_seq_show(struct seq_file *seq,
						void *offset)
{
	struct super_block *sb = seq->private;
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	unsigned int total_segs =
			le32_to_cpu(sbi->raw_super->segment_count_main);
	int segno;

	seq_puts(seq, "format: segment_type|valid_blocks\n"
		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");

	for (segno = 0; segno < total_segs; segno++) {
		struct seg_entry *se = get_seg_entry(sbi, segno);
		int column = segno % 10;

		/* The first entry of a row carries the row label. */
		if (column == 0)
			seq_printf(seq, "%-10d", segno);

		seq_printf(seq, "%d|%-3u", se->type, se->valid_blocks);

		/* End the row after ten entries or after the last segment. */
		if (column == 9 || segno == (total_segs - 1))
			seq_putc(seq, '\n');
		else
			seq_putc(seq, ' ');
	}

	return 0;
}
|
|
|
|
|
2018-07-07 11:50:57 +08:00
|
|
|
/*
 * seq_file show callback for the "segment_bits" proc entry.
 *
 * After a two-line header, prints one row per segment (there are
 * raw_super->segment_count_main of them): the segment number, its
 * "type|valid_blocks|" pair, and then the SIT_VBLOCK_MAP_SIZE bytes of
 * se->cur_valid_map as two-digit hex values.
 *
 * Always returns 0.
 */
static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
						void *offset)
{
	struct super_block *sb = seq->private;
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	unsigned int total_segs =
			le32_to_cpu(sbi->raw_super->segment_count_main);
	int segno, byte;

	seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");

	for (segno = 0; segno < total_segs; segno++) {
		struct seg_entry *se = get_seg_entry(sbi, segno);

		seq_printf(seq, "%-10d", segno);
		seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks);

		/* Dump the valid-block bitmap one byte at a time. */
		for (byte = 0; byte < SIT_VBLOCK_MAP_SIZE; byte++)
			seq_printf(seq, " %.2x", se->cur_valid_map[byte]);

		seq_putc(seq, '\n');
	}

	return 0;
}
|
|
|
|
|
2020-03-30 11:30:59 +08:00
|
|
|
void f2fs_record_iostat(struct f2fs_sb_info *sbi)
|
|
|
|
{
|
|
|
|
unsigned long long iostat_diff[NR_IO_TYPE];
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (time_is_after_jiffies(sbi->iostat_next_period))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Need double check under the lock */
|
|
|
|
spin_lock(&sbi->iostat_lock);
|
|
|
|
if (time_is_after_jiffies(sbi->iostat_next_period)) {
|
|
|
|
spin_unlock(&sbi->iostat_lock);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
sbi->iostat_next_period = jiffies +
|
|
|
|
msecs_to_jiffies(sbi->iostat_period_ms);
|
|
|
|
|
|
|
|
for (i = 0; i < NR_IO_TYPE; i++) {
|
2020-04-16 18:16:56 +08:00
|
|
|
iostat_diff[i] = sbi->rw_iostat[i] -
|
|
|
|
sbi->prev_rw_iostat[i];
|
|
|
|
sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
|
2020-03-30 11:30:59 +08:00
|
|
|
}
|
|
|
|
spin_unlock(&sbi->iostat_lock);
|
|
|
|
|
|
|
|
trace_f2fs_iostat(sbi, iostat_diff);
|
|
|
|
}
|
|
|
|
|
2018-07-07 11:50:57 +08:00
|
|
|
/*
 * seq_file show callback for the "iostat_info" proc entry.
 *
 * Prints nothing when sbi->iostat_enable is clear.  Otherwise prints the
 * current wall-clock time in seconds followed by the sbi->rw_iostat[]
 * counters, grouped into [WRITE], [READ] and [OTHER] sections.
 *
 * Always returns 0.
 */
static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
					       void *offset)
{
	struct super_block *sb = seq->private;
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	time64_t now = ktime_get_real_seconds();

	if (!sbi->iostat_enable)
		return 0;

	/* time64_t is a signed 64-bit type, so it needs a signed conversion. */
	seq_printf(seq, "time: %-16lld\n", now);

	/* print app write IOs */
	seq_puts(seq, "[WRITE]\n");
	seq_printf(seq, "app buffered: %-16llu\n",
				sbi->rw_iostat[APP_BUFFERED_IO]);
	seq_printf(seq, "app direct: %-16llu\n",
				sbi->rw_iostat[APP_DIRECT_IO]);
	seq_printf(seq, "app mapped: %-16llu\n",
				sbi->rw_iostat[APP_MAPPED_IO]);

	/* print fs write IOs */
	seq_printf(seq, "fs data: %-16llu\n",
				sbi->rw_iostat[FS_DATA_IO]);
	seq_printf(seq, "fs node: %-16llu\n",
				sbi->rw_iostat[FS_NODE_IO]);
	seq_printf(seq, "fs meta: %-16llu\n",
				sbi->rw_iostat[FS_META_IO]);
	seq_printf(seq, "fs gc data: %-16llu\n",
				sbi->rw_iostat[FS_GC_DATA_IO]);
	seq_printf(seq, "fs gc node: %-16llu\n",
				sbi->rw_iostat[FS_GC_NODE_IO]);
	seq_printf(seq, "fs cp data: %-16llu\n",
				sbi->rw_iostat[FS_CP_DATA_IO]);
	seq_printf(seq, "fs cp node: %-16llu\n",
				sbi->rw_iostat[FS_CP_NODE_IO]);
	seq_printf(seq, "fs cp meta: %-16llu\n",
				sbi->rw_iostat[FS_CP_META_IO]);

	/* print app read IOs */
	seq_puts(seq, "[READ]\n");
	seq_printf(seq, "app buffered: %-16llu\n",
				sbi->rw_iostat[APP_BUFFERED_READ_IO]);
	seq_printf(seq, "app direct: %-16llu\n",
				sbi->rw_iostat[APP_DIRECT_READ_IO]);
	seq_printf(seq, "app mapped: %-16llu\n",
				sbi->rw_iostat[APP_MAPPED_READ_IO]);

	/* print fs read IOs */
	seq_printf(seq, "fs data: %-16llu\n",
				sbi->rw_iostat[FS_DATA_READ_IO]);
	seq_printf(seq, "fs gc data: %-16llu\n",
				sbi->rw_iostat[FS_GDATA_READ_IO]);
	seq_printf(seq, "fs compr_data: %-16llu\n",
				sbi->rw_iostat[FS_CDATA_READ_IO]);
	seq_printf(seq, "fs node: %-16llu\n",
				sbi->rw_iostat[FS_NODE_READ_IO]);
	seq_printf(seq, "fs meta: %-16llu\n",
				sbi->rw_iostat[FS_META_READ_IO]);

	/* print other IOs */
	seq_puts(seq, "[OTHER]\n");
	seq_printf(seq, "fs discard: %-16llu\n",
				sbi->rw_iostat[FS_DISCARD]);

	return 0;
}
|
|
|
|
|
2018-07-23 22:10:22 +08:00
|
|
|
/*
 * seq_file show callback for the "victim_bits" proc entry.
 *
 * After a one-line header, prints the bit of dirty_i->victim_secmap for each
 * of the MAIN_SECS(sbi) sections as 0 or 1, ten per row.  Every row starts
 * with the number of its first section.
 *
 * Always returns 0.
 */
static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
						void *offset)
{
	struct super_block *sb = seq->private;
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	int secno;

	seq_puts(seq, "format: victim_secmap bitmaps\n");

	for (secno = 0; secno < MAIN_SECS(sbi); secno++) {
		int column = secno % 10;

		/* The first entry of a row carries the row label. */
		if (column == 0)
			seq_printf(seq, "%-10d", secno);

		seq_printf(seq, "%d",
			   test_bit(secno, dirty_i->victim_secmap) ? 1 : 0);

		/* End the row after ten entries or after the last section. */
		if (column == 9 || secno == (MAIN_SECS(sbi) - 1))
			seq_putc(seq, '\n');
		else
			seq_putc(seq, ' ');
	}

	return 0;
}
|
|
|
|
|
2017-07-27 02:24:13 +08:00
|
|
|
/*
 * f2fs_init_sysfs - create the filesystem-wide sysfs and procfs roots
 *
 * Names the f2fs kset "f2fs", registers it with fs_kobj as its parent, adds
 * the static "features" kobject (a member of that kset), and finally creates
 * the "fs/f2fs" proc directory.
 *
 * Return: 0 on success, otherwise the error returned by naming or
 * registering the kset or by adding the "features" kobject.  The result of
 * proc_mkdir() is not treated as an error; it is only stored in
 * f2fs_proc_root.
 */
int __init f2fs_init_sysfs(void)
{
	int ret;

	/* kobject_set_name() reports failure; do not go on with an unnamed kset. */
	ret = kobject_set_name(&f2fs_kset.kobj, "f2fs");
	if (ret)
		return ret;

	f2fs_kset.kobj.parent = fs_kobj;
	ret = kset_register(&f2fs_kset);
	if (ret)
		return ret;

	ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype,
				   NULL, "features");
	if (ret) {
		/* A failed kobject_init_and_add() still needs a kobject_put(). */
		kobject_put(&f2fs_feat);
		kset_unregister(&f2fs_kset);
		return ret;
	}

	f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
	return 0;
}
|
|
|
|
|
2017-07-27 02:24:13 +08:00
|
|
|
void f2fs_exit_sysfs(void)
|
2017-06-14 17:39:47 +08:00
|
|
|
{
|
2017-07-22 08:14:09 +08:00
|
|
|
kobject_put(&f2fs_feat);
|
|
|
|
kset_unregister(&f2fs_kset);
|
2017-06-14 17:39:47 +08:00
|
|
|
remove_proc_entry("fs/f2fs", NULL);
|
2017-07-22 08:14:09 +08:00
|
|
|
f2fs_proc_root = NULL;
|
2017-06-14 17:39:47 +08:00
|
|
|
}
|
|
|
|
|
2017-07-27 02:24:13 +08:00
|
|
|
int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
|
2017-06-14 17:39:47 +08:00
|
|
|
{
|
|
|
|
struct super_block *sb = sbi->sb;
|
|
|
|
int err;
|
|
|
|
|
2017-07-22 08:14:09 +08:00
|
|
|
sbi->s_kobj.kset = &f2fs_kset;
|
|
|
|
init_completion(&sbi->s_kobj_unregister);
|
|
|
|
err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL,
|
|
|
|
"%s", sb->s_id);
|
2019-12-30 17:41:41 +08:00
|
|
|
if (err) {
|
|
|
|
kobject_put(&sbi->s_kobj);
|
|
|
|
wait_for_completion(&sbi->s_kobj_unregister);
|
2017-07-22 08:14:09 +08:00
|
|
|
return err;
|
2019-12-30 17:41:41 +08:00
|
|
|
}
|
2017-07-22 08:14:09 +08:00
|
|
|
|
2017-06-14 17:39:47 +08:00
|
|
|
if (f2fs_proc_root)
|
|
|
|
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
|
|
|
|
|
|
|
|
if (sbi->s_proc) {
|
2018-05-15 21:57:23 +08:00
|
|
|
proc_create_single_data("segment_info", S_IRUGO, sbi->s_proc,
|
|
|
|
segment_info_seq_show, sb);
|
|
|
|
proc_create_single_data("segment_bits", S_IRUGO, sbi->s_proc,
|
|
|
|
segment_bits_seq_show, sb);
|
|
|
|
proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
|
|
|
|
iostat_info_seq_show, sb);
|
2018-07-23 22:10:22 +08:00
|
|
|
proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc,
|
|
|
|
victim_bits_seq_show, sb);
|
2017-06-14 17:39:47 +08:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-07-27 02:24:13 +08:00
|
|
|
void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
|
2017-06-14 17:39:47 +08:00
|
|
|
{
|
|
|
|
if (sbi->s_proc) {
|
2017-08-02 23:21:48 +08:00
|
|
|
remove_proc_entry("iostat_info", sbi->s_proc);
|
2017-06-14 17:39:47 +08:00
|
|
|
remove_proc_entry("segment_info", sbi->s_proc);
|
|
|
|
remove_proc_entry("segment_bits", sbi->s_proc);
|
2018-07-23 22:10:22 +08:00
|
|
|
remove_proc_entry("victim_bits", sbi->s_proc);
|
2017-06-14 17:39:47 +08:00
|
|
|
remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
|
|
|
|
}
|
2017-07-22 08:14:09 +08:00
|
|
|
kobject_del(&sbi->s_kobj);
|
2019-12-14 10:32:16 +08:00
|
|
|
kobject_put(&sbi->s_kobj);
|
2020-10-12 21:09:48 +08:00
|
|
|
wait_for_completion(&sbi->s_kobj_unregister);
|
2017-06-14 17:39:47 +08:00
|
|
|
}
|