mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-21 03:33:59 +08:00
75b463d2b4
Since commit d4682ba03e
("Btrfs: sync log after logging new name") we
started to commit logs, and fallback to transaction commits when we failed
to log the new names or commit the logs, after link and rename operations
when the target inodes (or their parents) were previously logged in the
current transaction. This was to avoid losing directories despite an
explicit fsync on them when they are ancestors of some inode that got a
new named logged, due to a link or rename operation. However that adds the
cost of starting IO and waiting for it to complete, which can cause higher
latencies for applications.
Instead of doing that, just make sure that when we log a new name for an
inode we don't mark any of its ancestors as logged, so that if any one
does an fsync against any of them, without doing any other change on them,
the fsync commits the log. This way we only pay the cost of a log commit
(or a transaction commit if something goes wrong or a new block group was
created) if the application explicitly asks to fsync any of the parent
directories.
Using dbench, which mixes several filesystems operations including renames,
revealed some significant latency gains. The following script that uses
dbench was used to test this:
#!/bin/bash
DEV=/dev/nvme0n1
MNT=/mnt/btrfs
MOUNT_OPTIONS="-o ssd -o space_cache=v2"
MKFS_OPTIONS="-m single -d single"
THREADS=16
echo "performance" | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
mkfs.btrfs -f $MKFS_OPTIONS $DEV
mount $MOUNT_OPTIONS $DEV $MNT
dbench -t 300 -D $MNT $THREADS
umount $MNT
The test was run on bare metal, no virtualization, on a box with 12 cores
(Intel i7-8700), 64Gb of RAM and using a NVMe device, with a kernel
configuration that is the default of typical distributions (debian in this
case), without debug options enabled (kasan, kmemleak, slub debug, debug
of page allocations, lock debugging, etc).
Results before this patch:
Operation Count AvgLat MaxLat
----------------------------------------
NTCreateX 10750455 0.011 155.088
Close 7896674 0.001 0.243
Rename 455222 2.158 1101.947
Unlink 2171189 0.067 121.638
Deltree 256 2.425 7.816
Mkdir 128 0.002 0.003
Qpathinfo 9744323 0.006 21.370
Qfileinfo 1707092 0.001 0.146
Qfsinfo 1786756 0.001 11.228
Sfileinfo 875612 0.003 21.263
Find 3767281 0.025 9.617
WriteX 5356924 0.011 211.390
ReadX 16852694 0.003 9.442
LockX 35008 0.002 0.119
UnlockX 35008 0.001 0.138
Flush 753458 4.252 1102.249
Throughput 1128.35 MB/sec 16 clients 16 procs max_latency=1102.255 ms
Results after this patch:
16 clients, after
Operation Count AvgLat MaxLat
----------------------------------------
NTCreateX 11471098 0.012 448.281
Close 8426396 0.001 0.925
Rename 485746 0.123 267.183
Unlink 2316477 0.080 63.433
Deltree 288 2.830 11.144
Mkdir 144 0.003 0.010
Qpathinfo 10397420 0.006 10.288
Qfileinfo 1822039 0.001 0.169
Qfsinfo 1906497 0.002 14.039
Sfileinfo 934433 0.004 2.438
Find 4019879 0.026 10.200
WriteX 5718932 0.011 200.985
ReadX 17981671 0.003 10.036
LockX 37352 0.002 0.076
UnlockX 37352 0.001 0.109
Flush 804018 5.015 778.033
Throughput 1201.98 MB/sec 16 clients 16 procs max_latency=778.036 ms
(+6.5% throughput, -29.4% max latency, -75.8% rename latency)
Test case generic/498 from fstests tests the scenario that the previously
mentioned commit fixed.
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
77 lines
2.4 KiB
C
77 lines
2.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Copyright (C) 2008 Oracle. All rights reserved.
|
|
*/
|
|
|
|
#ifndef BTRFS_TREE_LOG_H
|
|
#define BTRFS_TREE_LOG_H
|
|
|
|
#include "ctree.h"
|
|
#include "transaction.h"
|
|
|
|
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
|
|
#define BTRFS_NO_LOG_SYNC 256
|
|
|
|
struct btrfs_log_ctx {
|
|
int log_ret;
|
|
int log_transid;
|
|
bool log_new_dentries;
|
|
bool logging_new_name;
|
|
struct inode *inode;
|
|
struct list_head list;
|
|
};
|
|
|
|
static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
|
|
struct inode *inode)
|
|
{
|
|
ctx->log_ret = 0;
|
|
ctx->log_transid = 0;
|
|
ctx->log_new_dentries = false;
|
|
ctx->logging_new_name = false;
|
|
ctx->inode = inode;
|
|
INIT_LIST_HEAD(&ctx->list);
|
|
}
|
|
|
|
static inline void btrfs_set_log_full_commit(struct btrfs_trans_handle *trans)
|
|
{
|
|
WRITE_ONCE(trans->fs_info->last_trans_log_full_commit, trans->transid);
|
|
}
|
|
|
|
static inline int btrfs_need_log_full_commit(struct btrfs_trans_handle *trans)
|
|
{
|
|
return READ_ONCE(trans->fs_info->last_trans_log_full_commit) ==
|
|
trans->transid;
|
|
}
|
|
|
|
int btrfs_sync_log(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root, struct btrfs_log_ctx *ctx);
|
|
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
|
|
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
|
|
struct btrfs_fs_info *fs_info);
|
|
int btrfs_recover_log_trees(struct btrfs_root *tree_root);
|
|
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
|
|
struct dentry *dentry,
|
|
const loff_t start,
|
|
const loff_t end,
|
|
struct btrfs_log_ctx *ctx);
|
|
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
const char *name, int name_len,
|
|
struct btrfs_inode *dir, u64 index);
|
|
int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
|
|
struct btrfs_root *root,
|
|
const char *name, int name_len,
|
|
struct btrfs_inode *inode, u64 dirid);
|
|
void btrfs_end_log_trans(struct btrfs_root *root);
|
|
void btrfs_pin_log_trans(struct btrfs_root *root);
|
|
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
|
|
struct btrfs_inode *dir, struct btrfs_inode *inode,
|
|
int for_rename);
|
|
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
|
|
struct btrfs_inode *dir);
|
|
void btrfs_log_new_name(struct btrfs_trans_handle *trans,
|
|
struct btrfs_inode *inode, struct btrfs_inode *old_dir,
|
|
struct dentry *parent);
|
|
|
|
#endif
|