mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 04:18:39 +08:00
bcachefs: KEY_TYPE_accounting
New key type for the disk space accounting rewrite. - Holds a variable sized array of u64s (may be more than one for accounting e.g. compressed and uncompressed size, or buckets and sectors for a given data type) - Updates are deltas, not new versions of the key: this means updates to accounting can happen via the btree write buffer, which we'll be teaching to accumulate deltas. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
929d954330
commit
2744e5c9eb
@ -29,10 +29,11 @@ bcachefs-y := \
|
||||
clock.o \
|
||||
compress.o \
|
||||
darray.o \
|
||||
data_update.o \
|
||||
debug.o \
|
||||
dirent.o \
|
||||
disk_accounting.o \
|
||||
disk_groups.o \
|
||||
data_update.o \
|
||||
ec.o \
|
||||
errcode.o \
|
||||
error.o \
|
||||
|
@ -417,7 +417,8 @@ static inline void bkey_init(struct bkey *k)
|
||||
x(bucket_gens, 30) \
|
||||
x(snapshot_tree, 31) \
|
||||
x(logged_op_truncate, 32) \
|
||||
x(logged_op_finsert, 33)
|
||||
x(logged_op_finsert, 33) \
|
||||
x(accounting, 34)
|
||||
|
||||
enum bch_bkey_type {
|
||||
#define x(name, nr) KEY_TYPE_##name = nr,
|
||||
@ -505,6 +506,9 @@ struct bch_sb_field {
|
||||
x(downgrade, 14)
|
||||
|
||||
#include "alloc_background_format.h"
|
||||
#include "dirent_format.h"
|
||||
#include "disk_accounting_format.h"
|
||||
#include "disk_groups_format.h"
|
||||
#include "extents_format.h"
|
||||
#include "ec_format.h"
|
||||
#include "dirent_format.h"
|
||||
@ -602,49 +606,6 @@ LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16);
|
||||
LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
|
||||
LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
|
||||
|
||||
#define BCH_DATA_TYPES() \
|
||||
x(free, 0) \
|
||||
x(sb, 1) \
|
||||
x(journal, 2) \
|
||||
x(btree, 3) \
|
||||
x(user, 4) \
|
||||
x(cached, 5) \
|
||||
x(parity, 6) \
|
||||
x(stripe, 7) \
|
||||
x(need_gc_gens, 8) \
|
||||
x(need_discard, 9) \
|
||||
x(unstriped, 10)
|
||||
|
||||
enum bch_data_type {
|
||||
#define x(t, n) BCH_DATA_##t,
|
||||
BCH_DATA_TYPES()
|
||||
#undef x
|
||||
BCH_DATA_NR
|
||||
};
|
||||
|
||||
static inline bool data_type_is_empty(enum bch_data_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BCH_DATA_free:
|
||||
case BCH_DATA_need_gc_gens:
|
||||
case BCH_DATA_need_discard:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool data_type_is_hidden(enum bch_data_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BCH_DATA_sb:
|
||||
case BCH_DATA_journal:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* On clean shutdown, store btree roots and current journal sequence number in
|
||||
* the superblock:
|
||||
@ -724,7 +685,8 @@ struct bch_sb_field_ext {
|
||||
x(subvolume_fs_parent, BCH_VERSION(1, 5)) \
|
||||
x(btree_subvolume_children, BCH_VERSION(1, 6)) \
|
||||
x(mi_btree_bitmap, BCH_VERSION(1, 7)) \
|
||||
x(bucket_stripe_sectors, BCH_VERSION(1, 8))
|
||||
x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \
|
||||
x(disk_accounting_v2, BCH_VERSION(1, 9))
|
||||
|
||||
enum bcachefs_metadata_version {
|
||||
bcachefs_metadata_version_min = 9,
|
||||
@ -1377,7 +1339,9 @@ enum btree_id_flags {
|
||||
x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \
|
||||
x(subvolume_children, 19, 0, \
|
||||
BIT_ULL(KEY_TYPE_set))
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(accounting, 20, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
BIT_ULL(KEY_TYPE_accounting)) \
|
||||
|
||||
enum btree_id {
|
||||
#define x(name, nr, ...) BTREE_ID_##name = nr,
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "btree_types.h"
|
||||
#include "alloc_background.h"
|
||||
#include "dirent.h"
|
||||
#include "disk_accounting.h"
|
||||
#include "ec.h"
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
|
70
fs/bcachefs/disk_accounting.c
Normal file
70
fs/bcachefs/disk_accounting.c
Normal file
@ -0,0 +1,70 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
#include "disk_accounting.h"
|
||||
#include "replicas.h"
|
||||
|
||||
static const char * const disk_accounting_type_strs[] = {
|
||||
#define x(t, n, ...) [n] = #t,
|
||||
BCH_DISK_ACCOUNTING_TYPES()
|
||||
#undef x
|
||||
NULL
|
||||
};
|
||||
|
||||
int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k,
|
||||
enum bch_validate_flags flags,
|
||||
struct printbuf *err)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_pos *k)
|
||||
{
|
||||
if (k->type >= BCH_DISK_ACCOUNTING_TYPE_NR) {
|
||||
prt_printf(out, "unknown type %u", k->type);
|
||||
return;
|
||||
}
|
||||
|
||||
prt_str(out, disk_accounting_type_strs[k->type]);
|
||||
prt_str(out, " ");
|
||||
|
||||
switch (k->type) {
|
||||
case BCH_DISK_ACCOUNTING_nr_inodes:
|
||||
break;
|
||||
case BCH_DISK_ACCOUNTING_persistent_reserved:
|
||||
prt_printf(out, "replicas=%u", k->persistent_reserved.nr_replicas);
|
||||
break;
|
||||
case BCH_DISK_ACCOUNTING_replicas:
|
||||
bch2_replicas_entry_to_text(out, &k->replicas);
|
||||
break;
|
||||
case BCH_DISK_ACCOUNTING_dev_data_type:
|
||||
prt_printf(out, "dev=%u data_type=", k->dev_data_type.dev);
|
||||
bch2_prt_data_type(out, k->dev_data_type.data_type);
|
||||
break;
|
||||
case BCH_DISK_ACCOUNTING_dev_stripe_buckets:
|
||||
prt_printf(out, "dev=%u", k->dev_stripe_buckets.dev);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_accounting_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_accounting acc = bkey_s_c_to_accounting(k);
|
||||
struct disk_accounting_pos acc_k;
|
||||
bpos_to_disk_accounting_pos(&acc_k, k.k->p);
|
||||
|
||||
bch2_accounting_key_to_text(out, &acc_k);
|
||||
|
||||
for (unsigned i = 0; i < bch2_accounting_counters(k.k); i++)
|
||||
prt_printf(out, " %lli", acc.v->d[i]);
|
||||
}
|
||||
|
||||
void bch2_accounting_swab(struct bkey_s k)
|
||||
{
|
||||
for (u64 *p = (u64 *) k.v;
|
||||
p < (u64 *) bkey_val_end(k);
|
||||
p++)
|
||||
*p = swab64(*p);
|
||||
}
|
52
fs/bcachefs/disk_accounting.h
Normal file
52
fs/bcachefs/disk_accounting.h
Normal file
@ -0,0 +1,52 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_DISK_ACCOUNTING_H
|
||||
#define _BCACHEFS_DISK_ACCOUNTING_H
|
||||
|
||||
static inline unsigned bch2_accounting_counters(const struct bkey *k)
|
||||
{
|
||||
return bkey_val_u64s(k) - offsetof(struct bch_accounting, d) / sizeof(u64);
|
||||
}
|
||||
|
||||
static inline void bch2_accounting_accumulate(struct bkey_i_accounting *dst,
|
||||
struct bkey_s_c_accounting src)
|
||||
{
|
||||
EBUG_ON(dst->k.u64s != src.k->u64s);
|
||||
|
||||
for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
|
||||
dst->v.d[i] += src.v->d[i];
|
||||
if (bversion_cmp(dst->k.version, src.k->version) < 0)
|
||||
dst->k.version = src.k->version;
|
||||
}
|
||||
|
||||
static inline void bpos_to_disk_accounting_pos(struct disk_accounting_pos *acc, struct bpos p)
|
||||
{
|
||||
acc->_pad = p;
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
bch2_bpos_swab(&acc->_pad);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct bpos disk_accounting_pos_to_bpos(struct disk_accounting_pos *k)
|
||||
{
|
||||
struct bpos ret = k->_pad;
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
bch2_bpos_swab(&ret);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_accounting_invalid(struct bch_fs *, struct bkey_s_c,
|
||||
enum bch_validate_flags, struct printbuf *);
|
||||
void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *);
|
||||
void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
void bch2_accounting_swab(struct bkey_s);
|
||||
|
||||
/* bkey method table for KEY_TYPE_accounting */
#define bch2_bkey_ops_accounting ((struct bkey_ops) {		\
	.key_invalid	= bch2_accounting_invalid,		\
	.val_to_text	= bch2_accounting_to_text,		\
	.swab		= bch2_accounting_swab,			\
	.min_val_size	= 8,					\
})
|
||||
|
||||
#endif /* _BCACHEFS_DISK_ACCOUNTING_H */
|
144
fs/bcachefs/disk_accounting_format.h
Normal file
144
fs/bcachefs/disk_accounting_format.h
Normal file
@ -0,0 +1,144 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _BCACHEFS_DISK_ACCOUNTING_FORMAT_H
|
||||
#define _BCACHEFS_DISK_ACCOUNTING_FORMAT_H
|
||||
|
||||
#include "replicas_format.h"
|
||||
|
||||
/*
|
||||
* Disk accounting - KEY_TYPE_accounting - on disk format:
|
||||
*
|
||||
* Here, the key has considerably more structure than a typical key (bpos); an
|
||||
* accounting key is 'struct disk_accounting_pos', which is a union with bpos.
|
||||
*
|
||||
* More specifically: a key is just a multiword integer (where word endianness
|
||||
* matches native byte order), so we're treating bpos as an opaque 20 byte
|
||||
* integer and mapping struct disk_accounting_pos to that.
|
||||
*
|
||||
* This is a type-tagged union of all our various subtypes; a disk accounting
|
||||
* key can be device counters, replicas counters, et cetera - it's extensible.
|
||||
*
|
||||
* The value is a list of u64s or s64s; the number of counters is specific to a
|
||||
* given accounting type.
|
||||
*
|
||||
* Unlike with other key types, updates are _deltas_, and the deltas are not
|
||||
* resolved until the update to the underlying btree, done by btree write buffer
|
||||
* flush or journal replay.
|
||||
*
|
||||
* Journal replay in particular requires special handling. The journal tracks a
|
||||
* range of entries which may possibly not have been applied to the btree
|
||||
* yet - it does not know definitively whether individual entries are dirty and
|
||||
* still need to be applied.
|
||||
*
|
||||
* To handle this, we use the version field of struct bkey, and give every
|
||||
* accounting update a unique version number - a total ordering in time; the
|
||||
* version number is derived from the key's position in the journal. Then
|
||||
* journal replay can compare the version number of the key from the journal
|
||||
* with the version number of the key in the btree to determine if a key needs
|
||||
* to be replayed.
|
||||
*
|
||||
* For this to work, we must maintain this strict time ordering of updates as
|
||||
* they are flushed to the btree, both via write buffer flush and via journal
|
||||
* replay. This has complications for the write buffer code while journal replay
|
||||
* is still in progress; the write buffer cannot flush any accounting keys to
|
||||
* the btree until journal replay has finished replaying its accounting keys, or
|
||||
* the (newer) version number of the keys from the write buffer will cause
|
||||
* updates from journal replay to be lost.
|
||||
*/
|
||||
|
||||
struct bch_accounting {
|
||||
struct bch_val v;
|
||||
__u64 d[];
|
||||
};
|
||||
|
||||
#define BCH_ACCOUNTING_MAX_COUNTERS 3
|
||||
|
||||
/*
 * Data types, as x(name, number) pairs.
 *
 * The number is the value of the corresponding BCH_DATA_* enumerator; it is
 * spelled out explicitly so that it does not depend on the position of the
 * entry in this list.
 */
#define BCH_DATA_TYPES()		\
	x(free,		0)		\
	x(sb,		1)		\
	x(journal,	2)		\
	x(btree,	3)		\
	x(user,		4)		\
	x(cached,	5)		\
	x(parity,	6)		\
	x(stripe,	7)		\
	x(need_gc_gens,	8)		\
	x(need_discard,	9)		\
	x(unstriped,	10)

enum bch_data_type {
	/*
	 * Pin every enumerator to the number declared in BCH_DATA_TYPES(),
	 * matching how enum disk_accounting_type is generated, instead of
	 * silently ignoring it and relying on list order.
	 */
#define x(t, n) BCH_DATA_##t = n,
	BCH_DATA_TYPES()
#undef x
	/* One past the last entry of BCH_DATA_TYPES() */
	BCH_DATA_NR
};
|
||||
|
||||
static inline bool data_type_is_empty(enum bch_data_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BCH_DATA_free:
|
||||
case BCH_DATA_need_gc_gens:
|
||||
case BCH_DATA_need_discard:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool data_type_is_hidden(enum bch_data_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BCH_DATA_sb:
|
||||
case BCH_DATA_journal:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Disk accounting types, as x(name, number) pairs; the number is the value of
 * the corresponding BCH_DISK_ACCOUNTING_* enumerator and is what is stored in
 * the type field of struct disk_accounting_pos.
 */
#define BCH_DISK_ACCOUNTING_TYPES()		\
	x(nr_inodes,		0)		\
	x(persistent_reserved,	1)		\
	x(replicas,		2)		\
	x(dev_data_type,	3)		\
	x(dev_stripe_buckets,	4)

enum disk_accounting_type {
#define x(_name, _nr)	BCH_DISK_ACCOUNTING_##_name = _nr,
	BCH_DISK_ACCOUNTING_TYPES()
#undef x
	/* One past the last entry of BCH_DISK_ACCOUNTING_TYPES() */
	BCH_DISK_ACCOUNTING_TYPE_NR,
};
|
||||
|
||||
struct bch_nr_inodes {
|
||||
};
|
||||
|
||||
struct bch_persistent_reserved {
|
||||
__u8 nr_replicas;
|
||||
};
|
||||
|
||||
struct bch_dev_data_type {
|
||||
__u8 dev;
|
||||
__u8 data_type;
|
||||
};
|
||||
|
||||
struct bch_dev_stripe_buckets {
|
||||
__u8 dev;
|
||||
};
|
||||
|
||||
struct disk_accounting_pos {
|
||||
union {
|
||||
struct {
|
||||
__u8 type;
|
||||
union {
|
||||
struct bch_nr_inodes nr_inodes;
|
||||
struct bch_persistent_reserved persistent_reserved;
|
||||
struct bch_replicas_entry_v1 replicas;
|
||||
struct bch_dev_data_type dev_data_type;
|
||||
struct bch_dev_stripe_buckets dev_stripe_buckets;
|
||||
};
|
||||
};
|
||||
struct bpos _pad;
|
||||
};
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_DISK_ACCOUNTING_FORMAT_H */
|
@ -90,6 +90,7 @@ static void bch2_reconstruct_alloc(struct bch_fs *c)
|
||||
__set_bit_le64(BCH_FSCK_ERR_freespace_hole_missing, ext->errors_silent);
|
||||
__set_bit_le64(BCH_FSCK_ERR_ptr_to_missing_backpointer, ext->errors_silent);
|
||||
__set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent);
|
||||
__set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
|
||||
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
|
||||
|
||||
bch2_write_super(c);
|
||||
|
@ -54,11 +54,32 @@
|
||||
BCH_FSCK_ERR_subvol_children_not_set) \
|
||||
x(mi_btree_bitmap, \
|
||||
BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
|
||||
BCH_FSCK_ERR_btree_bitmap_not_marked)
|
||||
BCH_FSCK_ERR_btree_bitmap_not_marked) \
|
||||
x(disk_accounting_v2, \
|
||||
BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
|
||||
BCH_FSCK_ERR_bkey_version_in_future, \
|
||||
BCH_FSCK_ERR_dev_usage_buckets_wrong, \
|
||||
BCH_FSCK_ERR_dev_usage_sectors_wrong, \
|
||||
BCH_FSCK_ERR_dev_usage_fragmented_wrong, \
|
||||
BCH_FSCK_ERR_accounting_mismatch)
|
||||
|
||||
#define DOWNGRADE_TABLE() \
|
||||
x(bucket_stripe_sectors, \
|
||||
0)
|
||||
0) \
|
||||
x(disk_accounting_v2, \
|
||||
BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
|
||||
BCH_FSCK_ERR_dev_usage_buckets_wrong, \
|
||||
BCH_FSCK_ERR_dev_usage_sectors_wrong, \
|
||||
BCH_FSCK_ERR_dev_usage_fragmented_wrong, \
|
||||
BCH_FSCK_ERR_fs_usage_hidden_wrong, \
|
||||
BCH_FSCK_ERR_fs_usage_btree_wrong, \
|
||||
BCH_FSCK_ERR_fs_usage_data_wrong, \
|
||||
BCH_FSCK_ERR_fs_usage_cached_wrong, \
|
||||
BCH_FSCK_ERR_fs_usage_reserved_wrong, \
|
||||
BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \
|
||||
BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \
|
||||
BCH_FSCK_ERR_fs_usage_replicas_wrong, \
|
||||
BCH_FSCK_ERR_bkey_version_in_future)
|
||||
|
||||
struct upgrade_downgrade_entry {
|
||||
u64 recovery_passes;
|
||||
|
Loading…
Reference in New Issue
Block a user