2007-06-12 21:07:11 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2007 Oracle. All rights reserved.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public
|
|
|
|
* License v2 as published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public
|
|
|
|
* License along with this program; if not, write to the
|
|
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
|
|
* Boston, MA 02111-1307, USA.
|
|
|
|
*/
|
|
|
|
|
2012-09-21 05:26:28 +08:00
|
|
|
#include "kerncompat.h"
|
2022-09-15 19:59:39 +08:00
|
|
|
#include <sys/stat.h>
|
2007-02-21 05:41:09 +08:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <unistd.h>
|
2008-04-01 23:08:13 +08:00
|
|
|
#include <getopt.h>
|
2022-09-15 19:59:39 +08:00
|
|
|
#include <errno.h>
|
|
|
|
#include <stdbool.h>
|
|
|
|
#include <string.h>
|
2022-09-04 18:47:20 +08:00
|
|
|
#include <pthread.h>
|
2022-09-14 23:06:52 +08:00
|
|
|
#include <uuid/uuid.h>
|
2023-01-16 11:08:53 +08:00
|
|
|
#include <blkid/blkid.h>
|
2022-09-15 19:59:39 +08:00
|
|
|
#include "kernel-lib/list.h"
|
2022-09-14 23:06:52 +08:00
|
|
|
#include "kernel-lib/list_sort.h"
|
2022-09-15 19:59:39 +08:00
|
|
|
#include "kernel-lib/rbtree.h"
|
|
|
|
#include "kernel-lib/sizes.h"
|
2020-08-18 21:56:04 +08:00
|
|
|
#include "kernel-shared/ctree.h"
|
2020-08-18 21:56:04 +08:00
|
|
|
#include "kernel-shared/disk-io.h"
|
2020-08-18 21:56:04 +08:00
|
|
|
#include "kernel-shared/volumes.h"
|
2020-08-18 21:56:04 +08:00
|
|
|
#include "kernel-shared/transaction.h"
|
2021-04-26 14:27:36 +08:00
|
|
|
#include "kernel-shared/zoned.h"
|
2023-02-16 10:30:46 +08:00
|
|
|
#include "crypto/hash.h"
|
2022-09-15 19:59:39 +08:00
|
|
|
#include "common/defs.h"
|
|
|
|
#include "common/internal.h"
|
|
|
|
#include "common/messages.h"
|
2023-02-16 10:30:46 +08:00
|
|
|
#include "common/cpu-utils.h"
|
2019-06-20 07:46:21 +08:00
|
|
|
#include "common/utils.h"
|
2019-07-02 02:54:39 +08:00
|
|
|
#include "common/path-utils.h"
|
2019-07-02 06:42:23 +08:00
|
|
|
#include "common/device-utils.h"
|
2019-07-02 06:42:23 +08:00
|
|
|
#include "common/device-scan.h"
|
2019-06-20 07:46:21 +08:00
|
|
|
#include "common/help.h"
|
2019-06-20 07:46:21 +08:00
|
|
|
#include "common/rbtree-utils.h"
|
2021-09-04 04:29:06 +08:00
|
|
|
#include "common/parse-utils.h"
|
2019-06-20 07:46:21 +08:00
|
|
|
#include "common/fsfeatures.h"
|
2015-06-22 00:23:19 +08:00
|
|
|
#include "common/box.h"
|
2021-04-30 05:43:24 +08:00
|
|
|
#include "common/units.h"
|
2022-09-16 05:15:17 +08:00
|
|
|
#include "common/string-utils.h"
|
2023-01-12 01:49:52 +08:00
|
|
|
#include "cmds/commands.h"
|
2020-03-19 04:21:45 +08:00
|
|
|
#include "check/qgroup-verify.h"
|
2022-09-14 23:06:52 +08:00
|
|
|
#include "mkfs/common.h"
|
|
|
|
#include "mkfs/rootdir.h"
|
2007-02-21 05:41:09 +08:00
|
|
|
|
2023-09-28 01:46:46 +08:00
|
|
|
#include "libbtrfs/ctree.h"
|
|
|
|
|
2015-06-08 18:54:54 +08:00
|
|
|
struct mkfs_allocation {
|
|
|
|
u64 data;
|
|
|
|
u64 metadata;
|
|
|
|
u64 mixed;
|
|
|
|
u64 system;
|
|
|
|
};
|
|
|
|
|
2022-09-04 18:47:20 +08:00
|
|
|
/*
 * File-scope option state together with its initial values.
 * NOTE(review): the code that changes and consumes these settings is not
 * part of this excerpt; confirm their exact meaning against the users.
 */
static bool opt_zero_end = true;
static bool opt_discard = true;
static bool opt_zoned = true;
/* Initial value is the open(2) read/write access flag. */
static int opt_oflags = O_RDWR;
|
|
|
|
|
|
|
|
struct prepare_device_progress {
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs there is a chance
that no /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and close all the fds until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
int fd;
|
2022-09-04 18:47:20 +08:00
|
|
|
char *file;
|
|
|
|
u64 dev_block_count;
|
|
|
|
u64 block_count;
|
|
|
|
int ret;
|
|
|
|
};
|
|
|
|
|
2021-09-28 02:13:26 +08:00
|
|
|
static int create_metadata_block_groups(struct btrfs_root *root, bool mixed,
|
Revert "btrfs-progs: mkfs: create only desired block groups for single device"
This reverts commit 5f8232e5c8f0b0de0ef426274911385b0e877392.
This commit causes a regression:
$ mkfs.btrfs -f /dev/sda6
$ btrfsck /dev/sda6
Checking filesystem on /dev/sda6
UUID: 2ebb483c-1986-4610-802a-c6f3e6ab4b76
checking extents
Chunk[256, 228, 0]: length(4194304), offset(0), type(2) mismatch with
block group[0, 192, 4194304]: offset(4194304), objectid(0), flags(34)
Chunk[256, 228, 4194304]: length(8388608), offset(4194304), type(4)
mismatch with block group[4194304, 192, 8388608]: offset(8388608),
objectid(4194304), flags(36)
Block group[0, 4194304] (flags = 34) didn't find the relative chunk.
Block group[4194304, 8388608] (flags = 36) didn't find the relative
chunk.
......
The commit has the following bug causing the problem.
1) Typo forgets to add meta/data_profile for alloc_chunk.
Only meta/data_profile is added to allocate a block group, but not
chunk.
2) Type for the first system chunk is impossible to modify yet.
The type for the first chunk and its stripe is hard coded into
make_btrfs() function.
So even we try to modify the type of the block group, we are unable to
change the type of the first chunk.
Causing the chunk type mismatch problem.
The 1st bug can be fixed quite easily but the second is not.
The good news is, the last patch "btrfs-progs: mkfs: Cleanup temporary
chunk to avoid strange balance behavior." from my patchset can handle it
quite well alone.
So just revert the patch.
New bug fix for btrfsck(err is 0 even chunk/extent tree is corrupted) and
new test cases for mkfs will follow soon.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-14 10:13:01 +08:00
|
|
|
struct mkfs_allocation *allocation)
|
2008-09-24 00:29:10 +08:00
|
|
|
{
|
2017-06-13 17:19:35 +08:00
|
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
2007-04-07 03:39:12 +08:00
|
|
|
struct btrfs_trans_handle *trans;
|
btrfs-progs: Fix false ENOSPC alert by tracking used space correctly
[BUG]
There is a bug report of unexpected ENOSPC from btrfs-convert, issue #123.
After some debugging, even when we have enough unallocated space, we
still hit ENOSPC at btrfs_reserve_extent().
[CAUSE]
Btrfs-progs relies on chunk preallocator to make enough space for
data/metadata.
However after the introduction of delayed-ref, it's no longer reliable
to rely on btrfs_space_info::bytes_used and
btrfs_space_info::bytes_pinned to calculate used metadata space.
For a running transaction with a lot of allocated tree blocks,
btrfs_space_info::bytes_used stays its original value, and will only be
updated when running delayed ref.
This makes btrfs-progs chunk preallocator completely useless. And for
btrfs-convert/mkfs.btrfs --rootdir, if we're going to have enough
metadata to fill a metadata block group in one transaction, we will hit
ENOSPC no matter whether we have enough unallocated space.
[FIX]
This patch will introduce btrfs_space_info::bytes_reserved to track how
many space we have reserved but not yet committed to extent tree.
To support this change, this commit also introduces the following
modification:
- More comment on btrfs_space_info::bytes_*
To make code a little easier to read
- Export update_space_info() to preallocate empty data/metadata space
info for mkfs.
For mkfs, we only have a temporary fs image with SYSTEM chunk only.
Export update_space_info() so that we can preallocate empty
data/metadata space info before we start a transaction.
- Proper btrfs_space_info::bytes_reserved update
The timing is the as kernel (except we don't need to update
bytes_reserved for data extents)
* Increase bytes_reserved when call alloc_reserved_tree_block()
* Decrease bytes_reserved when running delayed refs
With the help of head->must_insert_reserved to determine whether we
need to decrease.
Issue: #123
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-05-25 07:32:43 +08:00
|
|
|
struct btrfs_space_info *sinfo;
|
Btrfs-progs: mkfs, fix metadata corruption when using mixed mode
When creating a filesystem with mixed block groups, we are creating two
space info objects to track used/reserved/pinned space, one only for data
and another one only for metadata.
This is making fstests test case generic/416 fail, with btrfs' check
reporting over an hundred errors about bad extents:
(...)
bad extent [17186816, 17190912), type mismatch with chunk
bad extent [17195008, 17199104), type mismatch with chunk
bad extent [17203200, 17207296), type mismatch with chunk
(...)
Because, surprisingly, this results in block groups that do not have the
BTRFS_BLOCK_GROUP_DATA flag set but have data extents allocated in them.
This is a regression introduced in btrfs-progs v5.2.
So fix this by making sure we only create one space info object, for both
metadata and data, when mixed block groups are enabled.
Fixes: c31edf610cbe1e ("btrfs-progs: Fix false ENOSPC alert by tracking used space correctly")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-07-25 18:27:17 +08:00
|
|
|
u64 flags = BTRFS_BLOCK_GROUP_METADATA;
|
2008-03-25 03:03:18 +08:00
|
|
|
u64 chunk_start = 0;
|
|
|
|
u64 chunk_size = 0;
|
2021-04-26 14:27:38 +08:00
|
|
|
u64 system_group_size = BTRFS_MKFS_SYSTEM_GROUP_SIZE;
|
2008-01-04 23:38:22 +08:00
|
|
|
int ret;
|
2007-04-07 03:39:12 +08:00
|
|
|
|
2021-04-26 14:27:38 +08:00
|
|
|
if (btrfs_is_zoned(fs_info)) {
|
|
|
|
/* Two zones are reserved for superblock */
|
|
|
|
system_group_size = fs_info->zone_size;
|
|
|
|
}
|
|
|
|
|
Btrfs-progs: mkfs, fix metadata corruption when using mixed mode
When creating a filesystem with mixed block groups, we are creating two
space info objects to track used/reserved/pinned space, one only for data
and another one only for metadata.
This is making fstests test case generic/416 fail, with btrfs' check
reporting over an hundred errors about bad extents:
(...)
bad extent [17186816, 17190912), type mismatch with chunk
bad extent [17195008, 17199104), type mismatch with chunk
bad extent [17203200, 17207296), type mismatch with chunk
(...)
Because, surprisingly, this results in block groups that do not have the
BTRFS_BLOCK_GROUP_DATA flag set but have data extents allocated in them.
This is a regression introduced in btrfs-progs v5.2.
So fix this by making sure we only create one space info object, for both
metadata and data, when mixed block groups are enabled.
Fixes: c31edf610cbe1e ("btrfs-progs: Fix false ENOSPC alert by tracking used space correctly")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-07-25 18:27:17 +08:00
|
|
|
if (mixed)
|
|
|
|
flags |= BTRFS_BLOCK_GROUP_DATA;
|
|
|
|
|
btrfs-progs: Fix false ENOSPC alert by tracking used space correctly
[BUG]
There is a bug report of unexpected ENOSPC from btrfs-convert, issue #123.
After some debugging, even when we have enough unallocated space, we
still hit ENOSPC at btrfs_reserve_extent().
[CAUSE]
Btrfs-progs relies on chunk preallocator to make enough space for
data/metadata.
However after the introduction of delayed-ref, it's no longer reliable
to rely on btrfs_space_info::bytes_used and
btrfs_space_info::bytes_pinned to calculate used metadata space.
For a running transaction with a lot of allocated tree blocks,
btrfs_space_info::bytes_used stays its original value, and will only be
updated when running delayed ref.
This makes btrfs-progs chunk preallocator completely useless. And for
btrfs-convert/mkfs.btrfs --rootdir, if we're going to have enough
metadata to fill a metadata block group in one transaction, we will hit
ENOSPC no matter whether we have enough unallocated space.
[FIX]
This patch will introduce btrfs_space_info::bytes_reserved to track how
many space we have reserved but not yet committed to extent tree.
To support this change, this commit also introduces the following
modification:
- More comment on btrfs_space_info::bytes_*
To make code a little easier to read
- Export update_space_info() to preallocate empty data/metadata space
info for mkfs.
For mkfs, we only have a temporary fs image with SYSTEM chunk only.
Export update_space_info() so that we can preallocate empty
data/metadata space info before we start a transaction.
- Proper btrfs_space_info::bytes_reserved update
The timing is the as kernel (except we don't need to update
bytes_reserved for data extents)
* Increase bytes_reserved when call alloc_reserved_tree_block()
* Decrease bytes_reserved when running delayed refs
With the help of head->must_insert_reserved to determine whether we
need to decrease.
Issue: #123
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-05-25 07:32:43 +08:00
|
|
|
/* Create needed space info to trace extents reservation */
|
Btrfs-progs: mkfs, fix metadata corruption when using mixed mode
When creating a filesystem with mixed block groups, we are creating two
space info objects to track used/reserved/pinned space, one only for data
and another one only for metadata.
This is making fstests test case generic/416 fail, with btrfs' check
reporting over an hundred errors about bad extents:
(...)
bad extent [17186816, 17190912), type mismatch with chunk
bad extent [17195008, 17199104), type mismatch with chunk
bad extent [17203200, 17207296), type mismatch with chunk
(...)
Because, surprisingly, this results in block groups that do not have the
BTRFS_BLOCK_GROUP_DATA flag set but have data extents allocated in them.
This is a regression introduced in btrfs-progs v5.2.
So fix this by making sure we only create one space info object, for both
metadata and data, when mixed block groups are enabled.
Fixes: c31edf610cbe1e ("btrfs-progs: Fix false ENOSPC alert by tracking used space correctly")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-07-25 18:27:17 +08:00
|
|
|
ret = update_space_info(fs_info, flags, 0, 0, &sinfo);
|
btrfs-progs: Fix false ENOSPC alert by tracking used space correctly
[BUG]
There is a bug report of unexpected ENOSPC from btrfs-convert, issue #123.
After some debugging, even when we have enough unallocated space, we
still hit ENOSPC at btrfs_reserve_extent().
[CAUSE]
Btrfs-progs relies on chunk preallocator to make enough space for
data/metadata.
However after the introduction of delayed-ref, it's no longer reliable
to rely on btrfs_space_info::bytes_used and
btrfs_space_info::bytes_pinned to calculate used metadata space.
For a running transaction with a lot of allocated tree blocks,
btrfs_space_info::bytes_used stays its original value, and will only be
updated when running delayed ref.
This makes btrfs-progs chunk preallocator completely useless. And for
btrfs-convert/mkfs.btrfs --rootdir, if we're going to have enough
metadata to fill a metadata block group in one transaction, we will hit
ENOSPC no matter whether we have enough unallocated space.
[FIX]
This patch will introduce btrfs_space_info::bytes_reserved to track how
many space we have reserved but not yet committed to extent tree.
To support this change, this commit also introduces the following
modification:
- More comment on btrfs_space_info::bytes_*
To make code a little easier to read
- Export update_space_info() to preallocate empty data/metadata space
info for mkfs.
For mkfs, we only have a temporary fs image with SYSTEM chunk only.
Export update_space_info() so that we can preallocate empty
data/metadata space info before we start a transaction.
- Proper btrfs_space_info::bytes_reserved update
The timing is the as kernel (except we don't need to update
bytes_reserved for data extents)
* Increase bytes_reserved when call alloc_reserved_tree_block()
* Decrease bytes_reserved when running delayed refs
With the help of head->must_insert_reserved to determine whether we
need to decrease.
Issue: #123
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-05-25 07:32:43 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
2007-04-07 03:39:12 +08:00
|
|
|
trans = btrfs_start_transaction(root, 1);
|
2022-09-30 20:44:38 +08:00
|
|
|
if (IS_ERR(trans)) {
|
|
|
|
ret = PTR_ERR(trans);
|
|
|
|
errno = -ret;
|
|
|
|
error_msg(ERROR_MSG_START_TRANS, "%m");
|
|
|
|
return ret;
|
|
|
|
}
|
2008-03-25 03:03:18 +08:00
|
|
|
|
2008-04-23 02:06:56 +08:00
|
|
|
root->fs_info->system_allocs = 1;
|
2018-01-10 12:56:47 +08:00
|
|
|
/*
|
2021-08-24 04:14:52 +08:00
|
|
|
* We already created the block group item for our temporary system
|
|
|
|
* chunk in make_btrfs(), so account for the size here.
|
2018-01-10 12:56:47 +08:00
|
|
|
*/
|
2021-04-26 14:27:38 +08:00
|
|
|
allocation->system += system_group_size;
|
2016-08-22 22:57:15 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2008-04-16 03:42:08 +08:00
|
|
|
|
2010-12-10 02:31:08 +08:00
|
|
|
if (mixed) {
|
2017-06-13 17:19:35 +08:00
|
|
|
ret = btrfs_alloc_chunk(trans, fs_info,
|
2010-12-10 02:31:08 +08:00
|
|
|
&chunk_start, &chunk_size,
|
|
|
|
BTRFS_BLOCK_GROUP_METADATA |
|
|
|
|
BTRFS_BLOCK_GROUP_DATA);
|
2013-09-05 14:55:08 +08:00
|
|
|
if (ret == -ENOSPC) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("no space to allocate data/metadata chunk");
|
2013-09-05 14:55:08 +08:00
|
|
|
goto err;
|
|
|
|
}
|
2016-08-22 22:57:15 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2017-06-13 17:19:35 +08:00
|
|
|
ret = btrfs_make_block_group(trans, fs_info, 0,
|
2010-12-10 02:31:08 +08:00
|
|
|
BTRFS_BLOCK_GROUP_METADATA |
|
|
|
|
BTRFS_BLOCK_GROUP_DATA,
|
|
|
|
chunk_start, chunk_size);
|
2016-08-22 22:57:15 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2015-06-08 18:54:54 +08:00
|
|
|
allocation->mixed += chunk_size;
|
2010-12-10 02:31:08 +08:00
|
|
|
} else {
|
2017-06-13 17:19:35 +08:00
|
|
|
ret = btrfs_alloc_chunk(trans, fs_info,
|
2010-12-10 02:31:08 +08:00
|
|
|
&chunk_start, &chunk_size,
|
|
|
|
BTRFS_BLOCK_GROUP_METADATA);
|
2013-09-05 14:55:08 +08:00
|
|
|
if (ret == -ENOSPC) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("no space to allocate metadata chunk");
|
2013-09-05 14:55:08 +08:00
|
|
|
goto err;
|
|
|
|
}
|
2016-08-22 22:57:15 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2017-06-13 17:19:35 +08:00
|
|
|
ret = btrfs_make_block_group(trans, fs_info, 0,
|
2010-12-10 02:31:08 +08:00
|
|
|
BTRFS_BLOCK_GROUP_METADATA,
|
|
|
|
chunk_start, chunk_size);
|
2015-06-08 18:54:54 +08:00
|
|
|
allocation->metadata += chunk_size;
|
2016-08-22 22:57:15 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2010-12-10 02:31:08 +08:00
|
|
|
}
|
2008-03-25 03:03:18 +08:00
|
|
|
|
2008-04-23 02:06:56 +08:00
|
|
|
root->fs_info->system_allocs = 0;
|
2016-08-22 22:57:15 +08:00
|
|
|
ret = btrfs_commit_transaction(trans, root);
|
2015-07-02 01:12:38 +08:00
|
|
|
err:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-07-02 01:19:05 +08:00
|
|
|
static int create_data_block_groups(struct btrfs_trans_handle *trans,
|
2021-09-28 02:13:26 +08:00
|
|
|
struct btrfs_root *root, bool mixed,
|
2015-07-02 01:19:05 +08:00
|
|
|
struct mkfs_allocation *allocation)
|
2015-07-02 01:12:38 +08:00
|
|
|
{
|
2017-06-13 17:19:35 +08:00
|
|
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
2015-07-02 01:12:38 +08:00
|
|
|
u64 chunk_start = 0;
|
|
|
|
u64 chunk_size = 0;
|
2015-07-02 01:19:05 +08:00
|
|
|
int ret = 0;
|
2015-07-02 01:12:38 +08:00
|
|
|
|
2010-12-10 02:31:08 +08:00
|
|
|
if (!mixed) {
|
Btrfs-progs: mkfs, fix metadata corruption when using mixed mode
When creating a filesystem with mixed block groups, we are creating two
space info objects to track used/reserved/pinned space, one only for data
and another one only for metadata.
This is making fstests test case generic/416 fail, with btrfs' check
reporting over an hundred errors about bad extents:
(...)
bad extent [17186816, 17190912), type mismatch with chunk
bad extent [17195008, 17199104), type mismatch with chunk
bad extent [17203200, 17207296), type mismatch with chunk
(...)
Because, surprisingly, this results in block groups that do not have the
BTRFS_BLOCK_GROUP_DATA flag set but have data extents allocated in them.
This is a regression introduced in btrfs-progs v5.2.
So fix this by making sure we only create one space info object, for both
metadata and data, when mixed block groups are enabled.
Fixes: c31edf610cbe1e ("btrfs-progs: Fix false ENOSPC alert by tracking used space correctly")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-07-25 18:27:17 +08:00
|
|
|
struct btrfs_space_info *sinfo;
|
|
|
|
|
|
|
|
ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA,
|
|
|
|
0, 0, &sinfo);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
2017-06-13 17:19:35 +08:00
|
|
|
ret = btrfs_alloc_chunk(trans, fs_info,
|
2010-12-10 02:31:08 +08:00
|
|
|
&chunk_start, &chunk_size,
|
|
|
|
BTRFS_BLOCK_GROUP_DATA);
|
2013-09-05 14:55:08 +08:00
|
|
|
if (ret == -ENOSPC) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("no space to allocate data chunk");
|
2013-09-05 14:55:08 +08:00
|
|
|
goto err;
|
|
|
|
}
|
2016-08-22 22:57:15 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2017-06-13 17:19:35 +08:00
|
|
|
ret = btrfs_make_block_group(trans, fs_info, 0,
|
2010-12-10 02:31:08 +08:00
|
|
|
BTRFS_BLOCK_GROUP_DATA,
|
|
|
|
chunk_start, chunk_size);
|
2015-06-08 18:54:54 +08:00
|
|
|
allocation->data += chunk_size;
|
2016-08-22 22:57:15 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2010-12-10 02:31:08 +08:00
|
|
|
}
|
2008-03-25 03:03:18 +08:00
|
|
|
|
2015-07-02 01:19:05 +08:00
|
|
|
err:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-02-10 00:42:02 +08:00
|
|
|
static int make_root_dir(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_root *root)
|
2015-07-02 01:19:05 +08:00
|
|
|
{
|
|
|
|
struct btrfs_key location;
|
|
|
|
int ret;
|
|
|
|
|
2008-01-04 23:38:22 +08:00
|
|
|
ret = btrfs_make_root_dir(trans, root->fs_info->tree_root,
|
2007-04-11 20:58:53 +08:00
|
|
|
BTRFS_ROOT_TREE_DIR_OBJECTID);
|
2007-03-21 23:13:29 +08:00
|
|
|
if (ret)
|
2007-04-07 03:39:12 +08:00
|
|
|
goto err;
|
2008-01-04 23:38:22 +08:00
|
|
|
ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
|
2007-04-07 03:39:12 +08:00
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
memcpy(&location, &root->fs_info->fs_root->root_key, sizeof(location));
|
|
|
|
location.offset = (u64)-1;
|
|
|
|
ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
|
2007-08-30 03:56:44 +08:00
|
|
|
"default", 7,
|
2013-03-07 00:32:51 +08:00
|
|
|
btrfs_super_root_dir(root->fs_info->super_copy),
|
2008-12-18 05:10:07 +08:00
|
|
|
&location, BTRFS_FT_DIR, 0);
|
2007-04-07 03:39:12 +08:00
|
|
|
if (ret)
|
|
|
|
goto err;
|
2007-12-13 03:39:46 +08:00
|
|
|
|
|
|
|
ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
|
|
|
|
"default", 7, location.objectid,
|
2008-07-25 00:13:32 +08:00
|
|
|
BTRFS_ROOT_TREE_DIR_OBJECTID, 0);
|
2007-12-13 03:39:46 +08:00
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
|
2007-04-07 03:39:12 +08:00
|
|
|
err:
|
2007-03-21 23:13:29 +08:00
|
|
|
return ret;
|
|
|
|
}
|
2007-03-21 08:35:03 +08:00
|
|
|
|
btrfs-progs: mkfs: recow all tree blocks properly
[BUG]
Since btrfs-progs v5.14, mkfs.btrfs no longer cleans up the temporary
SINGLE metadata chunks if "-R free-space-tree" is specified:
$ mkfs.btrfs -f -R free-space-tree -m dup -d dup /dev/test/test
$ btrfs ins dump-tree -t chunk /dev/test/test | grep "type METADATA"
length 8388608 owner 2 stripe_len 65536 type METADATA
length 268435456 owner 2 stripe_len 65536 type METADATA|DUP
[CAUSE]
Since commit 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree
at make_btrfs() time"), free space tree is created when the temporary
btrfs image is created.
This behavior itself has no problem at all. The problem happens when
"-m DUP -d DUP" (or other profiles) is specified.
This makes btrfs to create extra chunks, enlarging free space tree so
that it can be as high as level 1.
During mkfs, we rely on recow_roots() to re-COW all tree blocks to the
newly allocated chunks.
But __recow_root() can only handle tree root at level 0, as it forces
root node to be COWed, not bothering the children leaves/nodes.
This makes part of the free space cache tree still live on the old
temporary chunks, leaving later cleanup_temp_chunks() unable to delete
temporary SINGLE chunks.
[FIX]
Rework __recow_root() to do a proper COW of the whole tree.
But above rework is not enough, as if a free space tree block is
allocated during current transaction, but before new chunks added.
Then the reworked __recow_root() can't COW it, as btrfs_search_slot()
won't COW a tree block allocated in current transaction.
So this patch will also commit current transaction before calling
recow_roots(), to force us to re-cow all tree blocks.
This shouldn't be a problem, as at the time of calling, we should have
less than a dozen tree blocks, thus there won't be a performance impact.
Reported-by: FireFish5000 <firefish5000@gmail.com>
Fixes: 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree at make_btrfs() time")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-10-11 20:06:49 +08:00
|
|
|
static int __recow_root(struct btrfs_trans_handle *trans, struct btrfs_root *root)
|
2008-04-05 03:42:17 +08:00
|
|
|
{
|
btrfs-progs: mkfs: recow all tree blocks properly
[BUG]
Since btrfs-progs v5.14, mkfs.btrfs no longer cleans up the temporary
SINGLE metadata chunks if "-R free-space-tree" is specified:
$ mkfs.btrfs -f -R free-space-tree -m dup -d dup /dev/test/test
$ btrfs ins dump-tree -t chunk /dev/test/test | grep "type METADATA"
length 8388608 owner 2 stripe_len 65536 type METADATA
length 268435456 owner 2 stripe_len 65536 type METADATA|DUP
[CAUSE]
Since commit 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree
at make_btrfs() time"), free space tree is created when the temporary
btrfs image is created.
This behavior itself has no problem at all. The problem happens when
"-m DUP -d DUP" (or other profiles) is specified.
This makes btrfs to create extra chunks, enlarging free space tree so
that it can be as high as level 1.
During mkfs, we rely on recow_roots() to re-COW all tree blocks to the
newly allocated chunks.
But __recow_root() can only handle tree root at level 0, as it forces
root node to be COWed, not bothering the children leaves/nodes.
This makes part of the free space cache tree still live on the old
temporary chunks, leaving later cleanup_temp_chunks() unable to delete
temporary SINGLE chunks.
[FIX]
Rework __recow_root() to do a proper COW of the whole tree.
But above rework is not enough, as if a free space tree block is
allocated during current transaction, but before new chunks added.
Then the reworked __recow_root() can't COW it, as btrfs_search_slot()
won't COW a tree block allocated in current transaction.
So this patch will also commit current transaction before calling
recow_roots(), to force us to re-cow all tree blocks.
This shouldn't be a problem, as at the time of calling, we should have
less than a dozen tree blocks, thus there won't be a performance impact.
Reported-by: FireFish5000 <firefish5000@gmail.com>
Fixes: 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree at make_btrfs() time")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-10-11 20:06:49 +08:00
|
|
|
struct btrfs_path path;
|
|
|
|
struct btrfs_key key;
|
2016-08-22 23:55:16 +08:00
|
|
|
int ret;
|
2008-04-05 03:42:17 +08:00
|
|
|
|
btrfs-progs: mkfs: recow all tree blocks properly
[BUG]
Since btrfs-progs v5.14, mkfs.btrfs no longer cleans up the temporary
SINGLE metadata chunks if "-R free-space-tree" is specified:
$ mkfs.btrfs -f -R free-space-tree -m dup -d dup /dev/test/test
$ btrfs ins dump-tree -t chunk /dev/test/test | grep "type METADATA"
length 8388608 owner 2 stripe_len 65536 type METADATA
length 268435456 owner 2 stripe_len 65536 type METADATA|DUP
[CAUSE]
Since commit 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree
at make_btrfs() time"), free space tree is created when the temporary
btrfs image is created.
This behavior itself has no problem at all. The problem happens when
"-m DUP -d DUP" (or other profiles) is specified.
This makes btrfs to create extra chunks, enlarging free space tree so
that it can be as high as level 1.
During mkfs, we rely on recow_roots() to re-COW all tree blocks to the
newly allocated chunks.
But __recow_root() can only handle tree root at level 0, as it forces
root node to be COWed, not bothering the children leaves/nodes.
This makes part of the free space cache tree still live on the old
temporary chunks, leaving later cleanup_temp_chunks() unable to delete
temporary SINGLE chunks.
[FIX]
Rework __recow_root() to do a proper COW of the whole tree.
But the above rework is not enough: if a free space tree block is
allocated during the current transaction, but before the new chunks are
added, the reworked __recow_root() can't COW it, as btrfs_search_slot()
won't COW a tree block allocated in the current transaction.
So this patch will also commit current transaction before calling
recow_roots(), to force us to re-cow all tree blocks.
This shouldn't be a problem, as at the time of calling, we should have
less than a dozen tree blocks, thus there won't be a performance impact.
Reported-by: FireFish5000 <firefish5000@gmail.com>
Fixes: 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree at make_btrfs() time")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-10-11 20:06:49 +08:00
|
|
|
btrfs_init_path(&path);
|
|
|
|
key.objectid = 0;
|
|
|
|
key.type = 0;
|
|
|
|
key.offset = 0;
|
2016-08-22 23:55:16 +08:00
|
|
|
|
btrfs-progs: mkfs: recow all tree blocks properly
[BUG]
Since btrfs-progs v5.14, mkfs.btrfs no longer cleans up the temporary
SINGLE metadata chunks if "-R free-space-tree" is specified:
$ mkfs.btrfs -f -R free-space-tree -m dup -d dup /dev/test/test
$ btrfs ins dump-tree -t chunk /dev/test/test | grep "type METADATA"
length 8388608 owner 2 stripe_len 65536 type METADATA
length 268435456 owner 2 stripe_len 65536 type METADATA|DUP
[CAUSE]
Since commit 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree
at make_btrfs() time"), free space tree is created when the temporary
btrfs image is created.
This behavior itself has no problem at all. The problem happens when
"-m DUP -d DUP" (or other profiles) is specified.
This makes btrfs to create extra chunks, enlarging free space tree so
that it can be as high as level 1.
During mkfs, we rely on recow_roots() to re-COW all tree blocks to the
newly allocated chunks.
But __recow_root() can only handle tree root at level 0, as it forces
root node to be COWed, not bothering the children leaves/nodes.
This makes part of the free space cache tree still live on the old
temporary chunks, leaving later cleanup_temp_chunks() unable to delete
temporary SINGLE chunks.
[FIX]
Rework __recow_root() to do a proper COW of the whole tree.
But the above rework is not enough: if a free space tree block is
allocated during the current transaction, but before the new chunks are
added, the reworked __recow_root() can't COW it, as btrfs_search_slot()
won't COW a tree block allocated in the current transaction.
So this patch will also commit current transaction before calling
recow_roots(), to force us to re-cow all tree blocks.
This shouldn't be a problem, as at the time of calling, we should have
less than a dozen tree blocks, thus there won't be a performance impact.
Reported-by: FireFish5000 <firefish5000@gmail.com>
Fixes: 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree at make_btrfs() time")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-10-11 20:06:49 +08:00
|
|
|
/* Get a path to the left-most leaves */
|
|
|
|
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
struct btrfs_key found_key;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Our parent nodes must not be newer than the leaf, thus if
|
|
|
|
* the leaf is as new as the transaction, no need to re-COW.
|
|
|
|
*/
|
|
|
|
if (btrfs_header_generation(path.nodes[0]) == trans->transid)
|
|
|
|
goto next;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Grab the key of current tree block and do a COW search to
|
|
|
|
* the current tree block.
|
|
|
|
*/
|
|
|
|
btrfs_item_key_to_cpu(path.nodes[0], &key, 0);
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
|
|
|
|
/* This will ensure this leaf and all its parent get COWed */
|
|
|
|
ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
ret = 0;
|
|
|
|
btrfs_item_key_to_cpu(path.nodes[0], &found_key, 0);
|
2023-04-20 05:13:46 +08:00
|
|
|
UASSERT(btrfs_comp_cpu_keys(&key, &found_key) == 0);
|
btrfs-progs: mkfs: recow all tree blocks properly
[BUG]
Since btrfs-progs v5.14, mkfs.btrfs no longer cleans up the temporary
SINGLE metadata chunks if "-R free-space-tree" is specified:
$ mkfs.btrfs -f -R free-space-tree -m dup -d dup /dev/test/test
$ btrfs ins dump-tree -t chunk /dev/test/test | grep "type METADATA"
length 8388608 owner 2 stripe_len 65536 type METADATA
length 268435456 owner 2 stripe_len 65536 type METADATA|DUP
[CAUSE]
Since commit 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree
at make_btrfs() time"), free space tree is created when the temporary
btrfs image is created.
This behavior itself has no problem at all. The problem happens when
"-m DUP -d DUP" (or other profiles) is specified.
This makes btrfs to create extra chunks, enlarging free space tree so
that it can be as high as level 1.
During mkfs, we rely on recow_roots() to re-COW all tree blocks to the
newly allocated chunks.
But __recow_root() can only handle tree root at level 0, as it forces
root node to be COWed, not bothering the children leaves/nodes.
This makes part of the free space cache tree still live on the old
temporary chunks, leaving later cleanup_temp_chunks() unable to delete
temporary SINGLE chunks.
[FIX]
Rework __recow_root() to do a proper COW of the whole tree.
But the above rework is not enough: if a free space tree block is
allocated during the current transaction, but before the new chunks are
added, the reworked __recow_root() can't COW it, as btrfs_search_slot()
won't COW a tree block allocated in the current transaction.
So this patch will also commit current transaction before calling
recow_roots(), to force us to re-cow all tree blocks.
This shouldn't be a problem, as at the time of calling, we should have
less than a dozen tree blocks, thus there won't be a performance impact.
Reported-by: FireFish5000 <firefish5000@gmail.com>
Fixes: 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree at make_btrfs() time")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-10-11 20:06:49 +08:00
|
|
|
|
|
|
|
next:
|
|
|
|
ret = btrfs_next_leaf(root, &path);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
if (ret > 0) {
|
|
|
|
ret = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
return ret;
|
2013-07-03 21:25:09 +08:00
|
|
|
}
|
2008-04-05 03:42:17 +08:00
|
|
|
|
2021-11-09 03:26:43 +08:00
|
|
|
static int recow_global_roots(struct btrfs_trans_handle *trans)
|
|
|
|
{
|
|
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
|
|
struct btrfs_root *root;
|
|
|
|
struct rb_node *n;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
for (n = rb_first(&fs_info->global_roots_tree); n; n = rb_next(n)) {
|
|
|
|
root = rb_entry(n, struct btrfs_root, rb_node);
|
|
|
|
ret = __recow_root(trans, root);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-08-22 23:55:16 +08:00
|
|
|
static int recow_roots(struct btrfs_trans_handle *trans,
|
2013-07-03 21:25:09 +08:00
|
|
|
struct btrfs_root *root)
|
|
|
|
{
|
|
|
|
struct btrfs_fs_info *info = root->fs_info;
|
2016-08-22 23:55:16 +08:00
|
|
|
int ret;
|
2009-05-30 04:35:30 +08:00
|
|
|
|
2016-08-22 23:55:16 +08:00
|
|
|
ret = __recow_root(trans, info->fs_root);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
ret = __recow_root(trans, info->tree_root);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
ret = __recow_root(trans, info->chunk_root);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
ret = __recow_root(trans, info->dev_root);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2022-08-09 14:03:53 +08:00
|
|
|
|
|
|
|
if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE)) {
|
2022-03-08 06:10:51 +08:00
|
|
|
ret = __recow_root(trans, info->block_group_root);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
2021-11-09 03:26:43 +08:00
|
|
|
ret = recow_global_roots(trans);
|
2016-08-22 23:55:16 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
return 0;
|
2008-04-05 03:42:17 +08:00
|
|
|
}
|
|
|
|
|
2008-04-04 04:35:48 +08:00
|
|
|
/*
 * Allocate one chunk with the flags in @type, create its block group and add
 * the chunk size to the matching counter in @allocation.
 *
 * @trans:      transaction handle passed to the chunk/block group helpers
 * @root:       only used to reach root->fs_info
 * @type:       block group flags; the bits inside BTRFS_BLOCK_GROUP_TYPE_MASK
 *              select which @allocation counter is updated
 * @allocation: counters (data, metadata, system, mixed) updated on success
 *
 * Returns 0 on success, the error from btrfs_alloc_chunk() or
 * btrfs_make_block_group() if either fails, or -EINVAL when the type bits are
 * not DATA, METADATA, SYSTEM or DATA|METADATA.
 *
 * -ENOSPC from btrfs_alloc_chunk() is fatal: an error is printed and the
 * process exits with status 1.
 */
static int create_one_raid_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 type,
			      struct mkfs_allocation *allocation)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 chunk_start;
	u64 chunk_size;
	int ret;

	ret = btrfs_alloc_chunk(trans, fs_info,
				&chunk_start, &chunk_size, type);
	if (ret == -ENOSPC) {
		error("not enough free space to allocate chunk");
		exit(1);
	}
	if (ret)
		return ret;

	ret = btrfs_make_block_group(trans, fs_info, 0,
				     type, chunk_start, chunk_size);
	/*
	 * Stop here on failure: the chunk must not be accounted in @allocation
	 * and the real error code must not be replaced by -EINVAL below.
	 */
	if (ret)
		return ret;

	type &= BTRFS_BLOCK_GROUP_TYPE_MASK;
	switch (type) {
	case BTRFS_BLOCK_GROUP_DATA:
		allocation->data += chunk_size;
		break;
	case BTRFS_BLOCK_GROUP_METADATA:
		allocation->metadata += chunk_size;
		break;
	case BTRFS_BLOCK_GROUP_SYSTEM:
		allocation->system += chunk_size;
		break;
	case BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA:
		allocation->mixed += chunk_size;
		break;
	default:
		error("unrecognized profile type: 0x%llx", type);
		return -EINVAL;
	}

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Create the block groups for the requested profiles.
 *
 * When @metadata_profile is non-zero, a SYSTEM group and then a METADATA
 * group are created with that profile; with @mixed set, the METADATA group
 * also carries the DATA flag.  When @mixed is false and @data_profile is
 * non-zero, a separate DATA group is created with @data_profile.
 *
 * Each group is created through create_one_raid_group(), which also updates
 * @allocation.  Returns 0 on success or the first non-zero value returned by
 * create_one_raid_group().
 */
static int create_raid_groups(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 data_profile,
			      u64 metadata_profile, bool mixed,
			      struct mkfs_allocation *allocation)
{
	int err;

	if (metadata_profile) {
		u64 sys_flags = BTRFS_BLOCK_GROUP_SYSTEM | metadata_profile;
		u64 meta_flags = BTRFS_BLOCK_GROUP_METADATA | metadata_profile;

		if (mixed)
			meta_flags |= BTRFS_BLOCK_GROUP_DATA;

		err = create_one_raid_group(trans, root, sys_flags, allocation);
		if (err)
			return err;

		err = create_one_raid_group(trans, root, meta_flags, allocation);
		if (err)
			return err;
	}

	/* No separate data group in mixed mode or without a data profile */
	if (mixed || !data_profile)
		return 0;

	return create_one_raid_group(trans, root,
				     BTRFS_BLOCK_GROUP_DATA | data_profile,
				     allocation);
}
|
|
|
|
|
2023-01-12 01:49:52 +08:00
|
|
|
static const char * const mkfs_usage[] = {
|
|
|
|
"mkfs.btrfs [options] <dev> [<dev...>]",
|
|
|
|
"Create a BTRFS filesystem on a device or multiple devices",
|
|
|
|
"",
|
2023-01-12 01:56:46 +08:00
|
|
|
"Allocation profiles:",
|
|
|
|
OPTLINE("-d|--data PROFILE", "data profile, raid0, raid1, raid1c3, raid1c4, raid5, raid6, raid10, dup or single"),
|
|
|
|
OPTLINE("-m|--metadata PROFILE", "metadata profile, values like for data profile"),
|
|
|
|
OPTLINE("-M|--mixed","mix metadata and data together"),
|
|
|
|
"Features:",
|
|
|
|
OPTLINE("--csum TYPE", ""),
|
|
|
|
OPTLINE("--checksum TYPE", "checksum algorithm to use, crc32c (default), xxhash, sha256, blake2"),
|
|
|
|
OPTLINE("-n|--nodesize SIZE", "size of btree nodes"),
|
|
|
|
OPTLINE("-s|--sectorsize SIZE", "data block size (may not be mountable by current kernel)"),
|
|
|
|
OPTLINE("-O|--features LIST", "comma separated list of filesystem features (use '-O list-all' to list features)"),
|
|
|
|
OPTLINE("-L|--label LABEL", "set the filesystem label"),
|
|
|
|
OPTLINE("-U|--uuid UUID", "specify the filesystem UUID (must be unique)"),
|
|
|
|
"Creation:",
|
|
|
|
OPTLINE("-b|--byte-count SIZE", "set size of each device to SIZE (filesystem size is sum of all device sizes)"),
|
|
|
|
OPTLINE("-r|--rootdir DIR", "copy files from DIR to the image root directory"),
|
|
|
|
OPTLINE("--shrink", "(with --rootdir) shrink the filled filesystem to minimal size"),
|
|
|
|
OPTLINE("-K|--nodiscard", "do not perform whole device TRIM"),
|
|
|
|
OPTLINE("-f|--force", "force overwrite of existing filesystem"),
|
|
|
|
"General:",
|
|
|
|
OPTLINE("-q|--quiet", "no messages except errors"),
|
|
|
|
OPTLINE("-v|--verbose", "increase verbosity level, default is 1"),
|
|
|
|
OPTLINE("-V|--version", "print the mkfs.btrfs version and exit"),
|
|
|
|
OPTLINE("--help", "print this help and exit"),
|
|
|
|
"Deprecated:",
|
|
|
|
OPTLINE("-l|--leafsize SIZE", "removed in 6.0, use --nodesize"),
|
2023-04-11 10:31:05 +08:00
|
|
|
OPTLINE("-R|--runtime-features LIST", "removed in 6.3, use -O|--features"),
|
2023-01-12 01:49:52 +08:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct cmd_struct mkfs_cmd = {
|
|
|
|
.usagestr = mkfs_usage
|
|
|
|
};
|
|
|
|
|
2015-11-07 01:12:44 +08:00
|
|
|
/*
 * Zero the first 1M of @out_fd and extend the file to @size bytes.
 *
 * @out_fd: file descriptor written with pwrite()
 * @size:   requested file size in bytes, must be non-zero
 *
 * The first SZ_1M bytes are written as zeros in SZ_4K blocks, then a single
 * zero byte is written at offset @size - 1 to enlarge the file.
 *
 * Returns 0 on success, -EINVAL if @size is 0, or -EIO as soon as any
 * pwrite() writes fewer bytes than requested.  Nothing further is written
 * after the first failure.
 */
static int zero_output_file(int out_fd, u64 size)
{
	char buf[SZ_4K];
	u64 location;
	ssize_t written;

	/* size - 1 below would wrap around to a bogus offset */
	if (size == 0)
		return -EINVAL;

	memset(buf, 0, sizeof(buf));

	/* Only zero out the first 1M */
	for (location = 0; location < SZ_1M; location += SZ_4K) {
		written = pwrite(out_fd, buf, sizeof(buf), location);
		if (written != (ssize_t)sizeof(buf))
			return -EIO;
	}

	/* Then enlarge the file to size */
	written = pwrite(out_fd, buf, 1, size - 1);
	if (written < 1)
		return -EIO;

	return 0;
}
|
|
|
|
|
2015-11-03 19:03:00 +08:00
|
|
|
static int _cmp_device_by_id(void *priv, struct list_head *a,
|
|
|
|
struct list_head *b)
|
|
|
|
{
|
|
|
|
return list_entry(a, struct btrfs_device, dev_list)->devid -
|
|
|
|
list_entry(b, struct btrfs_device, dev_list)->devid;
|
|
|
|
}
|
|
|
|
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fields printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switch is passed, the output is more verbose; if the '-q'
switch is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
static void list_all_devices(struct btrfs_root *root)
|
|
|
|
{
|
|
|
|
struct btrfs_fs_devices *fs_devices;
|
|
|
|
struct btrfs_device *device;
|
|
|
|
int number_of_devices = 0;
|
|
|
|
|
|
|
|
fs_devices = root->fs_info->fs_devices;
|
|
|
|
|
|
|
|
list_for_each_entry(device, &fs_devices->devices, dev_list)
|
|
|
|
number_of_devices++;
|
|
|
|
|
2015-11-03 19:03:00 +08:00
|
|
|
list_sort(NULL, &fs_devices->devices, _cmp_device_by_id);
|
|
|
|
|
2015-06-08 22:26:54 +08:00
|
|
|
printf("Number of devices: %d\n", number_of_devices);
|
|
|
|
printf("Devices:\n");
|
|
|
|
printf(" ID SIZE PATH\n");
|
2015-11-03 19:03:00 +08:00
|
|
|
list_for_each_entry(device, &fs_devices->devices, dev_list) {
|
2015-06-08 22:26:54 +08:00
|
|
|
printf(" %3llu %10s %s\n",
|
2015-02-02 23:10:10 +08:00
|
|
|
device->devid,
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fileds printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switched is passed, the output is more verbose; if the '-q'
switched is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
pretty_size(device->total_bytes),
|
2015-02-02 23:11:23 +08:00
|
|
|
device->name);
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fileds printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switched is passed, the output is more verbose; if the '-q'
switched is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
|
2023-06-08 06:08:35 +08:00
|
|
|
/*
 * Tell whether the block group item @bgi stored in @node looks like a
 * temporary block group.
 *
 * A block group is reported as temporary when both conditions hold:
 *
 * 1) It is empty, i.e. its used byte count is 0.
 *
 * 2) Its profile bits differ from the profile requested for its type:
 *    @data_profile for DATA and mixed DATA|METADATA block groups,
 *    @meta_profile for METADATA and @sys_profile for SYSTEM.
 *    Only the bits covered by BTRFS_BLOCK_GROUP_PROFILE_MASK are compared.
 *
 * An empty block group whose profile matches the requested one is never
 * reported, and neither is a block group of any other type.
 *
 * Returns true for a temporary block group, false otherwise.
 */
static bool is_temp_block_group(struct extent_buffer *node,
				struct btrfs_block_group_item *bgi,
				u64 data_profile, u64 meta_profile,
				u64 sys_profile)
{
	const u64 bg_flags = btrfs_block_group_flags(node, bgi);
	const u64 bg_used = btrfs_block_group_used(node, bgi);
	u64 wanted_profile;

	/* Anything holding data is in use and must be kept */
	if (bg_used != 0)
		return false;

	/* Pick the profile requested for this type of block group */
	switch (bg_flags & BTRFS_BLOCK_GROUP_TYPE_MASK) {
	case BTRFS_BLOCK_GROUP_DATA:
	case BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA:
		wanted_profile = data_profile;
		break;
	case BTRFS_BLOCK_GROUP_METADATA:
		wanted_profile = meta_profile;
		break;
	case BTRFS_BLOCK_GROUP_SYSTEM:
		wanted_profile = sys_profile;
		break;
	default:
		return false;
	}

	return (bg_flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) !=
	       (wanted_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK);
}
|
|
|
|
|
|
|
|
/* Note: if current is a block group, it will skip it anyway */
|
|
|
|
static int next_block_group(struct btrfs_root *root,
|
|
|
|
struct btrfs_path *path)
|
|
|
|
{
|
|
|
|
struct btrfs_key key;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
ret = btrfs_next_item(root, path);
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
|
|
|
|
if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This function will cleanup */
|
|
|
|
static int cleanup_temp_chunks(struct btrfs_fs_info *fs_info,
|
|
|
|
struct mkfs_allocation *alloc,
|
|
|
|
u64 data_profile, u64 meta_profile,
|
|
|
|
u64 sys_profile)
|
|
|
|
{
|
|
|
|
struct btrfs_trans_handle *trans = NULL;
|
|
|
|
struct btrfs_block_group_item *bgi;
|
2022-03-08 06:10:48 +08:00
|
|
|
struct btrfs_root *root = btrfs_block_group_root(fs_info);
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and them
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
struct btrfs_key key;
|
|
|
|
struct btrfs_key found_key;
|
2016-11-03 07:37:51 +08:00
|
|
|
struct btrfs_path path;
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and them
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
int ret = 0;
|
|
|
|
|
2016-11-03 07:37:51 +08:00
|
|
|
btrfs_init_path(&path);
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and them
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
trans = btrfs_start_transaction(root, 1);
|
2022-09-30 20:44:38 +08:00
|
|
|
if (IS_ERR(trans)) {
|
|
|
|
ret = PTR_ERR(trans);
|
|
|
|
errno = -ret;
|
|
|
|
error_msg(ERROR_MSG_START_TRANS, "%m");
|
|
|
|
return ret;
|
|
|
|
}
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and them
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
|
|
|
|
key.objectid = 0;
|
|
|
|
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
|
|
|
|
key.offset = 0;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
/*
|
|
|
|
* as the rest of the loop may modify the tree, we need to
|
|
|
|
* start a new search each time.
|
|
|
|
*/
|
2016-11-03 07:37:51 +08:00
|
|
|
ret = btrfs_search_slot(trans, root, &key, &path, 0, 0);
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and them
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
2017-10-19 13:41:34 +08:00
|
|
|
/* Don't pollute ret for >0 case */
|
|
|
|
if (ret > 0)
|
|
|
|
ret = 0;
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and them
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
|
2016-11-03 07:37:51 +08:00
|
|
|
btrfs_item_key_to_cpu(path.nodes[0], &found_key,
|
|
|
|
path.slots[0]);
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and them
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
if (found_key.objectid < key.objectid)
|
|
|
|
goto out;
|
|
|
|
if (found_key.type != BTRFS_BLOCK_GROUP_ITEM_KEY) {
|
2016-11-03 07:37:51 +08:00
|
|
|
ret = next_block_group(root, &path);
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and then
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
if (ret > 0) {
|
|
|
|
ret = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
2016-11-03 07:37:51 +08:00
|
|
|
btrfs_item_key_to_cpu(path.nodes[0], &found_key,
|
|
|
|
path.slots[0]);
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and then
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
}
|
|
|
|
|
2016-11-03 07:37:51 +08:00
|
|
|
bgi = btrfs_item_ptr(path.nodes[0], path.slots[0],
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and then
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
struct btrfs_block_group_item);
|
2016-11-03 07:37:51 +08:00
|
|
|
if (is_temp_block_group(path.nodes[0], bgi,
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and then
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
data_profile, meta_profile,
|
|
|
|
sys_profile)) {
|
2020-05-01 14:52:16 +08:00
|
|
|
u64 flags = btrfs_block_group_flags(path.nodes[0], bgi);
|
2016-07-01 13:26:25 +08:00
|
|
|
|
2020-05-05 08:02:22 +08:00
|
|
|
ret = btrfs_remove_block_group(trans,
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and then
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
found_key.objectid, found_key.offset);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
2016-07-01 13:26:25 +08:00
|
|
|
|
|
|
|
if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
|
|
|
|
BTRFS_BLOCK_GROUP_DATA)
|
|
|
|
alloc->data -= found_key.offset;
|
|
|
|
else if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
|
|
|
|
BTRFS_BLOCK_GROUP_METADATA)
|
|
|
|
alloc->metadata -= found_key.offset;
|
|
|
|
else if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
|
|
|
|
BTRFS_BLOCK_GROUP_SYSTEM)
|
|
|
|
alloc->system -= found_key.offset;
|
|
|
|
else if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) ==
|
|
|
|
(BTRFS_BLOCK_GROUP_METADATA |
|
|
|
|
BTRFS_BLOCK_GROUP_DATA))
|
|
|
|
alloc->mixed -= found_key.offset;
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and then
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
}
|
2016-11-03 07:37:51 +08:00
|
|
|
btrfs_release_path(&path);
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and then
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
key.objectid = found_key.objectid + found_key.offset;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
if (trans)
|
|
|
|
btrfs_commit_transaction(trans, root);
|
2016-11-03 07:37:51 +08:00
|
|
|
btrfs_release_path(&path);
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and then
add device into the temporary btrfs.
When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-10-20 09:59:06 +08:00
|
|
|
/*
|
|
|
|
* Just update chunk allocation info, since --rootdir may allocate new
|
|
|
|
* chunks which is not updated in @allocation structure.
|
|
|
|
*/
|
|
|
|
static void update_chunk_allocation(struct btrfs_fs_info *fs_info,
|
|
|
|
struct mkfs_allocation *allocation)
|
|
|
|
{
|
2020-05-01 14:52:19 +08:00
|
|
|
struct btrfs_block_group *bg_cache;
|
2017-10-20 09:59:06 +08:00
|
|
|
const u64 mixed_flag = BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA;
|
|
|
|
u64 search_start = 0;
|
|
|
|
|
|
|
|
allocation->mixed = 0;
|
|
|
|
allocation->data = 0;
|
|
|
|
allocation->metadata = 0;
|
|
|
|
allocation->system = 0;
|
|
|
|
while (1) {
|
|
|
|
bg_cache = btrfs_lookup_first_block_group(fs_info,
|
|
|
|
search_start);
|
|
|
|
if (!bg_cache)
|
|
|
|
break;
|
|
|
|
if ((bg_cache->flags & mixed_flag) == mixed_flag)
|
2020-05-01 14:52:17 +08:00
|
|
|
allocation->mixed += bg_cache->length;
|
2017-10-20 09:59:06 +08:00
|
|
|
else if (bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)
|
2020-05-01 14:52:17 +08:00
|
|
|
allocation->data += bg_cache->length;
|
2017-10-20 09:59:06 +08:00
|
|
|
else if (bg_cache->flags & BTRFS_BLOCK_GROUP_METADATA)
|
2020-05-01 14:52:17 +08:00
|
|
|
allocation->metadata += bg_cache->length;
|
2017-10-20 09:59:06 +08:00
|
|
|
else
|
2020-05-01 14:52:17 +08:00
|
|
|
allocation->system += bg_cache->length;
|
|
|
|
search_start = bg_cache->start + bg_cache->length;
|
2017-10-20 09:59:06 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-03 15:32:18 +08:00
|
|
|
static int create_data_reloc_tree(struct btrfs_trans_handle *trans)
|
|
|
|
{
|
|
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
|
|
struct btrfs_inode_item *inode;
|
|
|
|
struct btrfs_root *root;
|
|
|
|
struct btrfs_path path;
|
2022-03-08 06:11:01 +08:00
|
|
|
struct btrfs_key key = {
|
|
|
|
.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID,
|
|
|
|
.type = BTRFS_ROOT_ITEM_KEY,
|
|
|
|
};
|
2019-01-03 15:32:18 +08:00
|
|
|
u64 ino = BTRFS_FIRST_FREE_OBJECTID;
|
|
|
|
char *name = "..";
|
|
|
|
int ret;
|
|
|
|
|
2022-03-08 06:11:01 +08:00
|
|
|
root = btrfs_create_tree(trans, fs_info, &key);
|
2019-01-03 15:32:18 +08:00
|
|
|
if (IS_ERR(root)) {
|
|
|
|
ret = PTR_ERR(root);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
/* Update dirid as created tree has default dirid 0 */
|
|
|
|
btrfs_set_root_dirid(&root->root_item, ino);
|
|
|
|
ret = btrfs_update_root(trans, fs_info->tree_root, &root->root_key,
|
|
|
|
&root->root_item);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* Cache this tree so it can be cleaned up at close_ctree() */
|
|
|
|
ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
|
|
|
|
btrfs_fs_roots_compare_roots);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* Insert INODE_ITEM */
|
|
|
|
ret = btrfs_new_inode(trans, root, ino, 0755 | S_IFDIR);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* then INODE_REF */
|
|
|
|
ret = btrfs_insert_inode_ref(trans, root, name, strlen(name), ino, ino,
|
|
|
|
0);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* Update nlink of that inode item */
|
|
|
|
key.objectid = ino;
|
|
|
|
key.type = BTRFS_INODE_ITEM_KEY;
|
|
|
|
key.offset = 0;
|
|
|
|
btrfs_init_path(&path);
|
|
|
|
|
|
|
|
ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
|
|
|
|
if (ret > 0) {
|
|
|
|
ret = -ENOENT;
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (ret < 0) {
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
inode = btrfs_item_ptr(path.nodes[0], path.slots[0],
|
|
|
|
struct btrfs_inode_item);
|
|
|
|
btrfs_set_inode_nlink(path.nodes[0], inode, 1);
|
|
|
|
btrfs_mark_buffer_dirty(path.nodes[0]);
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
return 0;
|
|
|
|
out:
|
|
|
|
btrfs_abort_transaction(trans, ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-01-03 15:32:21 +08:00
|
|
|
static int create_uuid_tree(struct btrfs_trans_handle *trans)
|
|
|
|
{
|
|
|
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
|
|
struct btrfs_root *root;
|
2022-03-08 06:11:01 +08:00
|
|
|
struct btrfs_key key = {
|
|
|
|
.objectid = BTRFS_UUID_TREE_OBJECTID,
|
|
|
|
.type = BTRFS_ROOT_ITEM_KEY,
|
|
|
|
};
|
2019-01-03 15:32:21 +08:00
|
|
|
int ret = 0;
|
|
|
|
|
2023-04-20 05:13:46 +08:00
|
|
|
UASSERT(fs_info->uuid_root == NULL);
|
2022-03-08 06:11:01 +08:00
|
|
|
root = btrfs_create_tree(trans, fs_info, &key);
|
2019-01-03 15:32:21 +08:00
|
|
|
if (IS_ERR(root)) {
|
|
|
|
ret = PTR_ERR(root);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
add_root_to_dirty_list(root);
|
|
|
|
fs_info->uuid_root = root;
|
|
|
|
ret = btrfs_uuid_tree_add(trans, fs_info->fs_root->root_item.uuid,
|
|
|
|
BTRFS_UUID_KEY_SUBVOL,
|
|
|
|
fs_info->fs_root->root_key.objectid);
|
|
|
|
if (ret < 0)
|
|
|
|
btrfs_abort_transaction(trans, ret);
|
|
|
|
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2022-03-08 06:11:03 +08:00
|
|
|
/*
 * Create one global root tree, keyed (@objectid, ROOT_ITEM, @root_id), and
 * insert it into the global root collection of the filesystem.
 *
 * Returns 0 on success. Any non-zero result aborts the transaction with
 * that code and is returned to the caller.
 */
static int create_global_root(struct btrfs_trans_handle *trans, u64 objectid,
			      int root_id)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_key key = {
		.objectid = objectid,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = root_id,
	};
	struct btrfs_root *new_root;
	int ret;

	new_root = btrfs_create_tree(trans, fs_info, &key);
	if (IS_ERR(new_root))
		ret = PTR_ERR(new_root);
	else
		ret = btrfs_global_root_insert(fs_info, new_root);

	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;
}
|
|
|
|
|
|
|
|
static int create_global_roots(struct btrfs_trans_handle *trans,
|
|
|
|
int nr_global_roots)
|
|
|
|
{
|
|
|
|
int ret, i;
|
|
|
|
|
|
|
|
for (i = 1; i < nr_global_roots; i++) {
|
|
|
|
ret = create_global_root(trans, BTRFS_EXTENT_TREE_OBJECTID, i);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
ret = create_global_root(trans, BTRFS_CSUM_TREE_OBJECTID, i);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
ret = create_global_root(trans, BTRFS_FREE_SPACE_TREE_OBJECTID, i);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
btrfs_set_super_nr_global_roots(trans->fs_info->super_copy,
|
|
|
|
nr_global_roots);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-19 04:21:44 +08:00
|
|
|
static int insert_qgroup_items(struct btrfs_trans_handle *trans,
|
|
|
|
struct btrfs_fs_info *fs_info,
|
|
|
|
u64 qgroupid)
|
|
|
|
{
|
|
|
|
struct btrfs_path path;
|
|
|
|
struct btrfs_root *quota_root = fs_info->quota_root;
|
|
|
|
struct btrfs_key key;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT) {
|
|
|
|
error("qgroup level other than 0 is not supported yet");
|
|
|
|
return -ENOTTY;
|
|
|
|
}
|
|
|
|
|
|
|
|
key.objectid = 0;
|
|
|
|
key.type = BTRFS_QGROUP_INFO_KEY;
|
|
|
|
key.offset = qgroupid;
|
|
|
|
|
|
|
|
btrfs_init_path(&path);
|
|
|
|
ret = btrfs_insert_empty_item(trans, quota_root, &path, &key,
|
|
|
|
sizeof(struct btrfs_qgroup_info_item));
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
key.objectid = 0;
|
|
|
|
key.type = BTRFS_QGROUP_LIMIT_KEY;
|
|
|
|
key.offset = qgroupid;
|
|
|
|
ret = btrfs_insert_empty_item(trans, quota_root, &path, &key,
|
|
|
|
sizeof(struct btrfs_qgroup_limit_item));
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2023-09-28 01:46:46 +08:00
|
|
|
/*
|
|
|
|
* Workaround for squota so the enable_gen can be properly used.
|
|
|
|
*/
|
|
|
|
static int touch_root_subvol(struct btrfs_fs_info *fs_info)
|
|
|
|
{
|
|
|
|
struct btrfs_trans_handle *trans;
|
|
|
|
struct btrfs_key key = {
|
|
|
|
.objectid = BTRFS_FIRST_FREE_OBJECTID,
|
|
|
|
.type = BTRFS_INODE_ITEM_KEY,
|
|
|
|
.offset = 0,
|
|
|
|
};
|
|
|
|
struct extent_buffer *leaf;
|
|
|
|
int slot;
|
|
|
|
struct btrfs_path path;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
trans = btrfs_start_transaction(fs_info->fs_root, 1);
|
|
|
|
if (IS_ERR(trans)) {
|
|
|
|
ret = PTR_ERR(trans);
|
|
|
|
errno = -ret;
|
|
|
|
error_msg(ERROR_MSG_START_TRANS, "%m");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
btrfs_init_path(&path);
|
|
|
|
ret = btrfs_search_slot(trans, fs_info->fs_root, &key, &path, 0, 1);
|
|
|
|
if (ret)
|
|
|
|
goto fail;
|
|
|
|
leaf = path.nodes[0];
|
|
|
|
slot = path.slots[0];
|
|
|
|
btrfs_item_key_to_cpu(leaf, &key, slot);
|
|
|
|
btrfs_mark_buffer_dirty(leaf);
|
|
|
|
ret = btrfs_commit_transaction(trans, fs_info->fs_root);
|
|
|
|
if (ret < 0) {
|
|
|
|
errno = -ret;
|
|
|
|
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
return 0;
|
|
|
|
fail:
|
|
|
|
btrfs_abort_transaction(trans, ret);
|
|
|
|
btrfs_release_path(&path);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-03-19 04:21:45 +08:00
|
|
|
static int setup_quota_root(struct btrfs_fs_info *fs_info)
|
|
|
|
{
|
|
|
|
struct btrfs_trans_handle *trans;
|
|
|
|
struct btrfs_qgroup_status_item *qsi;
|
|
|
|
struct btrfs_root *quota_root;
|
|
|
|
struct btrfs_path path;
|
|
|
|
struct btrfs_key key;
|
|
|
|
int qgroup_repaired = 0;
|
2023-09-28 01:46:46 +08:00
|
|
|
bool simple = btrfs_fs_incompat(fs_info, SIMPLE_QUOTA);
|
|
|
|
int flags;
|
2020-03-19 04:21:45 +08:00
|
|
|
int ret;
|
|
|
|
|
2023-09-28 01:46:46 +08:00
|
|
|
|
2020-03-19 04:21:45 +08:00
|
|
|
/* One to modify tree root, one for quota root */
|
|
|
|
trans = btrfs_start_transaction(fs_info->tree_root, 2);
|
|
|
|
if (IS_ERR(trans)) {
|
|
|
|
ret = PTR_ERR(trans);
|
2022-09-30 15:12:06 +08:00
|
|
|
errno = -ret;
|
|
|
|
error_msg(ERROR_MSG_START_TRANS, "%m");
|
2020-03-19 04:21:45 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
ret = btrfs_create_root(trans, fs_info, BTRFS_QUOTA_TREE_OBJECTID);
|
|
|
|
if (ret < 0) {
|
|
|
|
error("failed to create quota root: %d (%m)", ret);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
quota_root = fs_info->quota_root;
|
|
|
|
|
|
|
|
key.objectid = 0;
|
|
|
|
key.type = BTRFS_QGROUP_STATUS_KEY;
|
|
|
|
key.offset = 0;
|
|
|
|
|
|
|
|
btrfs_init_path(&path);
|
|
|
|
ret = btrfs_insert_empty_item(trans, quota_root, &path, &key,
|
|
|
|
sizeof(*qsi));
|
|
|
|
if (ret < 0) {
|
|
|
|
error("failed to insert qgroup status item: %d (%m)", ret);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
qsi = btrfs_item_ptr(path.nodes[0], path.slots[0],
|
|
|
|
struct btrfs_qgroup_status_item);
|
2023-09-28 01:46:46 +08:00
|
|
|
btrfs_set_qgroup_status_generation(path.nodes[0], qsi, trans->transid);
|
2020-03-19 04:21:45 +08:00
|
|
|
btrfs_set_qgroup_status_rescan(path.nodes[0], qsi, 0);
|
2023-09-28 01:46:46 +08:00
|
|
|
flags = BTRFS_QGROUP_STATUS_FLAG_ON;
|
|
|
|
if (simple) {
|
|
|
|
btrfs_set_qgroup_status_enable_gen(path.nodes[0], qsi, trans->transid);
|
|
|
|
flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
|
|
|
|
}
|
2020-03-19 04:21:45 +08:00
|
|
|
|
2023-09-28 01:46:46 +08:00
|
|
|
btrfs_set_qgroup_status_version(path.nodes[0], qsi, 1);
|
|
|
|
btrfs_set_qgroup_status_flags(path.nodes[0], qsi, flags);
|
2020-03-19 04:21:45 +08:00
|
|
|
btrfs_release_path(&path);
|
|
|
|
|
|
|
|
/* Currently mkfs will only create one subvolume */
|
|
|
|
ret = insert_qgroup_items(trans, fs_info, BTRFS_FS_TREE_OBJECTID);
|
|
|
|
if (ret < 0) {
|
|
|
|
error("failed to insert qgroup items: %d (%m)", ret);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = btrfs_commit_transaction(trans, fs_info->tree_root);
|
|
|
|
if (ret < 0) {
|
2022-09-30 15:12:06 +08:00
|
|
|
errno = -ret;
|
|
|
|
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
|
2020-03-19 04:21:45 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2023-09-28 01:46:46 +08:00
|
|
|
/* Hack to count the default subvol metadata by dirtying it */
|
|
|
|
if (simple) {
|
|
|
|
ret = touch_root_subvol(fs_info);
|
|
|
|
if (ret) {
|
|
|
|
error("failed to touch root dir for simple quota accounting %d (%m)", ret);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-19 04:21:45 +08:00
|
|
|
/*
|
|
|
|
* Qgroup is setup but with wrong info, use qgroup-verify
|
|
|
|
* infrastructure to repair them. (Just acts as offline rescan)
|
|
|
|
*/
|
|
|
|
ret = qgroup_verify_all(fs_info);
|
|
|
|
if (ret < 0) {
|
|
|
|
error("qgroup rescan failed: %d (%m)", ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
ret = repair_qgroups(fs_info, &qgroup_repaired, true);
|
|
|
|
if (ret < 0)
|
|
|
|
error("failed to fill qgroup info: %d (%m)", ret);
|
|
|
|
return ret;
|
|
|
|
fail:
|
|
|
|
btrfs_abort_transaction(trans, ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2023-09-15 00:05:35 +08:00
|
|
|
static int setup_raid_stripe_tree_root(struct btrfs_fs_info *fs_info)
|
|
|
|
{
|
|
|
|
struct btrfs_trans_handle *trans;
|
|
|
|
struct btrfs_root *stripe_root;
|
|
|
|
struct btrfs_key key = {
|
|
|
|
.objectid = BTRFS_RAID_STRIPE_TREE_OBJECTID,
|
|
|
|
.type = BTRFS_ROOT_ITEM_KEY,
|
|
|
|
};
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
trans = btrfs_start_transaction(fs_info->tree_root, 0);
|
|
|
|
if (IS_ERR(trans))
|
|
|
|
return PTR_ERR(trans);
|
|
|
|
|
|
|
|
stripe_root = btrfs_create_tree(trans, fs_info, &key);
|
|
|
|
if (IS_ERR(stripe_root)) {
|
|
|
|
ret = PTR_ERR(stripe_root);
|
|
|
|
btrfs_abort_transaction(trans, ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
fs_info->stripe_root = stripe_root;
|
|
|
|
add_root_to_dirty_list(stripe_root);
|
|
|
|
|
|
|
|
ret = btrfs_commit_transaction(trans, fs_info->tree_root);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-09-04 18:47:20 +08:00
|
|
|
/* Thread callback for device preparation */
|
|
|
|
static void *prepare_one_device(void *ctx)
|
|
|
|
{
|
|
|
|
struct prepare_device_progress *prepare_ctx = ctx;
|
|
|
|
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs there is a chance
that no /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and close all the fds until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
prepare_ctx->fd = open(prepare_ctx->file, opt_oflags);
|
|
|
|
if (prepare_ctx->fd < 0) {
|
2022-09-04 18:47:20 +08:00
|
|
|
error("unable to open %s: %m", prepare_ctx->file);
|
|
|
|
prepare_ctx->ret = -errno;
|
|
|
|
return NULL;
|
|
|
|
}
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs there is a chance
that no /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and close all the fds until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
prepare_ctx->ret = btrfs_prepare_device(prepare_ctx->fd,
|
|
|
|
prepare_ctx->file,
|
2022-09-04 18:47:20 +08:00
|
|
|
&prepare_ctx->dev_block_count,
|
|
|
|
prepare_ctx->block_count,
|
|
|
|
(bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
|
|
|
|
(opt_zero_end ? PREP_DEVICE_ZERO_END : 0) |
|
|
|
|
(opt_discard ? PREP_DEVICE_DISCARD : 0) |
|
|
|
|
(opt_zoned ? PREP_DEVICE_ZONED : 0));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2015-06-22 00:23:19 +08:00
|
|
|
int BOX_MAIN(mkfs)(int argc, char **argv)
|
2007-03-21 08:35:03 +08:00
|
|
|
{
|
|
|
|
char *file;
|
2008-04-18 22:31:42 +08:00
|
|
|
struct btrfs_root *root;
|
2016-08-23 22:18:33 +08:00
|
|
|
struct btrfs_fs_info *fs_info;
|
2008-04-18 22:31:42 +08:00
|
|
|
struct btrfs_trans_handle *trans;
|
2023-06-13 18:26:53 +08:00
|
|
|
struct open_ctree_args oca = { 0 };
|
2017-11-29 16:07:34 +08:00
|
|
|
int ret = 0;
|
2017-10-19 13:41:35 +08:00
|
|
|
int close_ret;
|
2008-04-18 22:31:42 +08:00
|
|
|
int i;
|
2021-09-28 02:13:26 +08:00
|
|
|
bool ssd = false;
|
2017-10-19 17:13:55 +08:00
|
|
|
bool shrink_rootdir = false;
|
2011-06-21 09:45:59 +08:00
|
|
|
u64 source_dir_size = 0;
|
2017-10-16 16:22:56 +08:00
|
|
|
u64 min_dev_size;
|
2017-10-19 17:13:55 +08:00
|
|
|
u64 shrink_size;
|
2022-09-30 14:12:54 +08:00
|
|
|
int device_count = 0;
|
2013-04-15 14:38:09 +08:00
|
|
|
int saved_optind;
|
2022-09-04 18:47:20 +08:00
|
|
|
pthread_t *t_prepare = NULL;
|
|
|
|
struct prepare_device_progress *prepare_ctx = NULL;
|
2015-06-08 18:54:54 +08:00
|
|
|
struct mkfs_allocation allocation = { 0 };
|
2015-07-01 23:49:21 +08:00
|
|
|
struct btrfs_mkfs_config mkfs_cfg;
|
2021-04-26 14:27:38 +08:00
|
|
|
u64 system_group_size;
|
2022-09-30 14:07:25 +08:00
|
|
|
/* Options */
|
|
|
|
bool force_overwrite = false;
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merge the "-O" and "-R" options, as they don't correctly represent
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily lead to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate for features which don't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also means things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's no longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
struct btrfs_mkfs_features features = btrfs_mkfs_default_features;
|
2022-09-30 14:07:25 +08:00
|
|
|
enum btrfs_csum_type csum_type = BTRFS_CSUM_TYPE_CRC32;
|
|
|
|
char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
|
|
|
|
u32 nodesize = 0;
|
|
|
|
bool nodesize_forced = false;
|
|
|
|
u32 sectorsize = 0;
|
|
|
|
u32 stripesize = 4096;
|
|
|
|
u64 metadata_profile = 0;
|
2022-09-30 14:09:57 +08:00
|
|
|
bool metadata_profile_set = false;
|
2022-09-30 14:07:25 +08:00
|
|
|
u64 data_profile = 0;
|
2022-09-30 14:09:57 +08:00
|
|
|
bool data_profile_set = false;
|
2022-09-30 14:07:25 +08:00
|
|
|
u64 block_count = 0;
|
|
|
|
u64 dev_block_count = 0;
|
|
|
|
bool mixed = false;
|
|
|
|
char *label = NULL;
|
2022-03-08 06:11:03 +08:00
|
|
|
int nr_global_roots = sysconf(_SC_NPROCESSORS_ONLN);
|
2022-09-30 14:07:25 +08:00
|
|
|
char *source_dir = NULL;
|
2010-07-08 17:17:59 +08:00
|
|
|
|
2023-02-16 10:30:46 +08:00
|
|
|
cpu_detect_flags();
|
|
|
|
hash_init_accel();
|
2021-09-28 02:26:32 +08:00
|
|
|
btrfs_config_init();
|
btrfs-progs: mkfs: fix a stack over-flow when features string are too long
[BUG]
Even with the chunk_objectid bug fixed, mkfs.btrfs can still cause a stack
overflow when enabling the extent-tree-v2 feature (needs experimental
features enabled):
# ./mkfs.btrfs -f -O extent-tree-v2 ~/test.img
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
NOTE: several default settings have changed in version 5.15, please make sure
this does not affect your deployments:
- DUP for metadata (-m dup)
- enabled no-holes (-O no-holes)
- enabled free-space-tree (-R free-space-tree)
Label: (null)
UUID: 205c61e7-f58e-4e8f-9dc2-38724f5c554b
Node size: 16384
Sector size: 4096
Filesystem size: 512.00MiB
Block group profiles:
Data: single 8.00MiB
Metadata: DUP 32.00MiB
System: DUP 8.00MiB
SSD detected: no
Zoned device: no
=================================================================
[... Skip full ASAN output ...]
==65655==ABORTING
[CAUSE]
For experimental build, we have unified feature output, but the old
buffer size is only 64 bytes, which is too small to cover the new full
feature string:
extref, skinny-metadata, no-holes, free-space-tree, block-group-tree, extent-tree-v2
Above feature string is already 84 bytes, over the 64 on-stack memory
size.
This can also be proved by the ASAN output:
==65655==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7ffc4e03b1d0 at pc 0x7ff0fc05fafe bp 0x7ffc4e03ac60 sp 0x7ffc4e03a408
WRITE of size 17 at 0x7ffc4e03b1d0 thread T0
#0 0x7ff0fc05fafd in __interceptor_strcat /usr/src/debug/gcc/libsanitizer/asan/asan_interceptors.cpp:377
#1 0x55cdb7b06ca5 in parse_features_to_string common/fsfeatures.c:316
#2 0x55cdb7b06ce1 in btrfs_parse_fs_features_to_string common/fsfeatures.c:324
#3 0x55cdb7a37226 in main mkfs/main.c:1783
#4 0x7ff0fbe3c28f (/usr/lib/libc.so.6+0x2328f)
#5 0x7ff0fbe3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349)
#6 0x55cdb7a2cb34 in _start ../sysdeps/x86_64/start.S:115
[FIX]
Introduce a new macro, BTRFS_FEATURE_STRING_BUF_SIZE, along with a new
sanity check helper, btrfs_assert_feature_buf_size().
The problem is that I cannot find a build-time method to verify that
BTRFS_FEATURE_STRING_BUF_SIZE is large enough to contain all feature
names, so a runtime function with a BUG_ON() is used instead to verify
the macro size.
Now the minimal buffer size for experimental build is 138 bytes, just
bump it to 160 for future expansion.
And if further features go beyond that number, mkfs.btrfs/btrfs-convert
will immediately crash at that BUG_ON(), so we can definitely detect it.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Tested-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-07 20:03:01 +08:00
|
|
|
btrfs_assert_feature_buf_size();
|
2019-05-27 12:46:27 +08:00
|
|
|
|
2007-10-16 04:25:14 +08:00
|
|
|
while(1) {
|
|
|
|
int c;
|
2022-03-08 06:11:03 +08:00
|
|
|
enum {
|
2022-06-21 08:20:21 +08:00
|
|
|
GETOPT_VAL_SHRINK = GETOPT_VAL_FIRST,
|
2022-03-08 06:11:03 +08:00
|
|
|
GETOPT_VAL_CHECKSUM,
|
|
|
|
GETOPT_VAL_GLOBAL_ROOTS,
|
|
|
|
};
|
2015-01-19 20:44:49 +08:00
|
|
|
static const struct option long_options[] = {
|
2015-04-08 23:39:51 +08:00
|
|
|
{ "byte-count", required_argument, NULL, 'b' },
|
2019-09-03 23:00:42 +08:00
|
|
|
{ "csum", required_argument, NULL,
|
|
|
|
GETOPT_VAL_CHECKSUM },
|
|
|
|
{ "checksum", required_argument, NULL,
|
|
|
|
GETOPT_VAL_CHECKSUM },
|
2015-04-08 23:39:51 +08:00
|
|
|
{ "force", no_argument, NULL, 'f' },
|
|
|
|
{ "leafsize", required_argument, NULL, 'l' },
|
|
|
|
{ "label", required_argument, NULL, 'L'},
|
|
|
|
{ "metadata", required_argument, NULL, 'm' },
|
|
|
|
{ "mixed", no_argument, NULL, 'M' },
|
|
|
|
{ "nodesize", required_argument, NULL, 'n' },
|
|
|
|
{ "sectorsize", required_argument, NULL, 's' },
|
|
|
|
{ "data", required_argument, NULL, 'd' },
|
|
|
|
{ "version", no_argument, NULL, 'V' },
|
|
|
|
{ "rootdir", required_argument, NULL, 'r' },
|
|
|
|
{ "nodiscard", no_argument, NULL, 'K' },
|
|
|
|
{ "features", required_argument, NULL, 'O' },
|
2018-05-08 14:31:53 +08:00
|
|
|
{ "runtime-features", required_argument, NULL, 'R' },
|
2015-01-19 20:30:06 +08:00
|
|
|
{ "uuid", required_argument, NULL, 'U' },
|
2014-12-18 04:14:05 +08:00
|
|
|
{ "quiet", 0, NULL, 'q' },
|
2021-09-28 02:18:07 +08:00
|
|
|
{ "verbose", 0, NULL, 'v' },
|
2017-10-19 17:13:55 +08:00
|
|
|
{ "shrink", no_argument, NULL, GETOPT_VAL_SHRINK },
|
2022-03-08 06:11:03 +08:00
|
|
|
#if EXPERIMENTAL
|
|
|
|
{ "num-global-roots", required_argument, NULL, GETOPT_VAL_GLOBAL_ROOTS },
|
|
|
|
#endif
|
2015-06-11 06:04:19 +08:00
|
|
|
{ "help", no_argument, NULL, GETOPT_VAL_HELP },
|
2015-01-19 20:30:06 +08:00
|
|
|
{ NULL, 0, NULL, 0}
|
|
|
|
};
|
|
|
|
|
2021-09-28 02:18:07 +08:00
|
|
|
c = getopt_long(argc, argv, "A:b:fl:n:s:m:d:L:R:O:r:U:VvMKq",
|
2015-04-08 23:33:55 +08:00
|
|
|
long_options, NULL);
|
2007-10-16 04:25:14 +08:00
|
|
|
if (c < 0)
|
|
|
|
break;
|
|
|
|
switch(c) {
|
2013-02-15 02:30:03 +08:00
|
|
|
case 'f':
|
2021-09-28 02:13:26 +08:00
|
|
|
force_overwrite = true;
|
2013-02-15 02:30:03 +08:00
|
|
|
break;
|
2008-04-04 04:35:48 +08:00
|
|
|
case 'd':
|
2022-09-14 22:53:54 +08:00
|
|
|
ret = parse_bg_profile(optarg, &data_profile);
|
|
|
|
if (ret) {
|
|
|
|
error("unknown data profile %s", optarg);
|
|
|
|
exit(1);
|
|
|
|
}
|
2022-09-30 14:09:57 +08:00
|
|
|
data_profile_set = true;
|
2008-04-04 04:35:48 +08:00
|
|
|
break;
|
2007-10-16 04:25:14 +08:00
|
|
|
case 'l':
|
2022-09-30 14:27:12 +08:00
|
|
|
/* Deprecated in 4.0 */
|
|
|
|
error("--leafsize has been removed in 6.0, use --nodesize");
|
|
|
|
ret = 1;
|
|
|
|
goto error;
|
2012-03-27 04:17:08 +08:00
|
|
|
case 'n':
|
2021-01-21 23:25:51 +08:00
|
|
|
nodesize = parse_size_from_string(optarg);
|
2021-09-28 02:13:26 +08:00
|
|
|
nodesize_forced = true;
|
2007-10-16 04:25:14 +08:00
|
|
|
break;
|
2008-04-18 22:31:42 +08:00
|
|
|
case 'L':
|
2022-09-14 22:57:36 +08:00
|
|
|
free(label);
|
|
|
|
ret = strlen(optarg);
|
|
|
|
if (ret >= BTRFS_LABEL_SIZE) {
|
|
|
|
error("label %s is too long (max %d)",
|
|
|
|
optarg, BTRFS_LABEL_SIZE - 1);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
label = strdup(optarg);
|
2008-04-18 22:31:42 +08:00
|
|
|
break;
|
|
|
|
case 'm':
|
2022-09-14 22:53:54 +08:00
|
|
|
ret = parse_bg_profile(optarg, &metadata_profile);
|
|
|
|
if (ret) {
|
|
|
|
error("unknown metadata profile %s", optarg);
|
|
|
|
exit(1);
|
|
|
|
}
|
2022-09-30 14:09:57 +08:00
|
|
|
metadata_profile_set = true;
|
2010-12-10 02:31:08 +08:00
|
|
|
break;
|
|
|
|
case 'M':
|
2021-09-28 02:13:26 +08:00
|
|
|
mixed = true;
|
2008-04-18 22:31:42 +08:00
|
|
|
break;
|
2013-05-16 23:04:04 +08:00
|
|
|
case 'O': {
|
|
|
|
char *orig = strdup(optarg);
|
|
|
|
char *tmp = orig;
|
|
|
|
|
2015-03-24 02:20:37 +08:00
|
|
|
tmp = btrfs_parse_fs_features(tmp, &features);
|
2013-05-16 23:04:04 +08:00
|
|
|
if (tmp) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("unrecognized filesystem feature '%s'",
|
2013-05-16 23:04:04 +08:00
|
|
|
tmp);
|
|
|
|
free(orig);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2013-05-16 23:04:04 +08:00
|
|
|
}
|
|
|
|
free(orig);
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merge the "-O" and "-R" options, as they don't correctly represent
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily lead to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate for features which don't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also means things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's no longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
if (features.runtime_flags &
|
|
|
|
BTRFS_FEATURE_RUNTIME_LIST_ALL) {
|
|
|
|
btrfs_list_all_fs_features(NULL);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto success;
|
2013-05-16 23:04:04 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2018-05-08 14:31:53 +08:00
|
|
|
case 'R': {
|
|
|
|
char *orig = strdup(optarg);
|
|
|
|
char *tmp = orig;
|
|
|
|
|
2023-04-11 10:31:05 +08:00
|
|
|
warning("runtime features are deprecated, use -O|--features instead");
|
2018-05-08 14:31:53 +08:00
|
|
|
tmp = btrfs_parse_runtime_features(tmp,
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merge the "-O" and "-R" options, as they don't correctly represent
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily lead to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate for features which don't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also means things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's no longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
&features);
|
2018-05-08 14:31:53 +08:00
|
|
|
if (tmp) {
|
|
|
|
error("unrecognized runtime feature '%s'",
|
|
|
|
tmp);
|
|
|
|
free(orig);
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
free(orig);
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merge the "-O" and "-R" options, as they don't correctly represent
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily lead to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate for features which don't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also means things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's no longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
if (features.runtime_flags &
|
|
|
|
BTRFS_FEATURE_RUNTIME_LIST_ALL) {
|
|
|
|
btrfs_list_all_runtime_features(NULL);
|
2018-05-08 14:31:53 +08:00
|
|
|
goto success;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2007-12-01 00:30:24 +08:00
|
|
|
case 's':
|
2021-01-21 23:25:51 +08:00
|
|
|
sectorsize = parse_size_from_string(optarg);
|
2007-12-01 00:30:24 +08:00
|
|
|
break;
|
2008-03-25 03:04:49 +08:00
|
|
|
case 'b':
|
2021-01-21 23:25:51 +08:00
|
|
|
block_count = parse_size_from_string(optarg);
|
2022-09-04 18:47:20 +08:00
|
|
|
opt_zero_end = false;
|
2008-03-25 03:04:49 +08:00
|
|
|
break;
|
2021-09-28 02:18:07 +08:00
|
|
|
case 'v':
|
2021-09-28 02:26:32 +08:00
|
|
|
bconf_be_verbose();
|
2021-09-28 02:18:07 +08:00
|
|
|
break;
|
2009-06-04 00:00:20 +08:00
|
|
|
case 'V':
|
2016-08-23 22:12:36 +08:00
|
|
|
printf("mkfs.btrfs, part of %s\n",
|
|
|
|
PACKAGE_STRING);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto success;
|
2010-07-08 17:17:59 +08:00
|
|
|
case 'r':
|
2022-09-14 23:03:18 +08:00
|
|
|
free(source_dir);
|
|
|
|
source_dir = strdup(optarg);
|
2010-07-08 17:17:59 +08:00
|
|
|
break;
|
2014-05-15 01:39:07 +08:00
|
|
|
case 'U':
|
2014-12-18 04:14:09 +08:00
|
|
|
strncpy(fs_uuid, optarg,
|
|
|
|
BTRFS_UUID_UNPARSED_SIZE - 1);
|
2014-05-15 01:39:07 +08:00
|
|
|
break;
|
2012-07-17 18:30:16 +08:00
|
|
|
case 'K':
|
2022-09-04 18:47:20 +08:00
|
|
|
opt_discard = false;
|
2012-07-06 22:11:10 +08:00
|
|
|
break;
|
2014-12-18 04:14:05 +08:00
|
|
|
case 'q':
|
2021-09-28 02:26:32 +08:00
|
|
|
bconf_be_quiet();
|
2014-12-18 04:14:05 +08:00
|
|
|
break;
|
2017-10-19 17:13:55 +08:00
|
|
|
case GETOPT_VAL_SHRINK:
|
|
|
|
shrink_rootdir = true;
|
|
|
|
break;
|
2019-09-03 23:00:42 +08:00
|
|
|
case GETOPT_VAL_CHECKSUM:
|
|
|
|
csum_type = parse_csum_type(optarg);
|
|
|
|
break;
|
2022-03-08 06:11:03 +08:00
|
|
|
case GETOPT_VAL_GLOBAL_ROOTS:
|
2022-10-20 22:36:10 +08:00
|
|
|
btrfs_warn_experimental("Feature: num-global-roots is part of exten-tree-v2");
|
2022-03-08 06:11:03 +08:00
|
|
|
nr_global_roots = (int)arg_strtou64(optarg);
|
|
|
|
break;
|
2015-06-11 06:04:19 +08:00
|
|
|
case GETOPT_VAL_HELP:
|
2007-10-16 04:25:14 +08:00
|
|
|
default:
|
2023-02-22 07:28:06 +08:00
|
|
|
usage(&mkfs_cmd, c != GETOPT_VAL_HELP);
|
2007-10-16 04:25:14 +08:00
|
|
|
}
|
|
|
|
}
|
2015-09-26 00:15:44 +08:00
|
|
|
|
2021-09-28 02:26:32 +08:00
|
|
|
if (bconf.verbose) {
|
2015-10-30 23:58:52 +08:00
|
|
|
printf("%s\n", PACKAGE_STRING);
|
|
|
|
printf("See %s for more information.\n\n", PACKAGE_URL);
|
|
|
|
}
|
|
|
|
|
2020-10-13 09:06:02 +08:00
|
|
|
if (!sectorsize)
|
|
|
|
sectorsize = (u32)sysconf(_SC_PAGESIZE);
|
|
|
|
if (btrfs_check_sectorsize(sectorsize))
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
if (!nodesize)
|
|
|
|
nodesize = max_t(u32, sectorsize, BTRFS_MKFS_DEFAULT_NODE_SIZE);
|
|
|
|
|
2016-06-17 13:37:54 +08:00
|
|
|
stripesize = sectorsize;
|
2013-04-15 14:38:09 +08:00
|
|
|
saved_optind = optind;
|
2022-09-30 14:12:54 +08:00
|
|
|
device_count = argc - optind;
|
|
|
|
if (device_count == 0)
|
2023-02-22 07:28:06 +08:00
|
|
|
usage(&mkfs_cmd, 1);
|
2008-03-25 03:04:49 +08:00
|
|
|
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merge the "-O" and "-R" options, as they don't correctly represent
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily lead to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate for features which don't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also means things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's no longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
opt_zoned = !!(features.incompat_flags & BTRFS_FEATURE_INCOMPAT_ZONED);
|
2021-04-26 14:27:36 +08:00
|
|
|
|
2023-05-27 03:13:06 +08:00
|
|
|
if (source_dir && device_count > 1) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("the option -r is limited to a single device");
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2013-04-15 14:38:09 +08:00
|
|
|
}
|
2023-05-27 03:13:06 +08:00
|
|
|
if (shrink_rootdir && source_dir == NULL) {
|
2017-10-19 17:13:55 +08:00
|
|
|
error("the option --shrink must be used with --rootdir");
|
|
|
|
goto error;
|
|
|
|
}
|
2014-05-15 01:39:07 +08:00
|
|
|
|
2014-12-18 04:14:09 +08:00
|
|
|
if (*fs_uuid) {
|
2014-05-15 01:39:07 +08:00
|
|
|
uuid_t dummy_uuid;
|
|
|
|
|
|
|
|
if (uuid_parse(fs_uuid, dummy_uuid) != 0) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("could not parse UUID: %s", fs_uuid);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2014-05-15 01:39:07 +08:00
|
|
|
}
|
2023-09-28 09:09:18 +08:00
|
|
|
/* We allow non-unique fsid for single device btrfs filesystem. */
|
|
|
|
if (device_count != 1 && !test_uuid_unique(fs_uuid)) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("non-unique UUID: %s", fs_uuid);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2014-05-15 01:39:07 +08:00
|
|
|
}
|
|
|
|
}
|
2015-10-15 01:39:37 +08:00
|
|
|
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and keep all the fds open until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate for the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
for (i = 0; i < device_count; i++) {
|
2016-03-01 23:28:11 +08:00
|
|
|
file = argv[optind++];
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and keep all the fds open until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate for the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
|
2023-05-27 03:13:06 +08:00
|
|
|
if (source_dir && path_exists(file) == 0)
|
2017-11-29 16:07:34 +08:00
|
|
|
ret = 0;
|
2019-07-02 03:29:43 +08:00
|
|
|
else if (path_is_block_device(file) == 1)
|
2017-11-24 13:21:15 +08:00
|
|
|
ret = test_dev_for_mkfs(file, force_overwrite);
|
|
|
|
else
|
|
|
|
ret = test_status_for_mkfs(file, force_overwrite);
|
|
|
|
|
|
|
|
if (ret)
|
|
|
|
goto error;
|
2013-04-15 14:38:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
optind = saved_optind;
|
2022-09-30 14:12:54 +08:00
|
|
|
device_count = argc - optind;
|
2013-04-15 14:38:09 +08:00
|
|
|
|
2016-03-01 23:28:11 +08:00
|
|
|
file = argv[optind++];
|
2022-09-14 22:42:53 +08:00
|
|
|
ssd = device_get_rotational(file);
|
2022-09-04 18:47:20 +08:00
|
|
|
if (opt_zoned) {
|
2021-04-26 14:27:36 +08:00
|
|
|
if (!zone_size(file)) {
|
|
|
|
error("zoned: %s: zone size undefined", file);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
} else if (zoned_model(file) == ZONED_HOST_MANAGED) {
|
2021-09-28 02:26:32 +08:00
|
|
|
if (bconf.verbose)
|
2021-04-26 14:27:36 +08:00
|
|
|
printf(
|
|
|
|
"Zoned: %s: host-managed device detected, setting zoned feature\n",
|
|
|
|
file);
|
2022-09-04 18:47:20 +08:00
|
|
|
opt_zoned = true;
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merge the "-O" and "-R" options, as they don't correctly represent
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily lead to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate for features which don't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also means things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's no longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to use
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_ZONED;
|
2021-04-26 14:27:36 +08:00
|
|
|
}
|
2013-08-07 20:11:25 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Set default profiles according to number of added devices.
|
|
|
|
* For mixed groups defaults are single/single.
|
|
|
|
*/
|
|
|
|
if (!mixed) {
|
2020-07-21 18:13:27 +08:00
|
|
|
u64 tmp;
|
|
|
|
|
2022-09-30 14:09:57 +08:00
|
|
|
if (!metadata_profile_set) {
|
2022-09-30 14:12:54 +08:00
|
|
|
if (device_count > 1)
|
2020-07-21 18:13:27 +08:00
|
|
|
tmp = BTRFS_MKFS_DEFAULT_META_MULTI_DEVICE;
|
2021-09-28 06:29:53 +08:00
|
|
|
else
|
|
|
|
tmp = BTRFS_MKFS_DEFAULT_META_ONE_DEVICE;
|
2020-07-21 18:13:27 +08:00
|
|
|
metadata_profile = tmp;
|
2013-08-07 20:11:25 +08:00
|
|
|
}
|
2022-09-30 14:09:57 +08:00
|
|
|
if (!data_profile_set) {
|
2022-09-30 14:12:54 +08:00
|
|
|
if (device_count > 1)
|
2020-07-21 18:13:27 +08:00
|
|
|
tmp = BTRFS_MKFS_DEFAULT_DATA_MULTI_DEVICE;
|
|
|
|
else
|
|
|
|
tmp = BTRFS_MKFS_DEFAULT_DATA_ONE_DEVICE;
|
|
|
|
data_profile = tmp;
|
2013-08-07 20:11:25 +08:00
|
|
|
}
|
|
|
|
} else {
|
2022-09-30 14:09:57 +08:00
|
|
|
if (metadata_profile_set || data_profile_set) {
|
2013-11-15 19:11:09 +08:00
|
|
|
if (metadata_profile != data_profile) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error(
|
|
|
|
"with mixed block groups data and metadata profiles must be the same");
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2013-11-15 19:11:09 +08:00
|
|
|
}
|
|
|
|
}
|
2013-11-09 02:51:52 +08:00
|
|
|
|
2015-09-26 00:15:44 +08:00
|
|
|
if (!nodesize_forced)
|
2022-04-03 15:20:11 +08:00
|
|
|
nodesize = sectorsize;
|
2013-08-07 20:11:25 +08:00
|
|
|
}
|
2015-10-15 01:40:38 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* FS features that can be set by other means than -O
|
|
|
|
* just set the bit here
|
|
|
|
*/
|
|
|
|
if (mixed)
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merged "-O" and "-R" options, as they don't correctly represents
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily leads to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS;
|
2015-10-15 01:40:38 +08:00
|
|
|
|
2021-09-04 03:41:13 +08:00
|
|
|
if ((data_profile | metadata_profile) & BTRFS_BLOCK_GROUP_RAID56_MASK) {
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merged "-O" and "-R" options, as they don't correctly represents
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily leads to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_RAID56;
|
2020-08-26 00:51:02 +08:00
|
|
|
warning("RAID5/6 support has known problems is strongly discouraged\n"
|
|
|
|
"\t to be used besides testing or evaluation.\n");
|
2015-10-15 01:40:38 +08:00
|
|
|
}
|
|
|
|
|
2019-11-01 02:43:17 +08:00
|
|
|
if ((data_profile | metadata_profile) &
|
|
|
|
(BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4)) {
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merged "-O" and "-R" options, as they don't correctly represents
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily leads to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_RAID1C34;
|
2019-11-01 02:43:17 +08:00
|
|
|
}
|
|
|
|
|
2021-11-06 04:40:27 +08:00
|
|
|
/* Extent tree v2 comes with a set of mandatory features. */
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merged "-O" and "-R" options, as they don't correctly represents
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily leads to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2) {
|
|
|
|
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_NO_HOLES;
|
|
|
|
features.compat_ro_flags |=
|
|
|
|
BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |
|
|
|
|
BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID |
|
|
|
|
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE;
|
2022-03-08 06:11:03 +08:00
|
|
|
|
|
|
|
if (!nr_global_roots) {
|
|
|
|
error("you must set a non-zero num-global-roots value");
|
|
|
|
exit(1);
|
|
|
|
}
|
2021-11-06 04:40:27 +08:00
|
|
|
}
|
|
|
|
|
2022-08-09 14:03:55 +08:00
|
|
|
/* Block group tree feature requires no-holes and free-space-tree. */
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merged "-O" and "-R" options, as they don't correctly represents
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily leads to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
if (features.compat_ro_flags & BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE &&
|
|
|
|
(!(features.incompat_flags & BTRFS_FEATURE_INCOMPAT_NO_HOLES) ||
|
|
|
|
!(features.compat_ro_flags & BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))) {
|
2022-08-09 14:03:55 +08:00
|
|
|
error("block group tree requires no-holes and free-space-tree features");
|
|
|
|
exit(1);
|
|
|
|
}
|
2022-09-04 18:47:20 +08:00
|
|
|
if (opt_zoned) {
|
2023-01-16 11:08:53 +08:00
|
|
|
const int blkid_version = blkid_get_library_version(NULL, NULL);
|
|
|
|
|
2023-05-27 03:13:06 +08:00
|
|
|
if (source_dir) {
|
2021-04-26 14:27:37 +08:00
|
|
|
error("the option -r and zoned mode are incompatible");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merged "-O" and "-R" options, as they don't correctly represents
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily leads to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) {
|
2021-04-26 14:27:37 +08:00
|
|
|
error("cannot enable mixed-bg in zoned mode");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merged "-O" and "-R" options, as they don't correctly represents
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily leads to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_RAID56) {
|
2021-04-26 14:27:37 +08:00
|
|
|
error("cannot enable RAID5/6 in zoned mode");
|
|
|
|
exit(1);
|
|
|
|
}
|
2023-01-16 11:08:53 +08:00
|
|
|
|
|
|
|
if (blkid_version < 2380)
|
|
|
|
warning("libblkid < 2.38 does not support zoned mode's superblock location, update recommended");
|
2021-04-26 14:27:37 +08:00
|
|
|
}
|
|
|
|
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merged "-O" and "-R" options, as they don't correctly represents
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily leads to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for above case, mkfs.btrfs believe it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
if (btrfs_check_nodesize(nodesize, sectorsize, &features))
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2013-08-07 20:11:25 +08:00
|
|
|
|
2016-09-01 02:16:35 +08:00
|
|
|
if (sectorsize < sizeof(struct btrfs_super_block)) {
|
|
|
|
error("sectorsize smaller than superblock: %u < %zu",
|
|
|
|
sectorsize, sizeof(struct btrfs_super_block));
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2016-09-01 02:16:35 +08:00
|
|
|
}
|
|
|
|
|
2017-10-12 14:24:34 +08:00
|
|
|
min_dev_size = btrfs_min_dev_size(nodesize, mixed, metadata_profile,
|
|
|
|
data_profile);
|
btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier
Use an easier method to calculate the estimate device size for
mkfs.btrfs --rootdir.
The new method will over-estimate, but should ensure we won't encounter
ENOSPC.
It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size
Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier
PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE, and 32M for DUP, get from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.
Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier
Total_data_usage is the sum of *rounded up* size of each regular inode
use.
min_chunk_size is 8M for SINGLE, 64M for DUP, taken from btrfs_alloc_chunk().
Same profile_multiplier for meta.
This over-estimating calculation is, of course, inaccurate, but since we will
later shrink the fs to its real usage, it doesn't matter much now.
Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
2017-10-19 10:11:07 +08:00
|
|
|
/*
|
|
|
|
* Enlarge the destination file or create a new one, using the size
|
|
|
|
* calculated from source dir.
|
|
|
|
*
|
|
|
|
* This must be done before minimal device size checks.
|
|
|
|
*/
|
2023-05-27 03:13:06 +08:00
|
|
|
if (source_dir) {
|
2017-11-29 16:42:05 +08:00
|
|
|
int oflags = O_RDWR;
|
|
|
|
struct stat statbuf;
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and keep all the fds open until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
int fd;
|
2017-11-29 16:42:05 +08:00
|
|
|
|
2019-07-02 06:07:11 +08:00
|
|
|
if (path_exists(file) == 0)
|
2017-11-29 16:42:05 +08:00
|
|
|
oflags |= O_CREAT;
|
|
|
|
|
|
|
|
fd = open(file, oflags, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP |
|
|
|
|
S_IROTH);
|
btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier
Use an easier method to calculate the estimate device size for
mkfs.btrfs --rootdir.
The new method will over-estimate, but should ensure we won't encounter
ENOSPC.
It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size
Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier
PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE, and 32M for DUP, get from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.
Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier
Total_data_usage is the sum of *rounded up* size of each regular inode
use.
min_chunk_size is 8M for SINGLE, 64M for DUP, taken from btrfs_alloc_chunk().
Same profile_multiplier as for meta.
This over-estimating calculation is, of course, inaccurate, but since we will
later shrink the fs to its real usage, it doesn't matter much now.
Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
2017-10-19 10:11:07 +08:00
|
|
|
if (fd < 0) {
|
2018-01-08 05:54:21 +08:00
|
|
|
error("unable to open %s: %m", file);
|
btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier
Use an easier method to calculate the estimate device size for
mkfs.btrfs --rootdir.
The new method will over-estimate, but should ensure we won't encounter
ENOSPC.
It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size
Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier
PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE, and 32M for DUP, get from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.
Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier
Total_data_usage is the sum of *rounded up* size of each regular inode
use.
min_chunk_size is 8M for SINGLE, 64M for DUP, taken from btrfs_alloc_chunk().
Same profile_multiplier as for meta.
This over-estimating calculation is, of course, inaccurate, but since we will
later shrink the fs to its real usage, it doesn't matter much now.
Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
2017-10-19 10:11:07 +08:00
|
|
|
goto error;
|
|
|
|
}
|
2018-01-08 05:54:21 +08:00
|
|
|
|
2017-11-29 16:42:05 +08:00
|
|
|
ret = fstat(fd, &statbuf);
|
|
|
|
if (ret < 0) {
|
2018-01-08 05:54:21 +08:00
|
|
|
error("unable to stat %s: %m", file);
|
2017-11-29 16:42:05 +08:00
|
|
|
ret = -errno;
|
|
|
|
goto error;
|
|
|
|
}
|
btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier
Use an easier method to calculate the estimate device size for
mkfs.btrfs --rootdir.
The new method will over-estimate, but should ensure we won't encounter
ENOSPC.
It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size
Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier
PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE, and 32M for DUP, get from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.
Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier
Total_data_usage is the sum of *rounded up* size of each regular inode
use.
min_chunk_size is 8M for SINGLE, 64M for DUP, taken from btrfs_alloc_chunk().
Same profile_multiplier as for meta.
This over-estimating calculation is, of course, inaccurate, but since we will
later shrink the fs to its real usage, it doesn't matter much now.
Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
2017-10-19 10:11:07 +08:00
|
|
|
|
2017-11-29 16:42:05 +08:00
|
|
|
/*
|
|
|
|
* Block_count not specified, use file/device size first.
|
|
|
|
* Or we will always use source_dir_size calculated for mkfs.
|
|
|
|
*/
|
|
|
|
if (!block_count)
|
2022-09-30 21:36:16 +08:00
|
|
|
block_count = device_get_partition_size_fd_stat(fd, &statbuf);
|
btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier
Use an easier method to calculate the estimate device size for
mkfs.btrfs --rootdir.
The new method will over-estimate, but should ensure we won't encounter
ENOSPC.
It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size
Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier
PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE, and 32M for DUP, get from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.
Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier
Total_data_usage is the sum of *rounded up* size of each regular inode
use.
min_chunk_size is 8M for SINGLE, 64M for DUP, taken from btrfs_alloc_chunk().
Same profile_multiplier as for meta.
This over-estimating calculation is, of course, inaccurate, but since we will
later shrink the fs to its real usage, it doesn't matter much now.
Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
2017-10-19 10:11:07 +08:00
|
|
|
source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
|
|
|
|
min_dev_size, metadata_profile, data_profile);
|
|
|
|
if (block_count < source_dir_size)
|
|
|
|
block_count = source_dir_size;
|
|
|
|
ret = zero_output_file(fd, block_count);
|
|
|
|
if (ret) {
|
|
|
|
error("unable to zero the output file");
|
|
|
|
close(fd);
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
/* our "device" is the new image file */
|
|
|
|
dev_block_count = block_count;
|
|
|
|
close(fd);
|
|
|
|
}
|
2015-02-02 22:51:15 +08:00
|
|
|
/* Check device/block_count after the nodesize is determined */
|
2017-10-16 16:22:56 +08:00
|
|
|
if (block_count && block_count < min_dev_size) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("size %llu is too small to make a usable filesystem",
|
2014-07-04 15:29:17 +08:00
|
|
|
block_count);
|
2016-08-19 00:38:34 +08:00
|
|
|
error("minimum size for btrfs filesystem is %llu",
|
2017-10-16 16:22:56 +08:00
|
|
|
min_dev_size);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2014-07-04 15:29:17 +08:00
|
|
|
}
|
2021-05-12 15:53:05 +08:00
|
|
|
/*
|
|
|
|
* 2 zones for the primary superblock
|
|
|
|
* 1 zone for the system block group
|
|
|
|
* 1 zone for a metadata block group
|
|
|
|
* 1 zone for a data block group
|
|
|
|
*/
|
2022-09-04 18:47:20 +08:00
|
|
|
if (opt_zoned && block_count && block_count < 5 * zone_size(file)) {
|
2021-05-12 15:53:05 +08:00
|
|
|
error("size %llu is too small to make a usable filesystem",
|
|
|
|
block_count);
|
|
|
|
error("minimum size for a zoned btrfs filesystem is %llu",
|
|
|
|
min_dev_size);
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2022-09-30 14:12:54 +08:00
|
|
|
for (i = saved_optind; i < saved_optind + device_count; i++) {
|
2014-07-04 15:29:17 +08:00
|
|
|
char *path;
|
|
|
|
|
2016-03-01 23:28:11 +08:00
|
|
|
path = argv[i];
|
2017-10-16 16:22:56 +08:00
|
|
|
ret = test_minimum_size(path, min_dev_size);
|
2014-07-04 15:29:17 +08:00
|
|
|
if (ret < 0) {
|
2018-01-08 05:54:21 +08:00
|
|
|
error("failed to check size for %s: %m", path);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2014-07-04 15:29:17 +08:00
|
|
|
}
|
|
|
|
if (ret > 0) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("'%s' is too small to make a usable filesystem",
|
2014-07-04 15:29:17 +08:00
|
|
|
path);
|
2016-08-19 00:38:34 +08:00
|
|
|
error("minimum size for each btrfs device is %llu",
|
2017-10-16 16:22:56 +08:00
|
|
|
min_dev_size);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2014-07-04 15:29:17 +08:00
|
|
|
}
|
|
|
|
}
|
2013-08-07 20:11:25 +08:00
|
|
|
ret = test_num_disk_vs_raid(metadata_profile, data_profile,
|
2022-09-30 14:12:54 +08:00
|
|
|
device_count, mixed, ssd);
|
2015-06-11 06:51:15 +08:00
|
|
|
if (ret)
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2013-08-07 20:11:25 +08:00
|
|
|
|
2023-09-15 00:05:35 +08:00
|
|
|
#if EXPERIMENTAL
|
|
|
|
if (opt_zoned && device_count) {
|
|
|
|
switch (data_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
|
|
|
|
case BTRFS_BLOCK_GROUP_DUP:
|
|
|
|
case BTRFS_BLOCK_GROUP_RAID1:
|
|
|
|
case BTRFS_BLOCK_GROUP_RAID1C3:
|
|
|
|
case BTRFS_BLOCK_GROUP_RAID1C4:
|
|
|
|
case BTRFS_BLOCK_GROUP_RAID0:
|
|
|
|
case BTRFS_BLOCK_GROUP_RAID10:
|
|
|
|
features.incompat_flags |= BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (opt_zoned) {
|
|
|
|
u64 metadata = BTRFS_BLOCK_GROUP_METADATA | metadata_profile;
|
|
|
|
u64 data = BTRFS_BLOCK_GROUP_DATA | data_profile;
|
|
|
|
bool rst = false;
|
|
|
|
|
|
|
|
#if EXPERIMENTAL
|
|
|
|
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE)
|
|
|
|
rst = true;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (!zoned_profile_supported(metadata, rst) ||
|
|
|
|
!zoned_profile_supported(data, rst)) {
|
|
|
|
error("zoned mode does not yet support RAID/DUP profiles, please specify '-d single -m single' manually");
|
|
|
|
goto error;
|
|
|
|
}
|
2021-04-26 14:27:37 +08:00
|
|
|
}
|
|
|
|
|
2022-09-30 14:12:54 +08:00
|
|
|
t_prepare = calloc(device_count, sizeof(*t_prepare));
|
|
|
|
prepare_ctx = calloc(device_count, sizeof(*prepare_ctx));
|
2013-04-15 14:38:09 +08:00
|
|
|
|
2022-09-04 18:47:20 +08:00
|
|
|
if (!t_prepare || !prepare_ctx) {
|
2022-09-30 15:12:06 +08:00
|
|
|
error_msg(ERROR_MSG_MEMORY, "thread for preparing devices");
|
2022-09-04 18:47:20 +08:00
|
|
|
goto error;
|
|
|
|
}
|
2021-10-05 14:23:05 +08:00
|
|
|
|
2022-09-04 18:47:20 +08:00
|
|
|
opt_oflags = O_RDWR;
|
2022-09-30 14:12:54 +08:00
|
|
|
for (i = 0; i < device_count; i++) {
|
2022-09-04 18:47:20 +08:00
|
|
|
if (opt_zoned &&
|
|
|
|
zoned_model(argv[optind + i - 1]) == ZONED_HOST_MANAGED) {
|
|
|
|
opt_oflags |= O_DIRECT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Start threads */
|
2022-09-30 14:12:54 +08:00
|
|
|
for (i = 0; i < device_count; i++) {
|
2022-09-04 18:47:20 +08:00
|
|
|
prepare_ctx[i].file = argv[optind + i - 1];
|
|
|
|
prepare_ctx[i].block_count = block_count;
|
|
|
|
prepare_ctx[i].dev_block_count = block_count;
|
|
|
|
ret = pthread_create(&t_prepare[i], NULL, prepare_one_device,
|
|
|
|
&prepare_ctx[i]);
|
|
|
|
if (ret) {
|
|
|
|
errno = -ret;
|
|
|
|
error("failed to create thread for prepare device %s: %m",
|
|
|
|
prepare_ctx[i].file);
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Wait for threads */
|
2022-09-30 14:12:54 +08:00
|
|
|
for (i = 0; i < device_count; i++)
|
2022-09-04 18:47:20 +08:00
|
|
|
pthread_join(t_prepare[i], NULL);
|
|
|
|
ret = prepare_ctx[0].ret;
|
|
|
|
|
|
|
|
if (ret) {
|
|
|
|
error("unable prepare device: %s", prepare_ctx[0].file);
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
dev_block_count = prepare_ctx[0].dev_block_count;
|
btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier
Use an easier method to calculate the estimate device size for
mkfs.btrfs --rootdir.
The new method will over-estimate, but should ensure we won't encounter
ENOSPC.
It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size
Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier
PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE, and 32M for DUP, get from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.
Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier
Total_data_usage is the sum of *rounded up* size of each regular inode
use.
min_chunk_size is 8M for SINGLE, 64M for DUP, taken from btrfs_alloc_chunk().
Same profile_multiplier as for meta.
This over-estimating calculation is, of course, inaccurate, but since we will
later shrink the fs to its real usage, it doesn't matter much now.
Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
2017-10-19 10:11:07 +08:00
|
|
|
if (block_count && block_count > dev_block_count) {
|
|
|
|
error("%s is smaller than requested size, expected %llu, found %llu",
|
2022-09-28 07:13:00 +08:00
|
|
|
file, block_count, dev_block_count);
|
btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier
Use an easier method to calculate the estimate device size for
mkfs.btrfs --rootdir.
The new method will over-estimate, but should ensure we won't encounter
ENOSPC.
It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size
Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier
PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE, and 32M for DUP, get from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.
Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier
Total_data_usage is the sum of *rounded up* size of each regular inode
use.
min_chunk_size is 8M for SINGLE, 64M for DUP, taken from btrfs_alloc_chunk().
Same profile_multiplier as for meta.
This over-estimating calculation is, of course, inaccurate, but since we will
later shrink the fs to its real usage, it doesn't matter much now.
Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
2017-10-19 10:11:07 +08:00
|
|
|
goto error;
|
2007-03-21 08:35:03 +08:00
|
|
|
}
|
2013-01-20 02:06:21 +08:00
|
|
|
|
2013-09-05 14:53:34 +08:00
|
|
|
/* To create the first block group and chunk 0 in make_btrfs */
|
2022-09-04 18:47:20 +08:00
|
|
|
system_group_size = (opt_zoned ? zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE);
|
2021-04-26 14:27:38 +08:00
|
|
|
if (dev_block_count < system_group_size) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("device is too small to make filesystem, must be at least %llu",
|
2022-09-28 07:13:00 +08:00
|
|
|
system_group_size);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2013-09-05 14:53:34 +08:00
|
|
|
}
|
2013-01-20 02:06:21 +08:00
|
|
|
|
2021-09-07 22:38:56 +08:00
|
|
|
if (btrfs_bg_type_to_tolerated_failures(metadata_profile) <
|
|
|
|
btrfs_bg_type_to_tolerated_failures(data_profile))
|
2016-08-19 00:38:34 +08:00
|
|
|
warning("metadata has lower redundancy than data!\n");
|
2015-05-30 22:54:48 +08:00
|
|
|
|
2023-05-29 15:45:42 +08:00
|
|
|
if (bconf.verbose) {
|
|
|
|
printf("NOTE: several default settings have changed in version 5.15, please make sure\n");
|
|
|
|
printf(" this does not affect your deployments:\n");
|
|
|
|
printf(" - DUP for metadata (-m dup)\n");
|
|
|
|
printf(" - enabled no-holes (-O no-holes)\n");
|
|
|
|
printf(" - enabled free-space-tree (-R free-space-tree)\n");
|
|
|
|
printf("\n");
|
|
|
|
}
|
2021-09-29 22:00:14 +08:00
|
|
|
|
2015-07-01 23:49:21 +08:00
|
|
|
mkfs_cfg.label = label;
|
2016-09-01 01:38:31 +08:00
|
|
|
memcpy(mkfs_cfg.fs_uuid, fs_uuid, sizeof(mkfs_cfg.fs_uuid));
|
2015-07-01 23:49:21 +08:00
|
|
|
mkfs_cfg.num_bytes = dev_block_count;
|
|
|
|
mkfs_cfg.nodesize = nodesize;
|
|
|
|
mkfs_cfg.sectorsize = sectorsize;
|
|
|
|
mkfs_cfg.stripesize = stripesize;
|
|
|
|
mkfs_cfg.features = features;
|
2019-09-03 23:00:42 +08:00
|
|
|
mkfs_cfg.csum_type = csum_type;
|
2022-02-23 06:26:12 +08:00
|
|
|
mkfs_cfg.leaf_data_size = __BTRFS_LEAF_DATA_SIZE(nodesize);
|
2022-09-04 18:47:20 +08:00
|
|
|
if (opt_zoned)
|
2021-10-05 14:22:59 +08:00
|
|
|
mkfs_cfg.zone_size = zone_size(file);
|
|
|
|
else
|
|
|
|
mkfs_cfg.zone_size = 0;
|
2015-07-01 23:49:21 +08:00
|
|
|
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and keep all the fds open until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
ret = make_btrfs(prepare_ctx[0].fd, &mkfs_cfg);
|
2007-03-21 08:35:03 +08:00
|
|
|
if (ret) {
|
2018-10-25 20:10:54 +08:00
|
|
|
errno = -ret;
|
|
|
|
error("error during mkfs: %m");
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2007-03-21 08:35:03 +08:00
|
|
|
}
|
2011-12-13 02:00:25 +08:00
|
|
|
|
2023-06-13 18:26:53 +08:00
|
|
|
oca.filename = file;
|
|
|
|
oca.flags = OPEN_CTREE_WRITES | OPEN_CTREE_TEMPORARY_SUPER;
|
|
|
|
fs_info = open_ctree_fs_info(&oca);
|
2016-08-23 22:18:33 +08:00
|
|
|
if (!fs_info) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("open ctree failed");
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2011-12-13 02:00:25 +08:00
|
|
|
}
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and keep all the fds open until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
|
2016-08-23 22:18:33 +08:00
|
|
|
root = fs_info->fs_root;
|
2008-09-24 00:29:10 +08:00
|
|
|
|
Revert "btrfs-progs: mkfs: create only desired block groups for single device"
This reverts commit 5f8232e5c8f0b0de0ef426274911385b0e877392.
This commit causes a regression:
$ mkfs.btrfs -f /dev/sda6
$ btrfsck /dev/sda6
Checking filesystem on /dev/sda6
UUID: 2ebb483c-1986-4610-802a-c6f3e6ab4b76
checking extents
Chunk[256, 228, 0]: length(4194304), offset(0), type(2) mismatch with
block group[0, 192, 4194304]: offset(4194304), objectid(0), flags(34)
Chunk[256, 228, 4194304]: length(8388608), offset(4194304), type(4)
mismatch with block group[4194304, 192, 8388608]: offset(8388608),
objectid(4194304), flags(36)
Block group[0, 4194304] (flags = 34) didn't find the relative chunk.
Block group[4194304, 8388608] (flags = 36) didn't find the relative
chunk.
......
The commit has the following bug causing the problem.
1) Typo forgets to add meta/data_profile for alloc_chunk.
Only meta/data_profile is added to allocate a block group, but not
chunk.
2) Type for the first system chunk is impossible to modify yet.
The type for the first chunk and its stripe is hard coded into
make_btrfs() function.
So even if we try to modify the type of the block group, we are unable to
change the type of the first chunk.
Causing the chunk type mismatch problem.
The 1st bug can be fixed quite easily but the second is not.
The good news is, the last patch "btrfs-progs: mkfs: Cleanup temporary
chunk to avoid strange balance behavior." from my patchset can handle it
quite well alone.
So just revert the patch.
A new bug fix for btrfsck (err is 0 even if the chunk/extent tree is corrupted) and
new test cases for mkfs will follow soon.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-14 10:13:01 +08:00
|
|
|
ret = create_metadata_block_groups(root, mixed, &allocation);
|
2015-07-02 01:12:38 +08:00
|
|
|
if (ret) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("failed to create default block groups: %d", ret);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2015-07-02 01:12:38 +08:00
|
|
|
}
|
|
|
|
|
2023-09-15 00:05:35 +08:00
|
|
|
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE) {
|
|
|
|
ret = setup_raid_stripe_tree_root(fs_info);
|
|
|
|
if (ret < 0) {
|
|
|
|
error("failed to initialize raid-stripe-tree: %d (%m)", ret);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-02 01:15:35 +08:00
|
|
|
trans = btrfs_start_transaction(root, 1);
|
2017-08-28 22:48:16 +08:00
|
|
|
if (IS_ERR(trans)) {
|
2022-09-30 15:12:06 +08:00
|
|
|
errno = -PTR_ERR(trans);
|
|
|
|
error_msg(ERROR_MSG_START_TRANS, "%m");
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2015-07-11 06:18:21 +08:00
|
|
|
}
|
2015-07-02 01:15:35 +08:00
|
|
|
|
Revert "btrfs-progs: mkfs: create only desired block groups for single device"
This reverts commit 5f8232e5c8f0b0de0ef426274911385b0e877392.
This commit causes a regression:
$ mkfs.btrfs -f /dev/sda6
$ btrfsck /dev/sda6
Checking filesystem on /dev/sda6
UUID: 2ebb483c-1986-4610-802a-c6f3e6ab4b76
checking extents
Chunk[256, 228, 0]: length(4194304), offset(0), type(2) mismatch with
block group[0, 192, 4194304]: offset(4194304), objectid(0), flags(34)
Chunk[256, 228, 4194304]: length(8388608), offset(4194304), type(4)
mismatch with block group[4194304, 192, 8388608]: offset(8388608),
objectid(4194304), flags(36)
Block group[0, 4194304] (flags = 34) didn't find the relative chunk.
Block group[4194304, 8388608] (flags = 36) didn't find the relative
chunk.
......
The commit has the following bug causing the problem.
1) Typo forgets to add meta/data_profile for alloc_chunk.
Only meta/data_profile is added to allocate a block group, but not
chunk.
2) Type for the first system chunk is impossible to modify yet.
The type for the first chunk and its stripe is hard coded into
make_btrfs() function.
So even if we try to modify the type of the block group, we are unable to
change the type of the first chunk.
Causing the chunk type mismatch problem.
The 1st bug can be fixed quite easily but the second is not.
The good news is, the last patch "btrfs-progs: mkfs: Cleanup temporary
chunk to avoid strange balance behavior." from my patchset can handle it
quite well alone.
So just revert the patch.
A new bug fix for btrfsck (err is 0 even if the chunk/extent tree is corrupted) and
new test cases for mkfs will follow soon.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-14 10:13:01 +08:00
|
|
|
ret = create_data_block_groups(trans, root, mixed, &allocation);
|
2015-07-02 01:19:05 +08:00
|
|
|
if (ret) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("failed to create default data block groups: %d", ret);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2015-07-02 01:19:05 +08:00
|
|
|
}
|
|
|
|
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merge the "-O" and "-R" options, as they don't correctly represent
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() will return a u64, and reuse the
same u64 for both incompat and compat RO flags for experimental branch.
This can easily lead to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for the above case, mkfs.btrfs believes it has set MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last one, runtime_flag, is to compensate for features which don't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also means things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's no longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to use
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2) {
|
2022-03-08 06:11:03 +08:00
|
|
|
ret = create_global_roots(trans, nr_global_roots);
|
|
|
|
if (ret) {
|
|
|
|
error("failed to create global roots: %d", ret);
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-02-10 00:42:02 +08:00
|
|
|
ret = make_root_dir(trans, root);
|
2007-03-21 23:13:29 +08:00
|
|
|
if (ret) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("failed to setup the root directory: %d", ret);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2007-03-21 23:13:29 +08:00
|
|
|
}
|
2008-09-24 00:29:10 +08:00
|
|
|
|
2016-08-23 00:18:14 +08:00
|
|
|
ret = btrfs_commit_transaction(trans, root);
|
|
|
|
if (ret) {
|
2022-09-30 15:12:06 +08:00
|
|
|
errno = -ret;
|
|
|
|
error_msg(ERROR_MSG_COMMIT_TRANS, "%m");
|
2016-08-23 00:18:14 +08:00
|
|
|
goto out;
|
|
|
|
}
|
2015-07-02 01:15:35 +08:00
|
|
|
|
2008-04-04 04:35:48 +08:00
|
|
|
trans = btrfs_start_transaction(root, 1);
|
2017-08-28 22:48:16 +08:00
|
|
|
if (IS_ERR(trans)) {
|
2022-09-30 15:12:06 +08:00
|
|
|
errno = -PTR_ERR(trans);
|
|
|
|
error_msg(ERROR_MSG_START_TRANS, "%m");
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2015-07-11 06:18:21 +08:00
|
|
|
}
|
2008-04-04 04:35:48 +08:00
|
|
|
|
2022-09-30 14:12:54 +08:00
|
|
|
if (device_count == 0)
|
Revert "btrfs-progs: mkfs: create only desired block groups for single device"
This reverts commit 5f8232e5c8f0b0de0ef426274911385b0e877392.
This commit causes a regression:
$ mkfs.btrfs -f /dev/sda6
$ btrfsck /dev/sda6
Checking filesystem on /dev/sda6
UUID: 2ebb483c-1986-4610-802a-c6f3e6ab4b76
checking extents
Chunk[256, 228, 0]: length(4194304), offset(0), type(2) mismatch with
block group[0, 192, 4194304]: offset(4194304), objectid(0), flags(34)
Chunk[256, 228, 4194304]: length(8388608), offset(4194304), type(4)
mismatch with block group[4194304, 192, 8388608]: offset(8388608),
objectid(4194304), flags(36)
Block group[0, 4194304] (flags = 34) didn't find the relative chunk.
Block group[4194304, 8388608] (flags = 36) didn't find the relative
chunk.
......
The commit has the following bug causing the problem.
1) Typo forgets to add meta/data_profile for alloc_chunk.
Only meta/data_profile is added to allocate a block group, but not
chunk.
2) Type for the first system chunk is impossible to modify yet.
The type for the first chunk and its stripe is hard coded into
make_btrfs() function.
So even if we try to modify the type of the block group, we are unable to
change the type of the first chunk,
causing the chunk type mismatch problem.
The 1st bug can be fixed quite easily but the second is not.
The good news is, the last patch "btrfs-progs: mkfs: Cleanup temporary
chunk to avoid strange balance behavior." from my patchset can handle it
quite well alone.
So just revert the patch.
New bug fix for btrfsck (err is 0 even if chunk/extent tree is corrupted) and
new test cases for mkfs will follow soon.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-14 10:13:01 +08:00
|
|
|
goto raid_groups;
|
2008-03-25 03:04:49 +08:00
|
|
|
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and keep all the fds open until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
for (i = 1; i < device_count; i++) {
|
|
|
|
ret = btrfs_device_already_in_root(root, prepare_ctx[i].fd,
|
2008-04-18 22:45:17 +08:00
|
|
|
BTRFS_SUPER_INFO_OFFSET);
|
|
|
|
if (ret) {
|
2016-08-19 00:38:34 +08:00
|
|
|
error("skipping duplicate device %s in the filesystem",
|
2008-04-18 22:45:17 +08:00
|
|
|
file);
|
|
|
|
continue;
|
|
|
|
}
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and keep all the fds open until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
dev_block_count = prepare_ctx[i].dev_block_count;
|
2022-09-04 18:47:20 +08:00
|
|
|
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and keep all the fds open until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
if (prepare_ctx[i].ret) {
|
|
|
|
errno = -prepare_ctx[i].ret;
|
|
|
|
error("unable to prepare device %s: %m", prepare_ctx[i].file);
|
2017-08-22 13:35:06 +08:00
|
|
|
goto error;
|
2013-12-18 12:07:55 +08:00
|
|
|
}
|
2008-03-25 03:04:49 +08:00
|
|
|
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and keep all the fds open until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
ret = btrfs_add_to_fsid(trans, root, prepare_ctx[i].fd,
|
|
|
|
prepare_ctx[i].file, dev_block_count,
|
2008-03-25 03:04:49 +08:00
|
|
|
sectorsize, sectorsize, sectorsize);
|
2016-08-22 23:50:49 +08:00
|
|
|
if (ret) {
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and keep all the fds open until
close_ctree() is called.
By this, although we may still have race between close_ctree() and
explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
error("unable to add %s to filesystem: %d",
|
|
|
|
prepare_ctx[i].file, ret);
|
2019-08-14 09:03:58 +08:00
|
|
|
goto error;
|
2016-08-22 23:50:49 +08:00
|
|
|
}
|
2021-09-28 02:26:32 +08:00
|
|
|
if (bconf.verbose >= 2) {
|
2015-06-06 07:39:26 +08:00
|
|
|
struct btrfs_device *device;
|
|
|
|
|
2016-08-23 22:18:33 +08:00
|
|
|
device = container_of(fs_info->fs_devices->devices.next,
|
2015-06-06 07:39:26 +08:00
|
|
|
struct btrfs_device, dev_list);
|
2022-09-28 07:13:00 +08:00
|
|
|
printf("adding device %s id %llu\n", file, device->devid);
|
2015-06-06 07:39:26 +08:00
|
|
|
}
|
2008-03-25 03:04:49 +08:00
|
|
|
}
|
2008-04-04 04:35:48 +08:00
|
|
|
|
2023-09-15 00:05:36 +08:00
|
|
|
if (opt_zoned)
|
|
|
|
btrfs_get_dev_zone_info_all_devices(fs_info);
|
|
|
|
|
Revert "btrfs-progs: mkfs: create only desired block groups for single device"
This reverts commit 5f8232e5c8f0b0de0ef426274911385b0e877392.
This commit causes a regression:
$ mkfs.btrfs -f /dev/sda6
$ btrfsck /dev/sda6
Checking filesystem on /dev/sda6
UUID: 2ebb483c-1986-4610-802a-c6f3e6ab4b76
checking extents
Chunk[256, 228, 0]: length(4194304), offset(0), type(2) mismatch with
block group[0, 192, 4194304]: offset(4194304), objectid(0), flags(34)
Chunk[256, 228, 4194304]: length(8388608), offset(4194304), type(4)
mismatch with block group[4194304, 192, 8388608]: offset(8388608),
objectid(4194304), flags(36)
Block group[0, 4194304] (flags = 34) didn't find the relative chunk.
Block group[4194304, 8388608] (flags = 36) didn't find the relative
chunk.
......
The commit has the following bug causing the problem.
1) Typo forgets to add meta/data_profile for alloc_chunk.
Only meta/data_profile is added to allocate a block group, but not
chunk.
2) Type for the first system chunk is impossible to modify yet.
The type for the first chunk and its stripe is hard coded into
make_btrfs() function.
So even if we try to modify the type of the block group, we are unable to
change the type of the first chunk,
causing the chunk type mismatch problem.
The 1st bug can be fixed quite easily but the second is not.
The good news is, the last patch "btrfs-progs: mkfs: Cleanup temporary
chunk to avoid strange balance behavior." from my patchset can handle it
quite well alone.
So just revert the patch.
New bug fix for btrfsck (err is 0 even if chunk/extent tree is corrupted) and
new test cases for mkfs will follow soon.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-14 10:13:01 +08:00
|
|
|
raid_groups:
|
btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier
Use an easier method to calculate the estimated device size for
mkfs.btrfs --rootdir.
The new method will over-estimate, but should ensure we won't encounter
ENOSPC.
It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size
Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier
PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE, and 32M for DUP, get from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.
Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier
Total_data_usage is the sum of *rounded up* size of each regular inode
use.
min_chunk_size is 8M for SINGLE, 64M for DUP, get from btrfs_alloc_chunk().
Same profile_multiplier for meta.
This over-estimating calculation is, of course, inaccurate, but since we will
later shrink the fs to its real usage, it doesn't matter much now.
Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
2017-10-19 10:11:07 +08:00
|
|
|
ret = create_raid_groups(trans, root, data_profile,
|
|
|
|
metadata_profile, mixed, &allocation);
|
|
|
|
if (ret) {
|
|
|
|
error("unable to create raid groups: %d", ret);
|
|
|
|
goto out;
|
2010-07-08 17:17:59 +08:00
|
|
|
}
|
2008-04-23 02:06:31 +08:00
|
|
|
|
btrfs-progs: mkfs: recow all tree blocks properly
[BUG]
Since btrfs-progs v5.14, mkfs.btrfs no longer cleans up the temporary
SINGLE metadata chunks if "-R free-space-tree" is specified:
$ mkfs.btrfs -f -R free-space-tree -m dup -d dup /dev/test/test
$ btrfs ins dump-tree -t chunk /dev/test/test | grep "type METADATA"
length 8388608 owner 2 stripe_len 65536 type METADATA
length 268435456 owner 2 stripe_len 65536 type METADATA|DUP
[CAUSE]
Since commit 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree
at make_btrfs() time"), free space tree is created when the temporary
btrfs image is created.
This behavior itself has no problem at all. The problem happens when
"-m DUP -d DUP" (or other profiles) is specified.
This makes btrfs create extra chunks, enlarging the free space tree so
that it can be as high as level 1.
During mkfs, we rely on recow_roots() to re-COW all tree blocks to the
newly allocated chunks.
But __recow_root() can only handle tree root at level 0, as it forces
root node to be COWed, not bothering the children leaves/nodes.
This makes part of the free space cache tree still live on the old
temporary chunks, leaving later cleanup_temp_chunks() unable to delete
temporary SINGLE chunks.
[FIX]
Rework __recow_root() to do a proper COW of the whole tree.
But the above rework is not enough: if a free space tree block is
allocated during the current transaction, but before the new chunks are added,
then the reworked __recow_root() can't COW it, as btrfs_search_slot()
won't COW a tree block allocated in the current transaction.
So this patch will also commit current transaction before calling
recow_roots(), to force us to re-cow all tree blocks.
This shouldn't be a problem, as at the time of calling, we should have
less than a dozen tree blocks, thus there won't be a performance impact.
Reported-by: FireFish5000 <firefish5000@gmail.com>
Fixes: 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree at make_btrfs() time")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-10-11 20:06:49 +08:00
|
|
|
/*
|
|
|
|
* Commit current transaction so we can COW all existing tree blocks
|
|
|
|
* to newly created raid groups.
|
|
|
|
* As currently we use btrfs_search_slot() to COW tree blocks in
|
|
|
|
* recow_roots(), if a tree block is already modified in current trans,
|
|
|
|
* it won't be re-COWed, thus it will stay in temporary chunks.
|
|
|
|
*/
|
|
|
|
ret = btrfs_commit_transaction(trans, root);
|
|
|
|
if (ret) {
|
|
|
|
errno = -ret;
|
2022-09-30 15:12:06 +08:00
|
|
|
error_msg(ERROR_MSG_COMMIT_TRANS, "before recowing trees: %m");
|
btrfs-progs: mkfs: recow all tree blocks properly
[BUG]
Since btrfs-progs v5.14, mkfs.btrfs no longer cleans up the temporary
SINGLE metadata chunks if "-R free-space-tree" is specified:
$ mkfs.btrfs -f -R free-space-tree -m dup -d dup /dev/test/test
$ btrfs ins dump-tree -t chunk /dev/test/test | grep "type METADATA"
length 8388608 owner 2 stripe_len 65536 type METADATA
length 268435456 owner 2 stripe_len 65536 type METADATA|DUP
[CAUSE]
Since commit 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree
at make_btrfs() time"), free space tree is created when the temporary
btrfs image is created.
This behavior itself has no problem at all. The problem happens when
"-m DUP -d DUP" (or other profiles) is specified.
This makes btrfs create extra chunks, enlarging the free space tree so
that it can be as high as level 1.
During mkfs, we rely on recow_roots() to re-COW all tree blocks to the
newly allocated chunks.
But __recow_root() can only handle tree root at level 0, as it forces
root node to be COWed, not bothering the children leaves/nodes.
This makes part of the free space cache tree still live on the old
temporary chunks, leaving later cleanup_temp_chunks() unable to delete
temporary SINGLE chunks.
[FIX]
Rework __recow_root() to do a proper COW of the whole tree.
But the above rework is not enough: if a free space tree block is
allocated during the current transaction, but before the new chunks are added,
then the reworked __recow_root() can't COW it, as btrfs_search_slot()
won't COW a tree block allocated in the current transaction.
So this patch will also commit current transaction before calling
recow_roots(), to force us to re-cow all tree blocks.
This shouldn't be a problem, as at the time of calling, we should have
less than a dozen tree blocks, thus there won't be a performance impact.
Reported-by: FireFish5000 <firefish5000@gmail.com>
Fixes: 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree at make_btrfs() time")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-10-11 20:06:49 +08:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
trans = btrfs_start_transaction(root, 1);
|
|
|
|
if (IS_ERR(trans)) {
|
|
|
|
errno = -PTR_ERR(trans);
|
2022-09-30 15:12:06 +08:00
|
|
|
error_msg(ERROR_MSG_START_TRANS, "%m");
|
btrfs-progs: mkfs: recow all tree blocks properly
[BUG]
Since btrfs-progs v5.14, mkfs.btrfs no longer cleans up the temporary
SINGLE metadata chunks if "-R free-space-tree" is specified:
$ mkfs.btrfs -f -R free-space-tree -m dup -d dup /dev/test/test
$ btrfs ins dump-tree -t chunk /dev/test/test | grep "type METADATA"
length 8388608 owner 2 stripe_len 65536 type METADATA
length 268435456 owner 2 stripe_len 65536 type METADATA|DUP
[CAUSE]
Since commit 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree
at make_btrfs() time"), free space tree is created when the temporary
btrfs image is created.
This behavior itself has no problem at all. The problem happens when
"-m DUP -d DUP" (or other profiles) is specified.
This makes btrfs create extra chunks, enlarging the free space tree so
that it can be as high as level 1.
During mkfs, we rely on recow_roots() to re-COW all tree blocks to the
newly allocated chunks.
But __recow_root() can only handle tree root at level 0, as it forces
root node to be COWed, not bothering the children leaves/nodes.
This makes part of the free space cache tree still live on the old
temporary chunks, leaving later cleanup_temp_chunks() unable to delete
temporary SINGLE chunks.
[FIX]
Rework __recow_root() to do a proper COW of the whole tree.
But the above rework is not enough: if a free space tree block is
allocated during the current transaction, but before the new chunks are added,
then the reworked __recow_root() can't COW it, as btrfs_search_slot()
won't COW a tree block allocated in the current transaction.
So this patch will also commit current transaction before calling
recow_roots(), to force us to re-cow all tree blocks.
This shouldn't be a problem, as at the time of calling, we should have
less than a dozen tree blocks, thus there won't be a performance impact.
Reported-by: FireFish5000 <firefish5000@gmail.com>
Fixes: 4b6cf2a3eb78 ("btrfs-progs: mkfs: generate free space tree at make_btrfs() time")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-10-11 20:06:49 +08:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
/* COW all tree blocks to newly created chunks */
|
|
|
|
ret = recow_roots(trans, root);
|
|
|
|
if (ret) {
|
|
|
|
errno = -ret;
|
|
|
|
error("unable to COW tree blocks to new profiles: %m");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2019-01-03 15:32:18 +08:00
|
|
|
ret = create_data_reloc_tree(trans);
|
2016-08-22 23:50:49 +08:00
|
|
|
if (ret) {
|
|
|
|
error("unable to create data reloc tree: %d", ret);
|
|
|
|
goto out;
|
|
|
|
}
|
2008-09-26 22:26:53 +08:00
|
|
|
|
2019-01-03 15:32:21 +08:00
|
|
|
ret = create_uuid_tree(trans);
|
2018-02-28 03:38:49 +08:00
|
|
|
if (ret)
|
|
|
|
warning(
|
|
|
|
"unable to create uuid tree, will be created after mount: %d", ret);
|
|
|
|
|
2016-08-23 00:18:14 +08:00
|
|
|
ret = btrfs_commit_transaction(trans, root);
|
|
|
|
if (ret) {
|
2022-09-30 15:12:06 +08:00
|
|
|
errno = -ret;
|
|
|
|
error_msg(ERROR_MSG_START_TRANS, "%m");
|
2016-08-23 00:18:14 +08:00
|
|
|
goto out;
|
|
|
|
}
|
2010-07-08 17:17:59 +08:00
|
|
|
|
2017-11-29 14:31:39 +08:00
|
|
|
ret = cleanup_temp_chunks(fs_info, &allocation, data_profile,
|
|
|
|
metadata_profile, metadata_profile);
|
|
|
|
if (ret < 0) {
|
|
|
|
error("failed to cleanup temporary chunks: %d", ret);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2023-05-27 03:13:06 +08:00
|
|
|
if (source_dir) {
|
2023-05-27 03:52:27 +08:00
|
|
|
pr_verbose(LOG_DEFAULT, "Rootdir from: %s\n", source_dir);
|
|
|
|
ret = btrfs_mkfs_fill_dir(source_dir, root);
|
2016-08-22 23:50:49 +08:00
|
|
|
if (ret) {
|
2018-06-03 04:30:22 +08:00
|
|
|
error("error while filling filesystem: %d", ret);
|
2016-08-22 23:50:49 +08:00
|
|
|
goto out;
|
|
|
|
}
|
2017-10-19 17:13:55 +08:00
|
|
|
if (shrink_rootdir) {
|
2023-05-27 03:52:27 +08:00
|
|
|
pr_verbose(LOG_DEFAULT, " Shrink: yes\n");
|
2017-10-19 17:13:55 +08:00
|
|
|
ret = btrfs_mkfs_shrink_fs(fs_info, &shrink_size,
|
|
|
|
shrink_rootdir);
|
|
|
|
if (ret < 0) {
|
|
|
|
error("error while shrinking filesystem: %d",
|
|
|
|
ret);
|
|
|
|
goto out;
|
|
|
|
}
|
2023-05-27 03:52:27 +08:00
|
|
|
} else {
|
|
|
|
pr_verbose(LOG_DEFAULT, " Shrink: no\n");
|
2017-10-19 15:12:58 +08:00
|
|
|
}
|
2010-07-08 17:17:59 +08:00
|
|
|
}
|
|
|
|
|
2023-09-28 01:46:46 +08:00
|
|
|
if (features.runtime_flags & BTRFS_FEATURE_RUNTIME_QUOTA ||
|
|
|
|
features.incompat_flags & BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA) {
|
2018-05-08 14:31:54 +08:00
|
|
|
ret = setup_quota_root(fs_info);
|
|
|
|
if (ret < 0) {
|
|
|
|
error("failed to initialize quota: %d (%m)", ret);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
2021-09-28 02:26:32 +08:00
|
|
|
if (bconf.verbose) {
|
btrfs-progs: mkfs: fix a stack over-flow when features string are too long
[BUG]
Even with the chunk_objectid bug fixed, mkfs.btrfs can still cause a stack
overflow when enabling the extent-tree-v2 feature (needs experimental
features enabled):
# ./mkfs.btrfs -f -O extent-tree-v2 ~/test.img
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
NOTE: several default settings have changed in version 5.15, please make sure
this does not affect your deployments:
- DUP for metadata (-m dup)
- enabled no-holes (-O no-holes)
- enabled free-space-tree (-R free-space-tree)
Label: (null)
UUID: 205c61e7-f58e-4e8f-9dc2-38724f5c554b
Node size: 16384
Sector size: 4096
Filesystem size: 512.00MiB
Block group profiles:
Data: single 8.00MiB
Metadata: DUP 32.00MiB
System: DUP 8.00MiB
SSD detected: no
Zoned device: no
=================================================================
[... Skip full ASAN output ...]
==65655==ABORTING
[CAUSE]
For experimental build, we have unified feature output, but the old
buffer size is only 64 bytes, which is too small to cover the new full
feature string:
extref, skinny-metadata, no-holes, free-space-tree, block-group-tree, extent-tree-v2
Above feature string is already 84 bytes, over the 64 on-stack memory
size.
This can also be proved by the ASAN output:
==65655==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7ffc4e03b1d0 at pc 0x7ff0fc05fafe bp 0x7ffc4e03ac60 sp 0x7ffc4e03a408
WRITE of size 17 at 0x7ffc4e03b1d0 thread T0
#0 0x7ff0fc05fafd in __interceptor_strcat /usr/src/debug/gcc/libsanitizer/asan/asan_interceptors.cpp:377
#1 0x55cdb7b06ca5 in parse_features_to_string common/fsfeatures.c:316
#2 0x55cdb7b06ce1 in btrfs_parse_fs_features_to_string common/fsfeatures.c:324
#3 0x55cdb7a37226 in main mkfs/main.c:1783
#4 0x7ff0fbe3c28f (/usr/lib/libc.so.6+0x2328f)
#5 0x7ff0fbe3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349)
#6 0x55cdb7a2cb34 in _start ../sysdeps/x86_64/start.S:115
[FIX]
Introduce a new macro, BTRFS_FEATURE_STRING_BUF_SIZE, along with a new
sanity check helper, btrfs_assert_feature_buf_size().
The problem is that I cannot find a build-time method to verify that
BTRFS_FEATURE_STRING_BUF_SIZE is large enough to contain all feature
names, so we have to use a runtime function with a BUG_ON() to verify
the macro size.
Now the minimal buffer size for experimental build is 138 bytes, just
bump it to 160 for future expansion.
And if further features go beyond that number, mkfs.btrfs/btrfs-convert
will immediately crash at that BUG_ON(), so we can definitely detect it.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Tested-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-07 20:03:01 +08:00
|
|
|
char features_buf[BTRFS_FEATURE_STRING_BUF_SIZE];
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fields printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switch is passed, the output is more verbose; if the '-q'
switch is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
|
2017-10-20 09:59:06 +08:00
|
|
|
update_chunk_allocation(fs_info, &allocation);
|
2015-06-08 22:26:54 +08:00
|
|
|
printf("Label: %s\n", label);
|
2016-11-11 01:23:01 +08:00
|
|
|
printf("UUID: %s\n", mkfs_cfg.fs_uuid);
|
2015-06-08 22:26:54 +08:00
|
|
|
printf("Node size: %u\n", nodesize);
|
|
|
|
printf("Sector size: %u\n", sectorsize);
|
|
|
|
printf("Filesystem size: %s\n",
|
2016-08-23 22:18:33 +08:00
|
|
|
pretty_size(btrfs_super_total_bytes(fs_info->super_copy)));
|
2015-06-08 22:26:54 +08:00
|
|
|
printf("Block group profiles:\n");
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fields printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switch is passed, the output is more verbose; if the '-q'
switch is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
if (allocation.data)
|
2015-06-08 22:26:54 +08:00
|
|
|
printf(" Data: %-8s %16s\n",
|
|
|
|
btrfs_group_profile_str(data_profile),
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fields printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switch is passed, the output is more verbose; if the '-q'
switch is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
pretty_size(allocation.data));
|
|
|
|
if (allocation.metadata)
|
2015-06-08 22:26:54 +08:00
|
|
|
printf(" Metadata: %-8s %16s\n",
|
|
|
|
btrfs_group_profile_str(metadata_profile),
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fields printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switch is passed, the output is more verbose; if the '-q'
switch is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
pretty_size(allocation.metadata));
|
|
|
|
if (allocation.mixed)
|
2015-06-08 22:26:54 +08:00
|
|
|
printf(" Data+Metadata: %-8s %16s\n",
|
|
|
|
btrfs_group_profile_str(data_profile),
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fields printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switch is passed, the output is more verbose; if the '-q'
switch is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
pretty_size(allocation.mixed));
|
2015-06-08 22:26:54 +08:00
|
|
|
printf(" System: %-8s %16s\n",
|
|
|
|
btrfs_group_profile_str(metadata_profile),
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fields printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switch is passed, the output is more verbose; if the '-q'
switch is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
pretty_size(allocation.system));
|
2015-06-08 22:26:54 +08:00
|
|
|
printf("SSD detected: %s\n", ssd ? "yes" : "no");
|
2022-09-04 18:47:20 +08:00
|
|
|
printf("Zoned device: %s\n", opt_zoned ? "yes" : "no");
|
|
|
|
if (opt_zoned)
|
2021-05-06 23:35:44 +08:00
|
|
|
printf(" Zone size: %s\n",
|
2021-04-26 14:27:36 +08:00
|
|
|
pretty_size(fs_info->zone_size));
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merge the "-O" and "-R" options, as they don't correctly represent
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() returns a u64, and reuses the
same u64 for both incompat and compat RO flags for the experimental branch.
This can easily lead to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for the above case, mkfs.btrfs believes it has set the MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
btrfs_parse_fs_features_to_string(features_buf, &features);
|
|
|
|
#if EXPERIMENTAL
|
|
|
|
printf("Features: %s\n", features_buf);
|
|
|
|
#else
|
2019-09-25 21:37:25 +08:00
|
|
|
printf("Incompat features: %s\n", features_buf);
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merge the "-O" and "-R" options, as they don't correctly represent
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() returns a u64, and reuses the
same u64 for both incompat and compat RO flags for the experimental branch.
This can easily lead to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for the above case, mkfs.btrfs believes it has set the MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
btrfs_parse_runtime_features_to_string(features_buf, &features);
|
2018-05-08 14:31:53 +08:00
|
|
|
printf("Runtime features: %s\n", features_buf);
|
btrfs-progs: fsfeatures: properly merge -O and -R options
[BUG]
Commit "btrfs-progs: prepare merging compat feature lists" tries to
merge the "-O" and "-R" options, as they don't correctly represent
btrfs features.
But that commit caused the following bug during mkfs for experimental
build:
$ mkfs.btrfs -f -O block-group-tree /dev/nvme0n1
btrfs-progs v5.19.1
See http://btrfs.wiki.kernel.org for more information.
ERROR: superblock magic doesn't match
ERROR: illegal nodesize 16384 (not equal to 4096 for mixed block group)
[CAUSE]
Currently btrfs_parse_fs_features() returns a u64, and reuses the
same u64 for both incompat and compat RO flags for the experimental branch.
This can easily lead to conflicts, as
BTRFS_FEATURE_INCOMPAT_MIXED_BLOCK_GROUP and
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE both share the same bit
(1 << 2).
Thus for the above case, mkfs.btrfs believes it has set the MIXED_BLOCK_GROUP
feature, but what we really want is BLOCK_GROUP_TREE.
[FIX]
Instead of incorrectly re-using the same bits in btrfs_feature, split
the old flags into 3 flags:
- incompat_flag
- compat_ro_flag
- runtime_flag
The first two flags are easy to understand, the corresponding flag of
each feature.
The last runtime_flag is to compensate features which doesn't have any
on-disk flag set, like QUOTA and LIST_ALL.
And since we're no longer using a single u64 as features, we have to
introduce a new structure, btrfs_mkfs_features, to contain above 3
flags.
This also mean, things like default mkfs features must be converted to
use the new structure, thus those old macros are all converted to
const static structures:
- BTRFS_MKFS_DEFAULT_FEATURES + BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES
-> btrfs_mkfs_default_features
- BTRFS_CONVERT_ALLOWED_FEATURES -> btrfs_convert_allowed_features
And since we're using a structure, it's not longer as easy to implement
a disallowed mask.
Thus functions with @mask_disallowed are all changed to using
an @allowed structure pointer (which can be NULL).
Finally if we have experimental features enabled, all features can be
specified by -O options, and we can output a unified feature list,
instead of the old split ones.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-10-05 09:48:07 +08:00
|
|
|
#endif
|
2023-05-27 03:52:27 +08:00
|
|
|
printf("Checksum: %s\n",
|
2019-09-25 21:37:25 +08:00
|
|
|
btrfs_super_csum_name(mkfs_cfg.csum_type));
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fields printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switch is passed, the output is more verbose; if the '-q'
switch is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
|
|
|
|
list_all_devices(root);
|
2021-06-23 19:20:10 +08:00
|
|
|
|
|
|
|
if (mkfs_cfg.csum_type == BTRFS_CSUM_TYPE_SHA256) {
|
|
|
|
printf(
|
|
|
|
"NOTE: you may need to manually load kernel module implementing accelerated SHA256 in case\n"
|
|
|
|
" the generic implementation is built-in, before mount. Check lsmod or /proc/crypto\n\n"
|
|
|
|
);
|
|
|
|
}
|
btrfs-progs: mkfs: print the summary
This patch prints the summary of the filesystem after the creation.
The main fields printed are:
- devices list with their uuid, devid, path and size
- raid profile (dup,single,raid0...)
- leafsize/nodesize/sectorsize
- filesystem features (raid56, extref, mixed-bg)
- chunk size and type
If the '-v' switch is passed, the output is more verbose; if the '-q'
switch is passed, only the errors are printed.
Below an example:
BTRFS filesystem summary:
Label: btrfs-test
UUID: 14ae8a88-98ac-4f22-8441-79f76ec622f7
Node size: 4096
Leaf size: 4096
Sector size: 4096
Initial chunks:
Data+Metadata: 9.01GiB
System: 18.06MiB
Metadata profile: RAID5
Data profile: RAID5
Mixed mode: YES
SSD detected: NO
Incompat features: mixed-bg, extref, raid56
Number of devices: 10
UUID ID SIZE PATH
------------------------------------ -- --------- -----------
df1c7f50-1980-4da2-8bc9-7ee6ffb0b554 1 50.00GiB /dev/vdb
32c808a0-cd7b-4497-a2c0-1d77a9854af9 2 50.00GiB /dev/vdc
3159782e-d108-40bc-9e15-090ecac160b4 3 50.00GiB /dev/vdd
db7eaf0c-beb8-4093-a9d0-b9c25c146305 4 50.00GiB /dev/vde
c367ca04-1f71-49c0-a331-11fc0b87e9fc 5 50.00GiB /dev/vdf
e9b73c86-4058-4b3a-90ac-18741a276e70 6 50.00GiB /dev/vdg
c4298b7a-ad41-4690-bf10-bf748b319413 7 50.00GiB /dev/vdh
1cf048c8-af8a-4225-b09a-5d12e9b217fa 8 2.00GiB /dev/vdi
7e157869-768a-4725-bad5-82e6bd05fd17 9 2.00GiB /dev/vdj
2c9431ac-c7f0-45a5-8529-cef8cf6e4033 10 2.00GiB /dev/vdk
Total devices size: 356.01GiB
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: David Sterba <dsterba@suse.cz>
2015-06-08 19:00:50 +08:00
|
|
|
}
|
|
|
|
|
2016-08-22 22:32:24 +08:00
|
|
|
/*
|
|
|
|
* The filesystem is now fully set up, commit the remaining changes and
|
|
|
|
* fix the signature as the last step before closing the devices.
|
|
|
|
*/
|
2016-08-23 22:18:33 +08:00
|
|
|
fs_info->finalize_on_close = 1;
|
btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.
[BUG]
# mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
# mount /dev/sdb /mnt/btrfs
# btrfs balance start /mnt/btrfs
# btrfs fi df /mnt/btrfs
Data, single: total=1.00GiB, used=320.00KiB
System, single: total=32.00MiB, used=16.00KiB
Metadata, RAID0: total=256.00MiB, used=112.00KiB
GlobalReserve, single: total=16.00MiB, used=0.00B
Only metadata stay RAID0. Data and system goes from RAID0 to single.
[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunks and then
add devices into the temporary btrfs.
When doing a balance of all chunks, the data and system chunks are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk
preallocation, so a new RAID0 chunk is allocated and the old metadata is moved
there. Old RAID0 and single chunks are removed.
[FIX]
Now we add a new function to clean up the temporary chunks at the end of
the mkfs routine.
It will clean up the chunks which are empty and whose profile differs from
the mkfs profile.
So in balance, btrfs will always allocate a new chunk to keep the profile,
rather than moving data into the single chunk.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2015-07-07 16:15:28 +08:00
|
|
|
out:
|
2017-10-19 13:41:35 +08:00
|
|
|
close_ret = close_ctree(root);
|
2016-08-22 22:31:11 +08:00
|
|
|
|
2017-10-19 13:41:35 +08:00
|
|
|
if (!close_ret) {
|
2016-08-22 23:50:49 +08:00
|
|
|
optind = saved_optind;
|
2022-09-30 14:12:54 +08:00
|
|
|
device_count = argc - optind;
|
|
|
|
while (device_count-- > 0) {
|
2016-08-22 23:50:49 +08:00
|
|
|
file = argv[optind++];
|
2019-07-02 03:29:43 +08:00
|
|
|
if (path_is_block_device(file) == 1)
|
2016-08-22 23:50:49 +08:00
|
|
|
btrfs_register_one_device(file);
|
|
|
|
}
|
2016-08-22 22:31:11 +08:00
|
|
|
}
|
|
|
|
|
2018-04-03 16:39:45 +08:00
|
|
|
if (!ret && close_ret) {
|
|
|
|
ret = close_ret;
|
|
|
|
error("failed to close ctree, the filesystem may be inconsistent: %d",
|
|
|
|
ret);
|
|
|
|
}
|
|
|
|
|
2015-08-24 16:45:03 +08:00
|
|
|
btrfs_close_all_devices();
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and do not close any of the fds until
close_ctree() has been called.
With this, although we may still have a race between close_ctree() and
the explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate for the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
if (prepare_ctx) {
|
|
|
|
for (i = 0; i < device_count; i++)
|
|
|
|
close(prepare_ctx[i].fd);
|
|
|
|
}
|
2022-09-04 18:47:20 +08:00
|
|
|
free(t_prepare);
|
|
|
|
free(prepare_ctx);
|
2008-04-23 02:06:31 +08:00
|
|
|
free(label);
|
2022-09-14 23:03:18 +08:00
|
|
|
free(source_dir);
|
2016-08-22 23:50:49 +08:00
|
|
|
|
|
|
|
return !!ret;
|
2022-09-04 18:47:20 +08:00
|
|
|
|
2017-08-22 13:35:06 +08:00
|
|
|
error:
|
btrfs-progs: mkfs: keep file descriptors open during whole time
[BUG]
There is an internal bug report that, after mkfs.btrfs, there is a chance
that the /dev/disk/by-uuid/<uuid> symlink is not created at all.
[CAUSE]
That uuid symlink is created by udev, which listens to inotify
IN_CLOSE_WRITE events from all block devices.
After such IN_CLOSE_WRITE event is triggered, udev would *disable*
inotify for that block device, and do a blkid scan on it.
After the blkid scan is done, re-enables the inotify listening.
This means normally mkfs tools should open the fd, do all the writes,
and close the fd after everything is done.
But unfortunately for mkfs.btrfs, it's not the case, we have a lot of
phases separated by different close() calls:
open_ctree() would open fds of each involved device
and close them at close_ctree()
Only after close_ctree() we have a valid superblock -\
|
|<------- A -------->|<--------- B --------->|<------- C ------->|
| |
| `- open a new fd for make_btrfs()
| and close it before open_ctree()
| The device contains invalid sb.
|
`- open a new fd for each device, then call
btrfs_prepare_device(), then close the fd.
The device would contain no valid superblock.
If at the close() of phase A udev event is triggered, while doing udev
scan we go into phase C (but before the new valid super blocks written),
udev would only see no superblock or invalid superblock.
Then phase C finished, udev resumes its inotify listening, but at this
time mkfs is finished, while udev only sees the premature data from
phase A, and misses the IN_CLOSE_WRITE events from phase C.
[FIX]
Instead of opening and closing a new fd for each device, re-use the fd
opened during prepare_one_device(), and do not close any of the fds until
close_ctree() has been called.
With this, although we may still have a race between close_ctree() and
the explicit close() calls, at least udev can always see the properly
written super blocks.
To compensate for the change, some extra cleanups are made:
- Do not touch @device_count
Which makes later prepare_ctx iteration much easier.
- Remove top-level @fd variable
Instead go with prepare_ctx[i].fd.
- Do not open with O_RDWR in test_dev_for_mkfs()
as test_dev_for_mkfs() would close the fd, if we go O_RDWR, it can
cause the udev race.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2023-03-15 14:06:54 +08:00
|
|
|
if (prepare_ctx) {
|
|
|
|
for (i = 0; i < device_count; i++)
|
|
|
|
close(prepare_ctx[i].fd);
|
|
|
|
}
|
2022-09-04 18:47:20 +08:00
|
|
|
free(t_prepare);
|
|
|
|
free(prepare_ctx);
|
2017-08-22 13:35:06 +08:00
|
|
|
free(label);
|
2022-09-14 23:03:18 +08:00
|
|
|
free(source_dir);
|
2017-08-22 13:35:06 +08:00
|
|
|
exit(1);
|
|
|
|
success:
|
|
|
|
exit(0);
|
2007-03-21 08:35:03 +08:00
|
|
|
}
|