btrfs-progs/common/utils.c
Qu Wenruo 6dfc69104b btrfs-progs: add extra chunk alignment checks
Recently we had a scrub use-after-free caused by an unaligned chunk
length. Although the fix was submitted, we may want to do extra checks
for a chunk's alignment.

This patch adds such a check for the starting bytenr and length of a
chunk, to make sure they are properly aligned to the 64K stripe boundary.

By default, the check only leads to a warning and is not treated as an
error, as we expect the kernel to handle such misalignment without any
problem.

But if the new debug environmental variable,
BTRFS_PROGS_DEBUG_STRICT_CHUNK_ALIGNMENT, is specified, then we will
treat it as an error.  So that we can detect unexpected chunks from
btrfs-progs, and fix them before reaching the end users.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2024-01-16 19:14:37 +01:00

1380 lines
31 KiB
C

/*
* Copyright (C) 2007 Oracle. All rights reserved.
* Copyright (C) 2008 Morey Roof. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include "kerncompat.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/sysinfo.h>
#include <sys/select.h>
#include <sys/stat.h>
#include <dirent.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <mntent.h>
#include <ctype.h>
#include <limits.h>
#include <strings.h>
#include "kernel-lib/list.h"
#include "kernel-shared/accessors.h"
#include "kernel-shared/ctree.h"
#include "kernel-shared/disk-io.h"
#include "kernel-shared/volumes.h"
#include "common/utils.h"
#include "common/device-utils.h"
#include "common/path-utils.h"
#include "common/open-utils.h"
#include "common/sysfs-utils.h"
#include "common/messages.h"
#include "cmds/commands.h"
#include "mkfs/common.h"
/* Set to 1 once rand_seed has been filled by init_rand_seed() or __init_seed() */
static int rand_seed_initialized = 0;
/* Generator state handed to jrand48() by the rand_*() helpers */
static unsigned short rand_seed[3];
/* Global configuration, defaults are set by btrfs_config_init() */
struct btrfs_config bconf;
/*
 * List node carrying a path name.
 * NOTE(review): no user of this structure is visible in this part of the file.
 */
struct pending_dir {
struct list_head list;
char name[PATH_MAX];
};
/*
 * Format a checksum as "0x" followed by two lowercase hex digits per byte.
 *
 * @csum_type: checksum type, determines how many bytes of @data are printed
 * @data:      raw checksum bytes
 * @output:    destination buffer, written with a limit of
 *             BTRFS_CSUM_STRING_LEN bytes
 */
void btrfs_format_csum(u16 csum_type, const u8 *data, char *output)
{
	const int nbytes = btrfs_csum_type_size(csum_type);
	int pos;
	int idx;

	output[0] = '\0';
	snprintf(output, BTRFS_CSUM_STRING_LEN, "0x");
	pos = strlen("0x");
	/* Each checksum byte takes two characters of output */
	for (idx = 0; idx < nbytes; idx++, pos += 2)
		snprintf(output + pos, BTRFS_CSUM_STRING_LEN - pos, "%02x",
			 data[idx]);
}
/*
 * Query the space info of the filesystem behind @fd.
 *
 * The ioctl is issued twice: first with no slots to learn how many space
 * infos exist, then with a buffer large enough to receive all of them.
 *
 * @fd:        open file descriptor on the filesystem
 * @sargs_ret: on success set to a malloc'ed buffer that the caller must free
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -ENOENT if no space
 * info is reported, or the negative errno of the failed ioctl.
 */
int get_df(int fd, struct btrfs_ioctl_space_args **sargs_ret)
{
	u64 count = 0;
	int ret;
	struct btrfs_ioctl_space_args *sargs;

	sargs = malloc(sizeof(*sargs));
	if (!sargs)
		return -ENOMEM;

	sargs->space_slots = 0;
	sargs->total_spaces = 0;
	ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs);
	if (ret < 0) {
		/* Save errno first, printing and free() may overwrite it */
		ret = -errno;
		error("cannot get space info: %m");
		free(sargs);
		return ret;
	}
	/* This really should never happen */
	if (!sargs->total_spaces) {
		free(sargs);
		return -ENOENT;
	}
	count = sargs->total_spaces;
	free(sargs);

	sargs = malloc(sizeof(*sargs) +
		       (count * sizeof(struct btrfs_ioctl_space_info)));
	if (!sargs)
		return -ENOMEM;

	sargs->space_slots = count;
	sargs->total_spaces = 0;
	ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs);
	if (ret < 0) {
		/* Save errno first, printing and free() may overwrite it */
		ret = -errno;
		error("cannot get space info with %llu slots: %m",
		      count);
		free(sargs);
		return ret;
	}
	*sargs_ret = sargs;
	return 0;
}
/*
 * Return the devid stored in the last dev item of a tree search result.
 *
 * The result buffer is treated as @nr_items consecutive pairs of a search
 * header followed by a dev item.
 */
static u64 find_max_device_id(struct btrfs_ioctl_search_args *search_args,
			      int nr_items)
{
	char *last = search_args->buf;

	/* Step over all entries but the last one ... */
	last += (nr_items - 1) * (sizeof(struct btrfs_ioctl_search_header)
				  + sizeof(struct btrfs_dev_item));
	/* ... and over the header of the last entry */
	last += sizeof(struct btrfs_ioctl_search_header);

	return btrfs_stack_device_id((struct btrfs_dev_item *)last);
}
static int search_chunk_tree_for_fs_info(int fd,
struct btrfs_ioctl_fs_info_args *fi_args)
{
int ret;
int max_items;
u64 start_devid = 1;
struct btrfs_ioctl_search_args search_args;
struct btrfs_ioctl_search_key *search_key = &search_args.key;
fi_args->num_devices = 0;
max_items = BTRFS_SEARCH_ARGS_BUFSIZE
/ (sizeof(struct btrfs_ioctl_search_header)
+ sizeof(struct btrfs_dev_item));
search_key->tree_id = BTRFS_CHUNK_TREE_OBJECTID;
search_key->min_objectid = BTRFS_DEV_ITEMS_OBJECTID;
search_key->max_objectid = BTRFS_DEV_ITEMS_OBJECTID;
search_key->min_type = BTRFS_DEV_ITEM_KEY;
search_key->max_type = BTRFS_DEV_ITEM_KEY;
search_key->min_transid = 0;
search_key->max_transid = (u64)-1;
search_key->nr_items = max_items;
search_key->max_offset = (u64)-1;
again:
search_key->min_offset = start_devid;
ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args);
if (ret < 0)
return -errno;
fi_args->num_devices += (u64)search_key->nr_items;
if (search_key->nr_items == max_items) {
start_devid = find_max_device_id(&search_args,
search_key->nr_items) + 1;
goto again;
}
/* Get the latest max_id to stay consistent with the num_devices */
if (search_key->nr_items == 0)
/*
* last tree_search returns an empty buf, use the devid of
* the last dev_item of the previous tree_search
*/
fi_args->max_id = start_devid - 1;
else
fi_args->max_id = find_max_device_id(&search_args,
search_key->nr_items);
return 0;
}
/*
 * For a given path, fill in the ioctl fs_ and info_ args.
 * If the path is a btrfs mountpoint, fill info for all devices.
 * If the path is a btrfs device, fill in only that device.
 *
 * The path provided must be either on a mounted btrfs fs,
 * or be a mounted btrfs device.
 *
 * @path:    path on a mounted filesystem, or a block device of a mounted one
 * @fi_args: zeroed first and then filled (num_devices, max_id, fsid)
 * @di_ret:  set to a malloc'ed array of device infos; it is left untouched
 *           when the function bails out before the allocation or when no
 *           device is counted
 *
 * Returns 0 on success, or a negative errno.
 */
int get_fs_info(const char *path, struct btrfs_ioctl_fs_info_args *fi_args,
struct btrfs_ioctl_dev_info_args **di_ret)
{
int fd = -1;
int ret = 0;
/* Number of entries of the result array filled so far */
int ndevs = 0;
/* Next devid to query in the lookup loop below */
u64 last_devid = 0;
/* Set when a device with devid 0 answered, its info is kept in tmp */
int replacing = 0;
struct btrfs_fs_devices *fs_devices_mnt = NULL;
struct btrfs_ioctl_dev_info_args *di_args;
struct btrfs_ioctl_dev_info_args tmp;
char mp[PATH_MAX];
DIR *dirstream = NULL;
memset(fi_args, 0, sizeof(*fi_args));
if (path_is_block_device(path) == 1) {
struct btrfs_super_block disk_super;
/* Ensure it's mounted, then set path to the mountpoint */
fd = open(path, O_RDONLY);
if (fd < 0) {
ret = -errno;
error("cannot open %s: %m", path);
goto out;
}
ret = check_mounted_where(fd, path, mp, sizeof(mp),
&fs_devices_mnt, SBREAD_DEFAULT, false);
/* A zero return is turned into -EINVAL, a negative one is passed on */
if (!ret) {
ret = -EINVAL;
goto out;
}
if (ret < 0)
goto out;
path = mp;
/* Only fill in this one device */
fi_args->num_devices = 1;
ret = btrfs_read_dev_super(fd, &disk_super,
BTRFS_SUPER_INFO_OFFSET, 0);
if (ret < 0) {
ret = -EIO;
goto out;
}
/* Start (and end) the lookup loop at the devid read from the super block */
last_devid = btrfs_stack_device_id(&disk_super.dev_item);
fi_args->max_id = last_devid;
memcpy(fi_args->fsid, fs_devices_mnt->fsid, BTRFS_FSID_SIZE);
/* Done with the device itself, fd is reused for the mount point below */
close(fd);
}
/* at this point path must not be for a block device */
fd = open_file_or_dir(path, &dirstream);
if (fd < 0) {
ret = -errno;
goto out;
}
/* fill in fi_args if not just a single device */
if (fi_args->num_devices != 1) {
ret = ioctl(fd, BTRFS_IOC_FS_INFO, fi_args);
if (ret < 0) {
ret = -errno;
goto out;
}
/*
 * The fi_args->num_devices does not include seed devices, so
 * recount the devices from the chunk tree.
 */
ret = search_chunk_tree_for_fs_info(fd, fi_args);
if (ret)
goto out;
/*
 * search_chunk_tree_for_fs_info() starts at devid 1 and thus lacks
 * the devid 0, so manually probe for it here.
 */
ret = device_get_info(fd, 0, &tmp);
if (!ret) {
fi_args->num_devices++;
ndevs++;
replacing = 1;
/* devid 0 is already stored in tmp, continue the lookup from devid 1 */
if (last_devid == 0)
last_devid++;
}
}
if (!fi_args->num_devices)
goto out;
di_args = *di_ret = malloc((fi_args->num_devices) * sizeof(*di_args));
if (!di_args) {
ret = -errno;
goto out;
}
/* The devid 0 entry, if any, goes to the first slot (ndevs is already 1) */
if (replacing)
memcpy(di_args, &tmp, sizeof(tmp));
/* Query every devid up to max_id, ids without a device are skipped */
for (; last_devid <= fi_args->max_id && ndevs < fi_args->num_devices;
last_devid++) {
ret = device_get_info(fd, last_devid, &di_args[ndevs]);
if (ret == -ENODEV)
continue;
if (ret)
goto out;
ndevs++;
}
/*
 * only when the only dev we wanted to find is not there then
 * let any error be returned
 */
if (fi_args->num_devices != 1) {
BUG_ON(ndevs == 0);
ret = 0;
}
out:
close_file_or_dir(fd, dirstream);
return ret;
}
/*
 * Read the fsid of the filesystem behind @fd using the FS_INFO ioctl.
 *
 * @fsid receives BTRFS_FSID_SIZE bytes. Returns 0 on success or the negative
 * errno of the failed ioctl.
 */
int get_fsid_fd(int fd, u8 *fsid)
{
	struct btrfs_ioctl_fs_info_args fs_info;
	int err;

	err = ioctl(fd, BTRFS_IOC_FS_INFO, &fs_info);
	if (err < 0)
		return -errno;

	memcpy(fsid, fs_info.fsid, BTRFS_FSID_SIZE);
	return 0;
}
/*
 * Read the fsid of the filesystem that @path is on.
 *
 * @path:   path to open
 * @fsid:   receives the fsid, see get_fsid_fd()
 * @silent: if non-zero, do not print error messages for stat/open failures
 *
 * Returns 0 on success or a negative errno.
 */
int get_fsid(const char *path, u8 *fsid, int silent)
{
	int ret;
	int fd;
	int flags = O_RDONLY;
	struct stat st;

	ret = stat(path, &st);
	if (ret < 0) {
		/* Save errno first, printing the message may overwrite it */
		ret = -errno;
		if (!silent)
			error("failed to stat %s: %m", path);
		return ret;
	}

	/*
	 * Open in non-blocking mode in case that path is a fifo or a special
	 * character device where opening gets stuck (but is interruptible).
	 */
	if (S_ISCHR(st.st_mode) || S_ISFIFO(st.st_mode))
		flags |= O_NONBLOCK;

	fd = open(path, flags);
	if (fd < 0) {
		/* Save errno first, printing the message may overwrite it */
		ret = -errno;
		if (!silent)
			error("failed to open %s: %m", path);
		return ret;
	}

	ret = get_fsid_fd(fd, fsid);
	close(fd);

	return ret;
}
/*
 * Check that the requested block group profiles can be used with the given
 * number of devices and warn about combinations that are not recommended.
 *
 * @metadata_profile: block group flags requested for metadata
 * @data_profile:     block group flags requested for data
 * @dev_cnt:          number of devices
 * @mixed:            non-zero for mixed block groups
 * @ssd:              non-zero if the device is an SSD
 *
 * Returns 0 if the profiles are allowed, 1 (after printing an error) if not.
 */
int test_num_disk_vs_raid(u64 metadata_profile, u64 data_profile,
	u64 dev_cnt, int mixed, int ssd)
{
	u64 allowed;
	u64 profile = metadata_profile | data_profile;

	allowed = btrfs_bg_flags_for_device_num(dev_cnt);

	if (dev_cnt > 1 && profile & BTRFS_BLOCK_GROUP_DUP) {
		warning("DUP is not recommended on filesystem with multiple devices");
	}
	if (metadata_profile & ~allowed) {
		error("unable to create FS with metadata profile %s "
			"(have %llu devices but %d devices are required)",
			btrfs_group_profile_str(metadata_profile), dev_cnt,
			btrfs_bg_type_to_devs_min(metadata_profile));
		return 1;
	}
	if (data_profile & ~allowed) {
		/* No hard-coded "ERROR: " here, same as every other error() call */
		error("unable to create FS with data profile %s "
			"(have %llu devices but %d devices are required)",
			btrfs_group_profile_str(data_profile), dev_cnt,
			btrfs_bg_type_to_devs_min(data_profile));
		return 1;
	}

	if (dev_cnt == 3 && profile & BTRFS_BLOCK_GROUP_RAID6) {
		warning("RAID6 is not recommended on filesystem with 3 devices only");
	}
	if (dev_cnt == 2 && profile & BTRFS_BLOCK_GROUP_RAID5) {
		warning("RAID5 is not recommended on filesystem with 2 devices only");
	}
	warning_on(!mixed && (data_profile & BTRFS_BLOCK_GROUP_DUP) && ssd,
		   "DUP may not actually lead to 2 copies on the device, see manual page");

	return 0;
}
/*
 * Print @question followed by " [y/N]: " and read one line from stdin.
 *
 * Returns non-zero only if the first whitespace delimited token of the answer
 * is a case insensitive match with "yes" or "y", and 0 otherwise (including
 * when nothing could be read).
 */
int ask_user(const char *question)
{
	char reply[30] = {0,};
	char *state = NULL;
	char *token;

	printf("%s [y/N]: ", question);

	if (!fgets(reply, sizeof(reply) - 1, stdin))
		return 0;

	token = strtok_r(reply, " \t\n\r", &state);
	if (!token)
		return 0;

	return !strcasecmp(token, "yes") || !strcasecmp(token, "y");
}
/*
* Partial representation of a line in /proc/pid/mountinfo
*/
struct mnt_entry {
const char *root;
const char *path;
const char *options1;
const char *fstype;
const char *device;
const char *options2;
};
/*
 * Find the first occurrence of an option string (as "option=") in @options,
 * a comma separated list.
 *
 * Returns a newly allocated string "option=value" that the caller must free,
 * or NULL if @option is not found or the allocation fails.
 */
static char *find_option(const char *options, const char *option)
{
	const char *start;
	size_t len;
	char *ret;

	start = strstr(options, option);
	if (!start)
		return NULL;

	/* The value ends at the next comma or at the end of the string */
	len = strcspn(start, ",");

	/* Copy only what is returned and do not touch a failed allocation */
	ret = malloc(len + 1);
	if (!ret)
		return NULL;
	memcpy(ret, start, len);
	ret[len] = 0;

	return ret;
}
/* Return true if @c is a field separator, ie. a space or a tab */
static bool is_sep(char c)
{
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
/* Move @line forward to the next separator or to the terminating nul char */
static void skip_nonsep(char **line)
{
	char *cur = *line;

	while (*cur && !is_sep(*cur))
		cur++;
	*line = cur;
}
/*
 * Move @line forward past a run of separators, overwriting each of them with
 * a nul char so the preceding field becomes a terminated string.
 */
static void skip_sep(char **line)
{
	char *cur = *line;

	for (; *cur && is_sep(*cur); cur++)
		*cur = 0;
	*line = cur;
}
/* Return true if @c is an octal digit, '0' to '7' */
static bool isoctal(char c)
{
	if (c < '0')
		return false;
	return c <= '7';
}
/*
 * Check that @str starts with a complete escape sequence as used for mangling
 * special chars in paths, eg. \012 == 10 == 0xa == '\n'.
 * Mandatory format: backslash and 3 octal digits.
 */
static bool valid_escape(const char *str)
{
	int i;

	if (*str != '\\')
		return false;

	/*
	 * The nul char and the separators are not octal digits, so a truncated
	 * sequence is rejected before anything past the end is read.
	 */
	for (i = 1; i <= 3; i++) {
		if (!isoctal(str[i]))
			return false;
	}

	return true;
}
/*
 * Read a path from @line, with potentially mangled special characters.
 * - the input is changed in-place when unmangling is done
 * - end of path is a separator (a valid space in the path is mangled)
 * - line is advanced to the final separator or nul character
 * - returned path is a valid string terminated by zero or whitespace separator
 */
static char *read_path(char **line)
{
	char *start = *line;
	char *in = start;
	char *out = start;

	while (*in && !is_sep(*in)) {
		if (valid_escape(in)) {
			char c;

			/* Backslash at in[0], three octal digits follow */
			c = (in[1] & 07) << 6;
			c |= (in[2] & 07) << 3;
			c |= (in[3] & 07);
			*out++ = c;
			in += 4;
		} else {
			*out++ = *in++;
		}
	}

	/*
	 * Unmangled characters make the final string shorter, add the null
	 * terminator. Otherwise keep the line at the space separator so
	 * followup parsing can continue.
	 */
	if (out < in)
		*out = 0;

	*line = in;
	return start;
}
/*
 * Parse a line from /proc/pid/mountinfo
 * Example:
 *
 * 272 265 0:49 /subvol /mnt/path rw,noatime shared:145 - btrfs /dev/sda1 rw,subvolid=5598,subvol=/subvol
 * 0   1   2    3       4         5          6          7 8     9         10
 *
 * Fields related to paths and options are parsed, @line is changed in place,
 * separators are replaced by nul char, paths could be unmangled.
 *
 * The optional field (6) may be missing or may appear several times, the
 * list is terminated by the single "-" (7). Everything up to and including
 * that separator is skipped instead of assuming exactly one optional field.
 *
 * All members of @ent point into @line. If the line ends early, the members
 * that could not be read point to an empty string.
 */
static void parse_mntinfo_line(char *line, struct mnt_entry *ent)
{
	int i;

	/* Skip 0, 1 and 2 */
	for (i = 0; i < 3; i++) {
		skip_nonsep(&line);
		skip_sep(&line);
	}

	/* Read 3 */
	ent->root = read_path(&line);
	skip_sep(&line);

	/* Read 4 */
	ent->path = read_path(&line);
	skip_sep(&line);

	/* Read 5 */
	ent->options1 = line;
	skip_nonsep(&line);
	skip_sep(&line);

	/* Skip any number of optional fields (6) and the "-" separator (7) */
	while (*line) {
		const bool is_separator = line[0] == '-' &&
					  (line[1] == 0 || is_sep(line[1]));

		skip_nonsep(&line);
		skip_sep(&line);
		if (is_separator)
			break;
	}

	/* Read 8 */
	ent->fstype = line;
	skip_nonsep(&line);
	skip_sep(&line);

	/* Read 9 */
	ent->device = read_path(&line);
	skip_sep(&line);

	/* Read 10 */
	ent->options2 = line;
	skip_nonsep(&line);
	skip_sep(&line);
}
/*
 * Compare the subvolume passed with the pathname of the directory mounted in
 * btrfs. The pathname inside btrfs is different from getmnt and friends, since
 * it can detect bind mounts to content from the inside of the original mount.
 *
 * Example:
 * # mount -o subvol=/vol /dev/sda2 /mnt
 * # mount --bind /mnt/dir2 /othermnt
 *
 * # mounts
 * ...
 * /dev/sda2 on /mnt type btrfs (ro,relatime,ssd,space_cache,subvolid=256,subvol=/vol)
 * /dev/sda2 on /othermnt type btrfs (ro,relatime,ssd,space_cache,subvolid=256,subvol=/vol)
 *
 * # cat /proc/self/mountinfo
 *
 * 38 30 0:32 /vol /mnt ro,relatime - btrfs /dev/sda2 ro,ssd,space_cache,subvolid=256,subvol=/vol
 * 37 29 0:32 /vol/dir2 /othermnt ro,relatime - btrfs /dev/sda2 ro,ssd,space_cache,subvolid=256,subvol=/vol
 *
 * If we try to find a mount point only using subvol and subvolid from mount
 * options we would be misled to believe that /othermnt has the same content
 * as /mnt.
 *
 * But, using mountinfo, we have the pathname _inside_ the filesystem, so we
 * can filter out the mount points with bind mounts which have different content
 * from the original mounts, in this case the mount point with id 37.
 *
 * @subvol:   path of the subvolume inside the filesystem, compared to the
 *            root field of mountinfo
 * @subvolid: subvolume id as string, compared to the subvolid= mount option
 * @mount:    set to a newly allocated copy of the mount point of the first
 *            match, left untouched if nothing matches
 *
 * Returns 0 if the file could be processed (even if nothing matched), -1 if
 * /proc/self/mountinfo cannot be opened, -ENOMEM on allocation failure.
 */
int find_mount_fsroot(const char *subvol, const char *subvolid, char **mount)
{
	FILE *mnt;
	char *buf = NULL;
	int bs = 4096;
	int line = 0;
	int ret = 0;
	bool found = false;

	mnt = fopen("/proc/self/mountinfo", "r");
	if (!mnt)
		return -1;

	buf = malloc(bs);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	do {
		int ch;

		ch = fgetc(mnt);
		if (ch == EOF)
			break;

		if (ch == '\n') {
			struct mnt_entry ent;
			char *opt;
			const char *value;

			/* Always in bounds, see the buffer growing below */
			buf[line] = 0;
			parse_mntinfo_line(buf, &ent);

			/* Skip unrelated mounts */
			if (strcmp(ent.fstype, "btrfs") != 0)
				goto nextline;
			if (strcmp(ent.root, subvol) != 0)
				goto nextline;

			/*
			 * Match subvolume by id found in mountinfo and
			 * requested by the caller
			 */
			opt = find_option(ent.options2, "subvolid=");
			if (!opt)
				goto nextline;
			value = opt + strlen("subvolid=");
			if (strcmp(value, subvolid) != 0) {
				free(opt);
				goto nextline;
			}
			free(opt);

			/*
			 * First match is in most cases the original mount, not
			 * a bind mount. In case there are no further bind
			 * mounts, return what we found in @mount. Any
			 * following mount that matches by path and subvolume
			 * id is a bind mount and we return the original mount.
			 */
			if (found)
				goto out;
			found = true;
			*mount = strdup(ent.path);
			if (!*mount) {
				/* Do not report success without a result */
				ret = -ENOMEM;
				goto out;
			}
			ret = 0;
			goto nextline;
		}

		/*
		 * Grow buffer if needed, there are 3 paths up to PATH_MAX and
		 * mount options are limited by page size. Often the overall
		 * line length does not exceed 256.
		 *
		 * Keep one byte spare for the terminating nul char that is
		 * stored once the end of the line is reached.
		 */
		if (line + 1 >= bs) {
			char *tmp;

			bs += 4096;
			tmp = realloc(buf, bs);
			if (!tmp) {
				ret = -ENOMEM;
				goto out;
			}
			buf = tmp;
		}
		buf[line++] = ch;
		continue;
nextline:
		line = 0;
	} while (1);

out:
	free(buf);
	fclose(mnt);
	return ret;
}
/*
 * Find the mount point that @path belongs to, ie. the longest mount directory
 * listed in /proc/self/mounts that contains @path. On success @mount_root is
 * set to its resolved path as allocated by realpath().
 *
 * return 0 if a btrfs mount point is found
 * return 1 if a mount point is found but not btrfs
 * return <0 if something goes wrong
 */
int find_mount_root(const char *path, char **mount_root)
{
	FILE *mounts;
	struct mntent *entry;
	char *best = NULL;
	int best_len = 0;
	int not_btrfs = 1;
	int ret = 0;
	int fd;

	/* Only check that the path can be opened */
	fd = open(path, O_RDONLY | O_NOATIME);
	if (fd < 0)
		return -errno;
	close(fd);

	mounts = setmntent("/proc/self/mounts", "r");
	if (!mounts)
		return -errno;

	while ((entry = getmntent(mounts))) {
		int len;

		if (!path_is_in_dir(entry->mnt_dir, path))
			continue;

		/* On equal length the later entry replaces the earlier one */
		len = strlen(entry->mnt_dir);
		if (len < best_len)
			continue;

		free(best);
		best_len = len;
		best = strdup(entry->mnt_dir);
		if (!best) {
			ret = -errno;
			break;
		}
		not_btrfs = strcmp(entry->mnt_type, "btrfs");
	}
	endmntent(mounts);

	if (ret)
		return ret;
	if (!best)
		return -ENOENT;
	if (not_btrfs) {
		free(best);
		return 1;
	}

	ret = 0;
	*mount_root = realpath(best, NULL);
	if (!*mount_root)
		ret = -errno;

	free(best);
	return ret;
}
/*
 * Look up the key that follows the current position of @path.
 *
 * Starting at level 0, the first level whose node has a slot after the current
 * one provides the key, which is copied to @key.
 *
 * Returns 0 if a key was found, 1 if there is none.
 */
int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
{
	int level;

	for (level = 0; level < BTRFS_MAX_LEVEL && path->nodes[level];
	     level++) {
		/* Current slot is the last one of this node, try the parent */
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;

		if (level != 0)
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}

	return 1;
}
/*
 * Return the printable name of the block group type encoded in @flag, or
 * "unknown" if the type bits match none of the known combinations.
 */
const char* btrfs_group_type_str(u64 flag)
{
	const u64 type = flag & (BTRFS_BLOCK_GROUP_TYPE_MASK |
				 BTRFS_SPACE_INFO_GLOBAL_RSV);

	if (type == BTRFS_BLOCK_GROUP_DATA)
		return "Data";
	if (type == BTRFS_BLOCK_GROUP_SYSTEM)
		return "System";
	if (type == BTRFS_BLOCK_GROUP_METADATA)
		return "Metadata";
	if (type == (BTRFS_BLOCK_GROUP_DATA|BTRFS_BLOCK_GROUP_METADATA))
		return "Data+Metadata";
	if (type == BTRFS_SPACE_INFO_GLOBAL_RSV)
		return "GlobalReserve";

	return "unknown";
}
/*
 * Return the upper case name of the profile encoded in @flag, or "UNKNOWN"
 * if bits outside of the profile mask remain once the type and reserved bits
 * are dropped.
 */
const char* btrfs_group_profile_str(u64 flag)
{
	const u64 profile = flag & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
				     BTRFS_BLOCK_GROUP_RESERVED);
	int index;

	if (profile & ~BTRFS_BLOCK_GROUP_PROFILE_MASK)
		return "UNKNOWN";

	index = btrfs_bg_flags_to_raid_index(profile);
	return btrfs_raid_array[index].upper_name;
}
/*
 * Scale @num by @factor tenths, ie. return num * factor / 10. A factor of 10
 * returns @num unchanged without doing the multiplication.
 */
u64 div_factor(u64 num, int factor)
{
	return (factor == 10) ? num : num * factor / 10;
}
/*
 * Get the length of the string converted from a u64 number.
 *
 * Result is equal to log10(num) + 1, but without the use of math library.
 * Zero counts as one digit.
 */
int count_digits(u64 num)
{
	int digits = 0;

	/* The body runs at least once so that 0 yields 1 */
	do {
		digits++;
		num /= 10;
	} while (num > 0);

	return digits;
}
/*
 * Return the part of @full_path that follows the mount point @mnt, without
 * the slash that separates the two. An empty @mnt returns @full_path as is.
 *
 * Prints an error and exits if @full_path does not start with @mnt or, for
 * a mount point longer than one character, is not followed by a slash.
 */
const char *subvol_strip_mountpoint(const char *mnt, const char *full_path)
{
	int len = strlen(mnt);
	bool on_mount;

	if (len == 0)
		return full_path;

	/* full_path[len] is only read once the first len chars matched */
	on_mount = strncmp(mnt, full_path, len) == 0 &&
		   (len <= 1 || full_path[len] == '/');
	if (!on_mount) {
		error("not on mount point: %s", mnt);
		exit(1);
	}

	/* Skip the separating slash unless the mount point ends with one */
	return full_path + len + (mnt[len - 1] != '/' ? 1 : 0);
}
/*
 * Set the seed manually, the random number helpers will then not seed
 * themselves.
 *
 * NOTE(review): each 16 bit chunk of @seed is XORed with 0xffff, ie. stored
 * inverted; confirm this is intended rather than a mask. It is kept as is
 * because changing it would change the numbers generated for a given seed.
 */
void init_rand_seed(u64 seed)
{
	int word = 0;

	/* only use the last 48 bits */
	while (word < 3) {
		rand_seed[word] = (unsigned short)(seed ^ (unsigned short)(-1));
		seed >>= 16;
		word++;
	}
	rand_seed_initialized = 1;
}
/*
 * Seed the random number generator state unless that has been done already.
 *
 * The seed is read from /dev/urandom. If the file cannot be opened, or the
 * read fails or returns less than the whole seed, a warning is printed and
 * the seed is derived from the current time and the process ids instead.
 */
static void __init_seed(void)
{
	struct timeval tv;
	ssize_t ret = -1;
	int fd;

	if (rand_seed_initialized)
		return;

	/* Use urandom as primary seed source. */
	fd = open("/dev/urandom", O_RDONLY);
	if (fd >= 0) {
		ret = read(fd, rand_seed, sizeof(rand_seed));
		close(fd);
	}

	/*
	 * Check for the error return separately, a negative value converted to
	 * the unsigned type of sizeof would not compare as a short read.
	 */
	if (ret < 0 || (size_t)ret < sizeof(rand_seed)) {
		/* Use time and pid as fallback seed */
		warning("failed to read /dev/urandom, use time and pid as random seed");
		gettimeofday(&tv, 0);
		rand_seed[0] = getpid() ^ (tv.tv_sec & 0xFFFF);
		rand_seed[1] = getppid() ^ (tv.tv_usec & 0xFFFF);
		rand_seed[2] = (tv.tv_sec ^ tv.tv_usec) >> 16;
	}
	rand_seed_initialized = 1;
}
u32 rand_u32(void)
{
__init_seed();
/*
* Don't use nrand48, its range is [0,2^31) The highest bit will always
* be 0. Use jrand48 to include the highest bit.
*/
return (u32)jrand48(rand_seed);
}
/* Return random number in range [0, upper) */
unsigned int rand_range(unsigned int upper)
{
__init_seed();
/*
* Use the full 48bits to mod, which would be more uniformly
* distributed
*/
return (unsigned int)(jrand48(rand_seed) % upper);
}
int rand_int(void)
{
return (int)(rand_u32());
}
/* Return a random 64 bit number built from two calls of rand_u32() */
u64 rand_u64(void)
{
	/* Separate statements keep the order of the two calls defined */
	const u64 high = rand_u32();
	const u64 low = rand_u32();

	return (high << 32) | low;
}
u16 rand_u16(void)
{
return (u16)(rand_u32());
}
u8 rand_u8(void)
{
return (u8)(rand_u32());
}
/*
 * Parse a boolean value from an environment variable.
 *
 * Returns false if the variable is not set, is empty, or its value starts
 * with '0' or 'n'. Any other value returns true.
 */
bool get_env_bool(const char *env_name)
{
	const char *value = getenv(env_name);

	if (!value)
		return false;

	/* Only the first character is looked at */
	switch (value[0]) {
	case '0':
	case 'n':
	case '\0':
		return false;
	default:
		return true;
	}
}
/*
 * Set the defaults of the global configuration: empty parameter list, text
 * output and unset verbosity.
 */
void btrfs_config_init(void)
{
	INIT_LIST_HEAD(&bconf.params);
	bconf.verbose = BTRFS_BCONF_UNSET;
	bconf.output_format = CMD_FORMAT_TEXT;
}
/*
 * Raise the global verbosity by one level. An unset verbosity becomes 1.
 */
void bconf_be_verbose(void)
{
	bconf.verbose = (bconf.verbose == BTRFS_BCONF_UNSET) ?
			1 : bconf.verbose + 1;
}
/* Set the global verbosity to BTRFS_BCONF_QUIET */
void bconf_be_quiet(void)
{
	bconf.verbose = BTRFS_BCONF_QUIET;
}
/*
 * Add a parameter to the global list of parameters.
 *
 * @key:   name of the parameter, copied
 * @value: value of the parameter, copied, may be NULL
 *
 * Nothing is added if memory cannot be allocated, so that the list never
 * contains a parameter without a key.
 */
void bconf_add_param(const char *key, const char *value)
{
	struct config_param *param;
	char *key_copy;
	char *value_copy = NULL;

	key_copy = strdup(key);
	if (!key_copy)
		return;
	if (value) {
		value_copy = strdup(value);
		if (!value_copy) {
			free(key_copy);
			return;
		}
	}

	param = calloc(1, sizeof(*param));
	if (!param) {
		free(value_copy);
		free(key_copy);
		return;
	}
	param->key = key_copy;
	param->value = value_copy;
	list_add(&param->list, &bconf.params);
}
/*
 * Look up a global parameter by its key.
 *
 * Returns the value stored for the first parameter in the list whose key is
 * equal to @key (that can be NULL for a parameter added without a value), or
 * NULL if there is no such parameter.
 */
const char *bconf_param_value(const char *key)
{
	struct config_param *entry;

	list_for_each_entry(entry, &bconf.params, list) {
		if (strcmp(key, entry->key) != 0)
			continue;
		return entry->value;
	}

	return NULL;
}
/*
 * Parse a global parameter given as "key" or "key=value", add it to the
 * global list of parameters and print it.
 *
 * The string is split at the first '='. The split is done on a private copy,
 * @str itself is never written to. Nothing is added or printed if that copy
 * cannot be allocated.
 */
void bconf_save_param(const char *str)
{
	char *copy;
	char *sep;

	if (!strchr(str, '=')) {
		bconf_add_param(str, NULL);
		printf("Global param: %s\n", str);
		return;
	}

	copy = strdup(str);
	if (!copy)
		return;

	/* Cannot be NULL, the copy contains the same '=' as the original */
	sep = strchr(copy, '=');
	*sep = 0;
	bconf_add_param(copy, sep + 1);
	printf("Global param: %s=%s\n", copy, sep + 1);
	free(copy);
}
/* Log that a dry run was requested and enable it in the global configuration */
void bconf_set_dry_run(void)
{
	pr_verbose(LOG_INFO, "Dry-run requested\n");
	bconf.dry_run = 1;
}
/* Return true if the dry run has been enabled by bconf_set_dry_run() */
bool bconf_is_dry_run(void)
{
	if (bconf.dry_run == 1)
		return true;
	return false;
}
/*
 * Returns total size of main memory in bytes as reported by sysinfo(),
 * -1UL (after printing an error) if it cannot be determined.
 */
unsigned long total_memory(void)
{
	struct sysinfo info;

	if (sysinfo(&info) >= 0)
		return info.totalram * info.mem_unit;	/* bytes */

	error("can't determine memory size");
	return -1UL;
}
/*
 * Print the id and name of @device on one line to stdout, preceded by
 * @prefix unless that is NULL.
 */
void print_device_info(struct btrfs_device *device, char *prefix)
{
	if (prefix != NULL)
		fputs(prefix, stdout);

	printf("Device: id = %llu, name = %s\n",
	       device->devid, device->name);
}
void print_all_devices(struct list_head *devices)
{
struct btrfs_device *dev;
printf("All Devices:\n");
list_for_each_entry(dev, devices, dev_list)
print_device_info(dev, "\t");
printf("\n");
}
/* Return the number of bits set in @x */
static int bit_count(u64 x)
{
	int bits = 0;

	/* Each step clears the lowest bit that is still set */
	for (; x; x &= x - 1)
		bits++;

	return bits;
}
/*
 * Build a string with the lower case names of the profiles set in @profiles,
 * separated by ", ".
 *
 * Returns a newly allocated string that the caller must free, or NULL if at
 * most one bit is set in @profiles or if the allocation fails.
 */
static char *sprint_profiles(u64 profiles)
{
	int capacity = 1;
	char *out;
	int idx;

	if (bit_count(profiles) <= 1)
		return NULL;

	/* Room for every known name plus a separator, and the nul char */
	for (idx = 0; idx < BTRFS_NR_RAID_TYPES; idx++)
		capacity += strlen(btrfs_raid_array[idx].lower_name) + 2;

	out = calloc(1, capacity);
	if (!out)
		return NULL;

	/* Single is checked by its own bit and always printed first */
	if (profiles & BTRFS_AVAIL_ALLOC_BIT_SINGLE)
		strcat(out, btrfs_raid_array[BTRFS_RAID_SINGLE].lower_name);

	for (idx = 0; idx < BTRFS_NR_RAID_TYPES; idx++) {
		if (btrfs_raid_array[idx].bg_flag & profiles) {
			if (out[0])
				strcat(out, ", ");
			strcat(out, btrfs_raid_array[idx].lower_name);
		}
	}

	return out;
}
/*
 * Find the block group types of the filesystem behind @fd that use more than
 * one profile.
 *
 * @data_ret, @metadata_ret, @mixed_ret, @system_ret:
 *		each is set to an allocated list of the profiles of that type
 *		if there is more than one of them, to NULL otherwise
 * @types_ret:	optional, set to an allocated, comma separated list of the
 *		types that have multiple profiles (empty string if none), or to
 *		NULL if it cannot be allocated
 *
 * All output pointers are set to NULL first, so they are safe to free even
 * when the function fails. The strings must be freed by the caller.
 *
 * Returns 1 if any type has multiple profiles, 0 if none has, -1 if the space
 * info cannot be read.
 */
static int btrfs_get_string_for_multiple_profiles(int fd, char **data_ret,
		char **metadata_ret, char **mixed_ret, char **system_ret,
		char **types_ret)
{
	int ret;
	int i;
	struct btrfs_ioctl_space_args *sargs;
	u64 data_profiles = 0;
	u64 metadata_profiles = 0;
	u64 system_profiles = 0;
	u64 mixed_profiles = 0;
	const u64 mixed_profile_fl = BTRFS_BLOCK_GROUP_METADATA |
		BTRFS_BLOCK_GROUP_DATA;

	/* Callers free the results unconditionally, also after a failure */
	*data_ret = NULL;
	*metadata_ret = NULL;
	*mixed_ret = NULL;
	*system_ret = NULL;
	if (types_ret)
		*types_ret = NULL;

	ret = get_df(fd, &sargs);
	if (ret < 0)
		return -1;

	for (i = 0; i < sargs->total_spaces; i++) {
		u64 flags = sargs->spaces[i].flags;

		/* No profile bit set means single */
		if (!(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK))
			flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;

		if ((flags & mixed_profile_fl) == mixed_profile_fl)
			mixed_profiles |= flags;
		else if (flags & BTRFS_BLOCK_GROUP_DATA)
			data_profiles |= flags;
		else if (flags & BTRFS_BLOCK_GROUP_METADATA)
			metadata_profiles |= flags;
		else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
			system_profiles |= flags;
	}
	free(sargs);

	data_profiles &= BTRFS_EXTENDED_PROFILE_MASK;
	system_profiles &= BTRFS_EXTENDED_PROFILE_MASK;
	mixed_profiles &= BTRFS_EXTENDED_PROFILE_MASK;
	metadata_profiles &= BTRFS_EXTENDED_PROFILE_MASK;

	*data_ret = sprint_profiles(data_profiles);
	*metadata_ret = sprint_profiles(metadata_profiles);
	*mixed_ret = sprint_profiles(mixed_profiles);
	*system_ret = sprint_profiles(system_profiles);

	if (types_ret) {
		/* Large enough for all four type names and the separators */
		*types_ret = calloc(1, 64);
		if (!*types_ret)
			goto out;
		if (*data_ret)
			strcat(*types_ret, "data");
		if (*metadata_ret) {
			if ((*types_ret)[0])
				strcat(*types_ret, ", ");
			strcat(*types_ret, "metadata");
		}
		if (*mixed_ret) {
			if ((*types_ret)[0])
				strcat(*types_ret, ", ");
			strcat(*types_ret, "data+metadata");
		}
		if (*system_ret) {
			if ((*types_ret)[0])
				strcat(*types_ret, ", ");
			strcat(*types_ret, "system");
		}
	}
out:
	return *data_ret || *metadata_ret || *mixed_ret || *system_ret;
}
/*
 * Return string containing comma separated list of block group types that
 * contain multiple profiles. The return value must be freed by the caller.
 *
 * NULL is returned if the list could not be obtained.
 */
char *btrfs_test_for_multiple_profiles(int fd)
{
	/*
	 * Start from NULL, the pointers must be safe to free and to return
	 * even if the helper fails before setting them.
	 */
	char *data = NULL;
	char *metadata = NULL;
	char *system = NULL;
	char *mixed = NULL;
	char *types = NULL;

	btrfs_get_string_for_multiple_profiles(fd, &data, &metadata, &mixed,
			&system, &types);

	free(data);
	free(metadata);
	free(mixed);
	free(system);

	return types;
}
/*
 * Print a warning if the filesystem behind @fd has block group types with
 * multiple profiles, with one line for each such type.
 *
 * Returns 1 if the warning was printed, otherwise the value returned by
 * btrfs_get_string_for_multiple_profiles().
 */
int btrfs_warn_multiple_profiles(int fd)
{
	char *data = NULL;
	char *metadata = NULL;
	char *mixed = NULL;
	char *system = NULL;
	int ret;

	ret = btrfs_get_string_for_multiple_profiles(fd, &data, &metadata,
			&mixed, &system, NULL);
	if (ret != 1)
		return ret;

	warning("Multiple block group profiles detected, see 'man btrfs(5)'");
	warning_on(!!data, " Data: %s", data);
	warning_on(!!metadata, " Metadata: %s", metadata);
	warning_on(!!mixed, " Data+Metadata: %s", mixed);
	warning_on(!!system, " System: %s", system);

	free(data);
	free(metadata);
	free(mixed);
	free(system);

	return 1;
}
/*
 * In builds with EXPERIMENTAL enabled, warn about the experimental build and
 * print @str as another warning unless it is NULL. Does nothing otherwise.
 */
void btrfs_warn_experimental(const char *str)
{
#if EXPERIMENTAL
	warning("Experimental build with unstable or unfinished features");
	warning_on(!!str, "%s\n", str);
#endif
}
/*
 * Names of the exclusive operations, indexed by the BTRFS_EXCLOP_* value.
 * get_fs_exclop() compares them to the contents of the sysfs file
 * exclusive_operation, get_fs_exclop_name() returns them for printing.
 * Each name has to fit into 16 bytes including the nul char.
 */
static const char exclop_def[][16] = {
[BTRFS_EXCLOP_NONE] = "none",
[BTRFS_EXCLOP_BALANCE] = "balance",
[BTRFS_EXCLOP_BALANCE_PAUSED] = "balance paused",
[BTRFS_EXCLOP_DEV_ADD] = "device add",
[BTRFS_EXCLOP_DEV_REMOVE] = "device remove",
[BTRFS_EXCLOP_DEV_REPLACE] = "device replace",
[BTRFS_EXCLOP_RESIZE] = "resize",
[BTRFS_EXCLOP_SWAP_ACTIVATE] = "swap activate",
};
/*
 * Read currently running exclusive operation from sysfs. If this is not
 * available, return BTRFS_EXCLOP_UNKNOWN
 *
 * @fd: open file descriptor on the filesystem
 *
 * Returns the index of the matching name in exclop_def, or
 * BTRFS_EXCLOP_UNKNOWN if the sysfs file cannot be opened or read or if its
 * contents match none of the known names.
 */
int get_fs_exclop(int fd)
{
	int sysfs_fd;
	char buf[32];
	int ret;
	int i;

	sysfs_fd = sysfs_open_fsid_file(fd, "exclusive_operation");
	if (sysfs_fd < 0)
		return BTRFS_EXCLOP_UNKNOWN;

	memset(buf, 0, sizeof(buf));
	/*
	 * Read one byte less than the buffer holds, so the zeroed last byte
	 * always terminates the string for the string functions below.
	 */
	ret = sysfs_read_file(sysfs_fd, buf, sizeof(buf) - 1);
	close(sysfs_fd);
	if (ret <= 0)
		return BTRFS_EXCLOP_UNKNOWN;

	/* Cut off trailing whitespace, eg. a newline at the end */
	i = strlen(buf);
	while (i > 0 && isspace((unsigned char)buf[i - 1]))
		i--;
	buf[i] = 0;

	for (i = 0; i < ARRAY_SIZE(exclop_def); i++) {
		if (strcmp(exclop_def[i], buf) == 0)
			return i;
	}

	return BTRFS_EXCLOP_UNKNOWN;
}
/*
 * Return the name of the exclusive operation @op as defined in exclop_def,
 * or "UNKNOWN" if @op is not a valid index of that table.
 */
const char *get_fs_exclop_name(int op)
{
	/* The last valid index is one less than the number of entries */
	if (0 <= op && op < ARRAY_SIZE(exclop_def))
		return exclop_def[op];
	return "UNKNOWN";
}
/*
 * Check if there's another exclusive operation running and either return error
 * or wait until there's none in case @enqueue is true. The timeout between
 * checks is 1 minute as we get notification on the sysfs file when the
 * operation finishes.
 *
 * @fd:      open file descriptor on the filesystem
 * @start:   the operation that the caller wants to start
 * @enqueue: wait for the running operation to finish instead of failing
 *
 * Return:
 * 0 - caller can continue, nothing running or the status is not available
 * 1 - another operation running
 * <0 - there was another error
 */
int check_running_fs_exclop(int fd, enum exclusive_operation start, bool enqueue)
{
int sysfs_fd;
int exclop;
int ret;
/* This descriptor is only used for select(), the value is read by get_fs_exclop() */
sysfs_fd = sysfs_open_fsid_file(fd, "exclusive_operation");
if (sysfs_fd < 0) {
/* No such sysfs file, the status is not available */
if (sysfs_fd == -ENOENT)
return 0;
return sysfs_fd;
}
exclop = get_fs_exclop(fd);
/*
 * NOTE(review): presumably BTRFS_EXCLOP_NONE is 0 and BTRFS_EXCLOP_UNKNOWN
 * is negative, so this covers both "nothing running" and "not available";
 * confirm against the enum definition.
 */
if (exclop <= 0) {
ret = 0;
goto out;
}
/*
 * Some combinations are compatible:
 * - start device add when balance is paused (kernel 5.17)
 */
if (start == BTRFS_EXCLOP_DEV_ADD && exclop == BTRFS_EXCLOP_BALANCE_PAUSED) {
ret = 0;
goto out;
}
if (!enqueue) {
error(
"unable to start %s, another exclusive operation '%s' in progress",
get_fs_exclop_name(start),
get_fs_exclop_name(exclop));
ret = 1;
goto out;
} else {
pr_verbose(LOG_DEFAULT, "Waiting for another exclusive operation '%s' to finish ...",
get_fs_exclop_name(exclop));
fflush(stdout);
}
while (exclop > 0) {
fd_set fds;
/* The timeout is set up again for each round of the loop */
struct timeval tv = { .tv_sec = 60, .tv_usec = 0 };
FD_ZERO(&fds);
FD_SET(sysfs_fd, &fds);
/* The sysfs file is watched in the set of exceptional conditions */
ret = select(sysfs_fd + 1, NULL, NULL, &fds, &tv);
if (ret < 0) {
ret = -errno;
break;
}
/* On timeout (ret == 0) the state is not read again, just wait once more */
if (ret > 0) {
/*
 * Notified before the timeout, check again before
 * returning. In case there are more operations
 * waiting, we want to reduce the chances to race so
 * reuse the remaining time to randomize the order.
 */
tv.tv_sec /= 2;
ret = select(sysfs_fd + 1, NULL, NULL, &fds, &tv);
exclop = get_fs_exclop(fd);
/* Nothing running anymore, this also ends the loop */
if (exclop <= 0)
ret = 0;
}
}
pr_verbose(LOG_DEFAULT, " done\n");
out:
close(sysfs_fd);
return ret;
}