btrfs-progs/common/send-stream.c

625 lines
16 KiB
C
Raw Normal View History

/*
* Copyright (C) 2012 Alexander Block. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <uuid/uuid.h>
#include <unistd.h>
#include "kernel-shared/send.h"
#include "common/send-stream.h"
#include "crypto/crc32c.h"
#include "common/utils.h"
/*
 * One decoded TLV attribute of the command currently being processed.
 *
 * read_cmd() fills one entry per attribute found in the command payload.
 * The data pointer refers to bytes inside the stream read buffer; it is not
 * a separately allocated copy.
 */
struct btrfs_send_attribute {
/* Attribute type as found in the stream (host byte order) */
u16 tlv_type;
/*
 * Note: in btrfs_tlv_header, this is __le16, but we need 32 bits for
 * attributes with file data as of version 2 of the send stream format.
 */
u32 tlv_len;
/* Start of the attribute payload; NULL means the attribute is absent */
char *data;
};
struct btrfs_send_stream {
char *read_buf;
size_t read_buf_size;
btrfs-progs: align receive buffer to enable fast CRC To use optimized CRC implementation, the input buffer must be unsigned long aligned. btrfs receive calculates checksum based on read_buf, including btrfs_cmd_header (with zeroed CRC field) and command content. Reorder the buffer to the beginning of the structure and force the alignment to 64, this should be cacheline friendly and could speed up the data transfers. Interesting parts from the report: Sending host: Fedora 33 AMD ThreadRipper 1920X - 128GB RAM 2x10GBit Ethernet, bonded MegaRaid 9270 6x16TB Seagate Exos in RAID5 Receiving host: Fedora 33 Intel i3-7300 - HT enabled - 32GB RAM 10GBit Ethernet, single connection MegaRaid 9260 12x8TB WD NAS drives in RAID5 The 2 hosts are connected to the same 10G switch. The sender could definitely saturate a 10GBit link. The practically achievable writes on the backup host would be lower, but still at least 400MB/s. The file system contains mostly large files of 1GB+, so there is little meta-data. With btrfs send/receive I'm getting a steady transfer rate of 60MB/s. The copy has been running for a little over 5 days now, having only transferred some 25TB. This is way too slow for this setup. Analyzing resource usage, the sender side is fine, both the btrfs send and the corresponding ssh process only use about 10-10% CPU, which on a 24 threaded machine is virtually nothing. However, the receiver is running with a load of ~2.6, with the sshd using 30-50% CPU and the btrfs receive a further 60-70%. The rest of the load comes from IO wait. So the bottleneck is the btrfs receive clearly. Issue: #324 Signed-off-by: Sheng Mao <shngmao@gmail.com> Signed-off-by: David Sterba <dsterba@suse.com>
2020-12-27 05:46:06 +08:00
int fd;
int cmd;
struct btrfs_send_attribute cmd_attrs[BTRFS_SEND_A_MAX + 1];
u32 version;
/*
* end of last successful read, equivalent to start of current
* malformed part of block
*/
size_t stream_pos;
struct btrfs_send_ops *ops;
void *user;
btrfs-progs: align receive buffer to enable fast CRC To use optimized CRC implementation, the input buffer must be unsigned long aligned. btrfs receive calculates checksum based on read_buf, including btrfs_cmd_header (with zeroed CRC field) and command content. Reorder the buffer to the beginning of the structure and force the alignment to 64, this should be cacheline friendly and could speed up the data transfers. Interesting parts from the report: Sending host: Fedora 33 AMD ThreadRipper 1920X - 128GB RAM 2x10GBit Ethernet, bonded MegaRaid 9270 6x16TB Seagate Exos in RAID5 Receiving host: Fedora 33 Intel i3-7300 - HT enabled - 32GB RAM 10GBit Ethernet, single connection MegaRaid 9260 12x8TB WD NAS drives in RAID5 The 2 hosts are connected to the same 10G switch. The sender could definitely saturate a 10GBit link. The practically achievable writes on the backup host would be lower, but still at least 400MB/s. The file system contains mostly large files of 1GB+, so there is little meta-data. With btrfs send/receive I'm getting a steady transfer rate of 60MB/s. The copy has been running for a little over 5 days now, having only transferred some 25TB. This is way too slow for this setup. Analyzing resource usage, the sender side is fine, both the btrfs send and the corresponding ssh process only use about 10-10% CPU, which on a 24 threaded machine is virtually nothing. However, the receiver is running with a load of ~2.6, with the sshd using 30-50% CPU and the btrfs receive a further 60-70%. The rest of the load comes from IO wait. So the bottleneck is the btrfs receive clearly. Issue: #324 Signed-off-by: Sheng Mao <shngmao@gmail.com> Signed-off-by: David Sterba <dsterba@suse.com>
2020-12-27 05:46:06 +08:00
} __attribute__((aligned(64)));
/*
 * Fill buf with exactly len bytes taken from sctx->fd.
 *
 * sctx->stream_pos is advanced by the number of bytes consumed, but only
 * when the request was satisfied completely (or nothing was read at all).
 *
 * Return:
 * 0   - all len bytes were read
 * < 0 - negative errno: read() failed, or the stream ended in the middle of
 *       the request (-EIO)
 * > 0 - EOF was hit before any byte was read
 */
static int read_buf(struct btrfs_send_stream *sctx, char *buf, size_t len)
{
	size_t done = 0;

	while (done < len) {
		ssize_t nread = read(sctx->fd, buf + done, len - done);

		if (nread < 0) {
			/* Capture errno before the error helper gets to run */
			int err = -errno;

			error("read from stream failed: %m");
			return err;
		}

		if (nread == 0) {
			/* EOF: acceptable only when it falls on a request boundary */
			if (done > 0) {
				error("short read from stream: expected %zu read %zu", len, done);
				return -EIO;
			}
			return 1;
		}

		done += nread;
	}

	sctx->stream_pos += done;
	return 0;
}
/*
* Reads a single command from kernel space and decodes the TLV's into
* sctx->cmd_attrs
*
* Returns:
* 0 - success
* < 0 - an error in the command
*/
static int read_cmd(struct btrfs_send_stream *sctx)
{
int ret;
u16 cmd;
u32 cmd_len;
char *data;
u32 pos;
u32 crc;
u32 crc2;
struct btrfs_cmd_header *cmd_hdr;
size_t buf_len;
memset(sctx->cmd_attrs, 0, sizeof(sctx->cmd_attrs));
ret = read_buf(sctx, sctx->read_buf, sizeof(*cmd_hdr));
if (ret < 0)
goto out;
if (ret) {
ret = -EINVAL;
error("unexpected EOF in stream");
goto out;
}
cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
cmd_len = le32_to_cpu(cmd_hdr->len);
cmd = le16_to_cpu(cmd_hdr->cmd);
buf_len = sizeof(*cmd_hdr) + cmd_len;
if (sctx->read_buf_size < buf_len) {
void *new_read_buf;
new_read_buf = realloc(sctx->read_buf, buf_len);
if (!new_read_buf) {
ret = -ENOMEM;
errno = -ret;
error("failed to reallocate read buffer for cmd: %m");
goto out;
}
sctx->read_buf = new_read_buf;
sctx->read_buf_size = buf_len;
/* We need to reset cmd_hdr after realloc of sctx->read_buf */
cmd_hdr = (struct btrfs_cmd_header *)sctx->read_buf;
}
data = sctx->read_buf + sizeof(*cmd_hdr);
ret = read_buf(sctx, data, cmd_len);
if (ret < 0)
goto out;
if (ret) {
ret = -EINVAL;
error("unexpected EOF in stream");
goto out;
}
crc = le32_to_cpu(cmd_hdr->crc);
/* In send, CRC is computed with header crc = 0, replicate that */
cmd_hdr->crc = 0;
crc2 = crc32c(0, (unsigned char*)sctx->read_buf,
sizeof(*cmd_hdr) + cmd_len);
if (crc != crc2) {
ret = -EINVAL;
error("crc32 mismatch in command");
goto out;
}
pos = 0;
while (pos < cmd_len) {
u16 tlv_type;
struct btrfs_send_attribute *send_attr;
if (cmd_len - pos < sizeof(__le16)) {
error("send stream is truncated");
ret = -EINVAL;
goto out;
}
tlv_type = le16_to_cpu(*(__le16 *)data);
if (tlv_type == 0 || tlv_type > BTRFS_SEND_A_MAX) {
error("invalid tlv in cmd tlv_type = %hu", tlv_type);
ret = -EINVAL;
goto out;
}
send_attr = &sctx->cmd_attrs[tlv_type];
send_attr->tlv_type = tlv_type;
pos += sizeof(tlv_type);
data += sizeof(tlv_type);
if (sctx->version >= 2 && tlv_type == BTRFS_SEND_A_DATA) {
send_attr->tlv_len = cmd_len - pos;
} else {
if (cmd_len - pos < sizeof(__le16)) {
error("send stream is truncated");
ret = -EINVAL;
goto out;
}
send_attr->tlv_len = le16_to_cpu(*(__le16 *)data);
pos += sizeof(__le16);
data += sizeof(__le16);
}
if (cmd_len - pos < send_attr->tlv_len) {
error("send stream is truncated");
ret = -EINVAL;
goto out;
}
send_attr->data = data;
pos += send_attr->tlv_len;
data += send_attr->tlv_len;
}
sctx->cmd = cmd;
ret = 0;
out:
return ret;
}
/*
 * Look up attribute attr of the current command.
 *
 * On success *data is set to the attribute payload (located inside the
 * stream read buffer) and *len to its length.
 *
 * Returns 0 on success, -EINVAL if attr is outside 1..BTRFS_SEND_A_MAX and
 * -ENOENT if the current command does not carry the attribute.
 */
static int tlv_get(struct btrfs_send_stream *sctx, int attr, void **data, int *len)
{
	struct btrfs_send_attribute *found;

	if (!(attr > 0 && attr <= BTRFS_SEND_A_MAX)) {
		error("invalid attribute requested, attr = %d", attr);
		return -EINVAL;
	}

	found = &sctx->cmd_attrs[attr];
	if (!found->data) {
		error("attribute %d requested but not present", attr);
		return -ENOENT;
	}

	*len = found->tlv_len;
	*data = found->data;
	return 0;
}
/*
 * Helper macros for decoding attributes.
 *
 * All of them expect the enclosing function to declare an "int ret" variable
 * and a "tlv_get_failed" label: on failure the error code is stored in ret
 * and control jumps to that label.
 */

/* Store expr in ret and jump to tlv_get_failed if the result is negative. */
#define __TLV_GOTO_FAIL(expr) \
if ((ret = expr) < 0) \
goto tlv_get_failed;
/* Statement-safe wrapper around __TLV_GOTO_FAIL. */
#define __TLV_DO_WHILE_GOTO_FAIL(expr) \
do { \
__TLV_GOTO_FAIL(expr) \
} while (0)
/* Fetch the payload pointer and length of attribute attr via tlv_get(). */
#define TLV_GET(s, attr, data, len) \
__TLV_DO_WHILE_GOTO_FAIL(tlv_get(s, attr, data, len))
/*
 * Fail with -EINVAL (after logging both sizes) when the attribute length got
 * differs from the expected size.
 */
#define TLV_CHECK_LEN(expected, got) \
do { \
if (expected != got) { \
error("invalid size for attribute, " \
"expected = %d, got = %d", \
(int)expected, (int)got); \
ret = -EINVAL; \
goto tlv_get_failed; \
} \
} while (0)
/*
 * Decode a little-endian integer attribute of the given bit width into *v.
 * The attribute length must match the integer size exactly; the value is read
 * with the get_unaligned_le<bits>() helper.
 */
#define TLV_GET_INT(s, attr, bits, v) \
do { \
__le##bits *__tmp; \
int __len; \
TLV_GET(s, attr, (void**)&__tmp, &__len); \
TLV_CHECK_LEN(sizeof(*__tmp), __len); \
*v = get_unaligned_le##bits(__tmp); \
} while (0)
/* Fixed-width convenience wrappers around TLV_GET_INT. */
#define TLV_GET_U8(s, attr, v) TLV_GET_INT(s, attr, 8, v)
#define TLV_GET_U16(s, attr, v) TLV_GET_INT(s, attr, 16, v)
#define TLV_GET_U32(s, attr, v) TLV_GET_INT(s, attr, 32, v)
#define TLV_GET_U64(s, attr, v) TLV_GET_INT(s, attr, 64, v)
/*
 * Copy attribute attr of the current command into a newly allocated,
 * NUL-terminated string.
 *
 * *str receives the result of malloc() and must be released by the caller
 * with free(). Returns 0 on success, the negative error of tlv_get() if the
 * attribute cannot be looked up, or -ENOMEM if the allocation fails (*str is
 * NULL in that case).
 */
static int tlv_get_string(struct btrfs_send_stream *sctx, int attr, char **str)
{
	void *src;
	int src_len = 0;
	char *copy;
	int ret;

	ret = tlv_get(sctx, attr, &src, &src_len);
	if (ret < 0)
		return ret;

	copy = malloc(src_len + 1);
	*str = copy;
	if (!copy)
		return -ENOMEM;

	/* The payload in the stream is not terminated, add the NUL here */
	memcpy(copy, src, src_len);
	copy[src_len] = 0;
	return 0;
}
/*
 * Call tlv_get_string() and jump to the caller's tlv_get_failed label, with
 * the error stored in the caller's ret, when it returns a negative value.
 */
#define TLV_GET_STRING(s, attr, str) \
__TLV_DO_WHILE_GOTO_FAIL(tlv_get_string(s, attr, str))
/*
 * Decode attribute attr of the current command as a struct btrfs_timespec
 * and convert it to the host struct timespec *ts.
 *
 * Returns 0 on success, the negative error of tlv_get() if the attribute
 * cannot be looked up, or -EINVAL if its length is not
 * sizeof(struct btrfs_timespec).
 */
static int tlv_get_timespec(struct btrfs_send_stream *sctx,
			    int attr, struct timespec *ts)
{
	struct btrfs_timespec *raw;
	int raw_len;
	int ret;

	ret = tlv_get(sctx, attr, (void **)&raw, &raw_len);
	if (ret < 0)
		goto tlv_get_failed;
	TLV_CHECK_LEN(sizeof(*raw), raw_len);

	/* Both fields are little-endian in the stream */
	ts->tv_sec = le64_to_cpu(raw->sec);
	ts->tv_nsec = le32_to_cpu(raw->nsec);
	ret = 0;

tlv_get_failed:
	return ret;
}
/*
 * Call tlv_get_timespec() and jump to the caller's tlv_get_failed label, with
 * the error stored in the caller's ret, when it returns a negative value.
 */
#define TLV_GET_TIMESPEC(s, attr, ts) \
__TLV_DO_WHILE_GOTO_FAIL(tlv_get_timespec(s, attr, ts))
/*
 * Copy the BTRFS_UUID_SIZE bytes of attribute attr of the current command
 * into uuid.
 *
 * Returns 0 on success, the negative error of tlv_get() if the attribute
 * cannot be looked up, or -EINVAL if its length is not BTRFS_UUID_SIZE.
 */
static int tlv_get_uuid(struct btrfs_send_stream *sctx, int attr, u8 *uuid)
{
	void *raw;
	int raw_len;
	int ret;

	ret = tlv_get(sctx, attr, &raw, &raw_len);
	if (ret < 0)
		goto tlv_get_failed;
	TLV_CHECK_LEN(BTRFS_UUID_SIZE, raw_len);

	memcpy(uuid, raw, BTRFS_UUID_SIZE);
	ret = 0;

tlv_get_failed:
	return ret;
}
/*
 * Call tlv_get_uuid() and jump to the caller's tlv_get_failed label, with the
 * error stored in the caller's ret, when it returns a negative value.
 */
#define TLV_GET_UUID(s, attr, uuid) \
__TLV_DO_WHILE_GOTO_FAIL(tlv_get_uuid(s, attr, uuid))
static int read_and_process_cmd(struct btrfs_send_stream *sctx)
{
int ret;
char *path = NULL;
char *path_to = NULL;
char *clone_path = NULL;
char *xattr_name = NULL;
void *xattr_data = NULL;
void *data = NULL;
struct timespec at;
struct timespec ct;
struct timespec mt;
u8 uuid[BTRFS_UUID_SIZE];
u8 clone_uuid[BTRFS_UUID_SIZE];
u32 compression;
u32 encryption;
u64 tmp;
u64 tmp2;
u64 ctransid;
u64 clone_ctransid;
u64 mode;
u64 dev;
u64 clone_offset;
u64 offset;
u64 ino;
u64 unencoded_file_len;
u64 unencoded_len;
u64 unencoded_offset;
int len;
int xattr_len;
ret = read_cmd(sctx);
if (ret)
goto out;
switch (sctx->cmd) {
case BTRFS_SEND_C_SUBVOL:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_UUID(sctx, BTRFS_SEND_A_UUID, uuid);
TLV_GET_U64(sctx, BTRFS_SEND_A_CTRANSID, &ctransid);
ret = sctx->ops->subvol(path, uuid, ctransid, sctx->user);
break;
case BTRFS_SEND_C_SNAPSHOT:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_UUID(sctx, BTRFS_SEND_A_UUID, uuid);
TLV_GET_U64(sctx, BTRFS_SEND_A_CTRANSID, &ctransid);
TLV_GET_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, clone_uuid);
TLV_GET_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, &clone_ctransid);
ret = sctx->ops->snapshot(path, uuid, ctransid, clone_uuid,
clone_ctransid, sctx->user);
break;
case BTRFS_SEND_C_MKFILE:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
/* ino is not passed to the callbacks in v1 */
TLV_GET_U64(sctx, BTRFS_SEND_A_INO, &ino);
ret = sctx->ops->mkfile(path, sctx->user);
break;
case BTRFS_SEND_C_MKDIR:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
/* ino is not passed to the callbacks in v1 */
TLV_GET_U64(sctx, BTRFS_SEND_A_INO, &ino);
ret = sctx->ops->mkdir(path, sctx->user);
break;
case BTRFS_SEND_C_MKNOD:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
/* ino is not passed to the callbacks in v1 */
TLV_GET_U64(sctx, BTRFS_SEND_A_INO, &ino);
TLV_GET_U64(sctx, BTRFS_SEND_A_MODE, &mode);
TLV_GET_U64(sctx, BTRFS_SEND_A_RDEV, &dev);
ret = sctx->ops->mknod(path, mode, dev, sctx->user);
break;
case BTRFS_SEND_C_MKFIFO:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
/* ino is not passed to the callbacks in v1 */
TLV_GET_U64(sctx, BTRFS_SEND_A_INO, &ino);
ret = sctx->ops->mkfifo(path, sctx->user);
break;
case BTRFS_SEND_C_MKSOCK:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
/* ino is not passed to the callbacks in v1 */
TLV_GET_U64(sctx, BTRFS_SEND_A_INO, &ino);
ret = sctx->ops->mksock(path, sctx->user);
break;
case BTRFS_SEND_C_SYMLINK:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
/* ino is not passed to the callbacks in v1 */
TLV_GET_U64(sctx, BTRFS_SEND_A_INO, &ino);
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH_LINK, &path_to);
ret = sctx->ops->symlink(path, path_to, sctx->user);
break;
case BTRFS_SEND_C_RENAME:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH_TO, &path_to);
ret = sctx->ops->rename(path, path_to, sctx->user);
break;
case BTRFS_SEND_C_LINK:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH_LINK, &path_to);
ret = sctx->ops->link(path, path_to, sctx->user);
break;
case BTRFS_SEND_C_UNLINK:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
ret = sctx->ops->unlink(path, sctx->user);
break;
case BTRFS_SEND_C_RMDIR:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
ret = sctx->ops->rmdir(path, sctx->user);
break;
case BTRFS_SEND_C_WRITE:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, &offset);
TLV_GET(sctx, BTRFS_SEND_A_DATA, &data, &len);
ret = sctx->ops->write(path, data, offset, len, sctx->user);
break;
case BTRFS_SEND_C_ENCODED_WRITE:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, &offset);
TLV_GET_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
&unencoded_file_len);
TLV_GET_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN, &unencoded_len);
TLV_GET_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET,
&unencoded_offset);
/* Compression and encryption default to none if omitted. */
if (sctx->cmd_attrs[BTRFS_SEND_A_COMPRESSION].data)
TLV_GET_U32(sctx, BTRFS_SEND_A_COMPRESSION, &compression);
else
compression = BTRFS_ENCODED_IO_COMPRESSION_NONE;
if (sctx->cmd_attrs[BTRFS_SEND_A_ENCRYPTION].data)
TLV_GET_U32(sctx, BTRFS_SEND_A_ENCRYPTION, &encryption);
else
encryption = BTRFS_ENCODED_IO_ENCRYPTION_NONE;
TLV_GET(sctx, BTRFS_SEND_A_DATA, &data, &len);
ret = sctx->ops->encoded_write(path, data, offset, len,
unencoded_file_len,
unencoded_len, unencoded_offset,
compression, encryption,
sctx->user);
break;
case BTRFS_SEND_C_CLONE:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, &offset);
TLV_GET_U64(sctx, BTRFS_SEND_A_CLONE_LEN, &len);
TLV_GET_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, clone_uuid);
TLV_GET_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, &clone_ctransid);
TLV_GET_STRING(sctx, BTRFS_SEND_A_CLONE_PATH, &clone_path);
TLV_GET_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, &clone_offset);
ret = sctx->ops->clone(path, offset, len, clone_uuid,
clone_ctransid, clone_path, clone_offset,
sctx->user);
break;
case BTRFS_SEND_C_SET_XATTR:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, &xattr_name);
TLV_GET(sctx, BTRFS_SEND_A_XATTR_DATA, &xattr_data, &xattr_len);
ret = sctx->ops->set_xattr(path, xattr_name, xattr_data,
xattr_len, sctx->user);
break;
case BTRFS_SEND_C_REMOVE_XATTR:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, &xattr_name);
ret = sctx->ops->remove_xattr(path, xattr_name, sctx->user);
break;
case BTRFS_SEND_C_TRUNCATE:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_U64(sctx, BTRFS_SEND_A_SIZE, &tmp);
ret = sctx->ops->truncate(path, tmp, sctx->user);
break;
case BTRFS_SEND_C_CHMOD:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_U64(sctx, BTRFS_SEND_A_MODE, &tmp);
ret = sctx->ops->chmod(path, tmp, sctx->user);
break;
case BTRFS_SEND_C_CHOWN:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_U64(sctx, BTRFS_SEND_A_UID, &tmp);
TLV_GET_U64(sctx, BTRFS_SEND_A_GID, &tmp2);
ret = sctx->ops->chown(path, tmp, tmp2, sctx->user);
break;
case BTRFS_SEND_C_UTIMES:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, &at);
TLV_GET_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, &mt);
TLV_GET_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, &ct);
ret = sctx->ops->utimes(path, &at, &mt, &ct, sctx->user);
break;
case BTRFS_SEND_C_UPDATE_EXTENT:
TLV_GET_STRING(sctx, BTRFS_SEND_A_PATH, &path);
TLV_GET_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, &offset);
TLV_GET_U64(sctx, BTRFS_SEND_A_SIZE, &tmp);
ret = sctx->ops->update_extent(path, offset, tmp, sctx->user);
break;
case BTRFS_SEND_C_END:
ret = 1;
break;
}
tlv_get_failed:
out:
free(path);
free(path_to);
free(clone_path);
free(xattr_name);
return ret;
}
/*
* If max_errors is 0, then don't stop processing the stream if one of the
* callbacks in btrfs_send_ops structure returns an error. If greater than
* zero, stop after max_errors errors happened.
*/
int btrfs_read_and_process_send_stream(int fd,
Btrfs-progs: btrfs-receive optionally honors the end-cmd A new option is added to btrfs-receive to change the behavior when an <end cmd> is received in the Btrfs send stream. The traditional behavior (which still is the default) is to continue to read the stream until an EOF condition is encountered. If an <end cmd> is received, afterwards either an EOF or a new <stream header> is expected. The new behavior (if the -e option is set on the command line) is to terminate after an <end cmd> is read without the need for an EOF. This allows the stream (e.g. a single TCP stream) to carry additional data or even multiple Btrfs send streams. Old btrfs-send tools used to encode multiple snapshots like this (with 2 snapshots in this example): <stream header> + <sequence of commands> + <end cmd> + <stream header> + <sequence of commands> + <end cmd> + EOF If the new -e option is set, the expected format is like this: <stream header> + <sequence of commands> + <sequence of commands> + <end cmd> The btrfs-send tool is changed in a seperate commit to always use the new format, i.e. to send an <end cmd> only at the end. Note that the currently existing receivers treat <end cmd> only as an indication that a new <stream header> is following. This means, you can just skip the sequence <end cmd> <stream header> without loosing compatibility. As long as an EOF is following, the currently existing receivers handle the new format (if the two new flags are used) exactly as the old one. The goal of changing the semantic of <end cmd> is to be able to use a single stream (one TCP connection) to multiplex a request/response handshake plus Btrfs send streams, all in the same stream. In this case you cannot evaluate an EOF condition as an end of the Btrfs send stream. You need something else, and the <end cmd> is just perfect for this purpose. Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
2013-04-10 01:08:40 +08:00
struct btrfs_send_ops *ops, void *user,
int honor_end_cmd,
u64 max_errors)
{
int ret;
struct btrfs_send_stream sctx;
struct btrfs_stream_header hdr;
u64 errors = 0;
int last_err = 0;
sctx.fd = fd;
sctx.ops = ops;
sctx.user = user;
sctx.stream_pos = 0;
ret = read_buf(&sctx, (char*)&hdr, sizeof(hdr));
if (ret < 0)
goto out;
if (ret) {
ret = -ENODATA;
goto out;
}
if (strcmp(hdr.magic, BTRFS_SEND_STREAM_MAGIC)) {
ret = -EINVAL;
error("unexpected header");
goto out;
}
sctx.version = le32_to_cpu(hdr.version);
if (sctx.version > BTRFS_SEND_STREAM_VERSION) {
ret = -EINVAL;
error("stream version %d not supported, please use newer version",
sctx.version);
goto out;
}
sctx.read_buf = malloc(BTRFS_SEND_BUF_SIZE_V1);
if (!sctx.read_buf) {
ret = -ENOMEM;
errno = -ret;
error("unable to allocate send stream read buffer: %m");
goto out;
}
sctx.read_buf_size = BTRFS_SEND_BUF_SIZE_V1;
while (1) {
ret = read_and_process_cmd(&sctx);
if (ret < 0) {
last_err = ret;
errors++;
if (max_errors > 0 && errors >= max_errors)
break;
} else if (ret > 0) {
Btrfs-progs: btrfs-receive optionally honors the end-cmd A new option is added to btrfs-receive to change the behavior when an <end cmd> is received in the Btrfs send stream. The traditional behavior (which still is the default) is to continue to read the stream until an EOF condition is encountered. If an <end cmd> is received, afterwards either an EOF or a new <stream header> is expected. The new behavior (if the -e option is set on the command line) is to terminate after an <end cmd> is read without the need for an EOF. This allows the stream (e.g. a single TCP stream) to carry additional data or even multiple Btrfs send streams. Old btrfs-send tools used to encode multiple snapshots like this (with 2 snapshots in this example): <stream header> + <sequence of commands> + <end cmd> + <stream header> + <sequence of commands> + <end cmd> + EOF If the new -e option is set, the expected format is like this: <stream header> + <sequence of commands> + <sequence of commands> + <end cmd> The btrfs-send tool is changed in a seperate commit to always use the new format, i.e. to send an <end cmd> only at the end. Note that the currently existing receivers treat <end cmd> only as an indication that a new <stream header> is following. This means, you can just skip the sequence <end cmd> <stream header> without loosing compatibility. As long as an EOF is following, the currently existing receivers handle the new format (if the two new flags are used) exactly as the old one. The goal of changing the semantic of <end cmd> is to be able to use a single stream (one TCP connection) to multiplex a request/response handshake plus Btrfs send streams, all in the same stream. In this case you cannot evaluate an EOF condition as an end of the Btrfs send stream. You need something else, and the <end cmd> is just perfect for this purpose. Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
2013-04-10 01:08:40 +08:00
if (!honor_end_cmd)
ret = 0;
break;
}
}
free(sctx.read_buf);
out:
if (last_err && !ret)
ret = last_err;
return ret;
}