2013-06-24 23:13:11 +08:00
|
|
|
|
/*
|
|
|
|
|
* QEMU backup
|
|
|
|
|
*
|
|
|
|
|
* Copyright (C) 2013 Proxmox Server Solutions
|
|
|
|
|
*
|
|
|
|
|
* Authors:
|
|
|
|
|
* Dietmar Maurer (dietmar@proxmox.com)
|
|
|
|
|
*
|
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
|
|
|
* See the COPYING file in the top-level directory.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
2016-01-19 02:01:42 +08:00
|
|
|
|
#include "qemu/osdep.h"
|
2013-06-24 23:13:11 +08:00
|
|
|
|
|
|
|
|
|
#include "trace.h"
|
|
|
|
|
#include "block/block.h"
|
|
|
|
|
#include "block/block_int.h"
|
2016-10-28 00:07:00 +08:00
|
|
|
|
#include "block/blockjob_int.h"
|
2016-07-27 15:01:43 +08:00
|
|
|
|
#include "block/block_backup.h"
|
include/qemu/osdep.h: Don't include qapi/error.h
Commit 57cb38b included qapi/error.h into qemu/osdep.h to get the
Error typedef. Since then, we've moved to include qemu/osdep.h
everywhere. Its file comment explains: "To avoid getting into
possible circular include dependencies, this file should not include
any other QEMU headers, with the exceptions of config-host.h,
compiler.h, os-posix.h and os-win32.h, all of which are doing a
similar job to this file and are under similar constraints."
qapi/error.h doesn't do a similar job, and it doesn't adhere to
similar constraints: it includes qapi-types.h. That's in excess of
100KiB of crap most .c files don't actually need.
Add the typedef to qemu/typedefs.h, and include that instead of
qapi/error.h. Include qapi/error.h in .c files that need it and don't
get it now. Include qapi-types.h in qom/object.h for uint16List.
Update scripts/clean-includes accordingly. Update it further to match
reality: replace config.h by config-target.h, add sysemu/os-posix.h,
sysemu/os-win32.h. Update the list of includes in the qemu/osdep.h
comment quoted above similarly.
This reduces the number of objects depending on qapi/error.h from "all
of them" to less than a third. Unfortunately, the number depending on
qapi-types.h shrinks only a little. More work is needed for that one.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
[Fix compilation without the spice devel packages. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-14 16:01:28 +08:00
|
|
|
|
#include "qapi/error.h"
|
2015-03-18 00:22:46 +08:00
|
|
|
|
#include "qapi/qmp/qerror.h"
|
2013-06-24 23:13:11 +08:00
|
|
|
|
#include "qemu/ratelimit.h"
|
2016-03-21 01:16:19 +08:00
|
|
|
|
#include "qemu/cutils.h"
|
2015-10-19 23:53:22 +08:00
|
|
|
|
#include "sysemu/block-backend.h"
|
2016-03-08 12:44:52 +08:00
|
|
|
|
#include "qemu/bitmap.h"
|
2017-03-01 03:33:40 +08:00
|
|
|
|
#include "qemu/error-report.h"
|
2013-06-24 23:13:11 +08:00
|
|
|
|
|
2016-02-26 04:58:29 +08:00
|
|
|
|
/* Default backup cluster size in bytes: 1 << 16 == 65536 (64 KiB). */
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
|
2013-06-24 23:13:11 +08:00
|
|
|
|
|
2018-09-19 20:43:43 +08:00
|
|
|
|
typedef struct CowRequest {
|
|
|
|
|
int64_t start_byte;
|
|
|
|
|
int64_t end_byte;
|
|
|
|
|
QLIST_ENTRY(CowRequest) list;
|
|
|
|
|
CoQueue wait_queue; /* coroutines blocked on this request */
|
|
|
|
|
} CowRequest;
|
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
/*
 * Progress callbacks used by BlockCopyState.
 *
 * ProgressBytesCallbackFunc is invoked with the number of bytes that have
 * just been copied; ProgressResetCallbackFunc is invoked after bits were
 * cleared from the copy bitmap. Both receive the user supplied @opaque.
 */
typedef void (*ProgressBytesCallbackFunc)(int64_t bytes, void *opaque);
typedef void (*ProgressResetCallbackFunc)(void *opaque);
|
|
|
|
|
typedef struct BlockCopyState {
|
|
|
|
|
BlockBackend *source;
|
|
|
|
|
BlockBackend *target;
|
|
|
|
|
BdrvDirtyBitmap *copy_bitmap;
|
|
|
|
|
int64_t cluster_size;
|
|
|
|
|
bool use_copy_range;
|
|
|
|
|
int64_t copy_range_size;
|
|
|
|
|
uint64_t len;
|
|
|
|
|
|
|
|
|
|
BdrvRequestFlags write_flags;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* skip_unallocated:
|
|
|
|
|
*
|
|
|
|
|
* Used by sync=top jobs, which first scan the source node for unallocated
|
|
|
|
|
* areas and clear them in the copy_bitmap. During this process, the bitmap
|
|
|
|
|
* is thus not fully initialized: It may still have bits set for areas that
|
|
|
|
|
* are unallocated and should actually not be copied.
|
|
|
|
|
*
|
|
|
|
|
* This is indicated by skip_unallocated.
|
|
|
|
|
*
|
|
|
|
|
* In this case, block_copy() will query the source’s allocation status,
|
|
|
|
|
* skip unallocated regions, clear them in the copy_bitmap, and invoke
|
|
|
|
|
* block_copy_reset_unallocated() every time it does.
|
|
|
|
|
*/
|
|
|
|
|
bool skip_unallocated;
|
|
|
|
|
|
|
|
|
|
/* progress_bytes_callback: called when some copying progress is done. */
|
|
|
|
|
ProgressBytesCallbackFunc progress_bytes_callback;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* progress_reset_callback: called when some bytes reset from copy_bitmap
|
|
|
|
|
* (see @skip_unallocated above). The callee is assumed to recalculate how
|
|
|
|
|
* many bytes remain based on the dirty bit count of copy_bitmap.
|
|
|
|
|
*/
|
|
|
|
|
ProgressResetCallbackFunc progress_reset_callback;
|
|
|
|
|
void *progress_opaque;
|
|
|
|
|
} BlockCopyState;
|
|
|
|
|
|
2013-06-24 23:13:11 +08:00
|
|
|
|
typedef struct BackupBlockJob {
|
|
|
|
|
BlockJob common;
|
2019-09-20 22:20:46 +08:00
|
|
|
|
BlockDriverState *source_bs;
|
2019-07-30 04:35:53 +08:00
|
|
|
|
|
2015-04-18 07:49:58 +08:00
|
|
|
|
BdrvDirtyBitmap *sync_bitmap;
|
2019-07-30 04:35:53 +08:00
|
|
|
|
|
2013-07-27 02:39:04 +08:00
|
|
|
|
MirrorSyncMode sync_mode;
|
2019-07-30 04:35:52 +08:00
|
|
|
|
BitmapSyncMode bitmap_mode;
|
2013-06-24 23:13:11 +08:00
|
|
|
|
BlockdevOnError on_source_error;
|
|
|
|
|
BlockdevOnError on_target_error;
|
|
|
|
|
CoRwlock flush_rwlock;
|
2018-01-19 01:08:22 +08:00
|
|
|
|
uint64_t len;
|
2017-07-07 20:44:53 +08:00
|
|
|
|
uint64_t bytes_read;
|
2016-02-26 04:58:29 +08:00
|
|
|
|
int64_t cluster_size;
|
2016-01-27 07:54:58 +08:00
|
|
|
|
NotifierWithReturn before_write;
|
2013-06-24 23:13:11 +08:00
|
|
|
|
QLIST_HEAD(, CowRequest) inflight_reqs;
|
2017-10-12 21:53:10 +08:00
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
BlockCopyState *bcs;
|
2013-06-24 23:13:11 +08:00
|
|
|
|
} BackupBlockJob;
|
|
|
|
|
|
2018-01-19 22:54:40 +08:00
|
|
|
|
/* Forward declaration; referenced by backup_do_checkpoint() below. */
static const BlockJobDriver backup_job_driver;
|
|
|
|
|
|
2013-06-24 23:13:11 +08:00
|
|
|
|
/* See if in-flight requests overlap and wait for them to complete */
|
|
|
|
|
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
|
|
|
|
|
int64_t start,
|
|
|
|
|
int64_t end)
|
|
|
|
|
{
|
|
|
|
|
CowRequest *req;
|
|
|
|
|
bool retry;
|
|
|
|
|
|
|
|
|
|
do {
|
|
|
|
|
retry = false;
|
|
|
|
|
QLIST_FOREACH(req, &job->inflight_reqs, list) {
|
2017-07-07 20:44:54 +08:00
|
|
|
|
if (end > req->start_byte && start < req->end_byte) {
|
2017-02-14 02:12:43 +08:00
|
|
|
|
qemu_co_queue_wait(&req->wait_queue, NULL);
|
2013-06-24 23:13:11 +08:00
|
|
|
|
retry = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} while (retry);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Keep track of an in-flight request */
|
|
|
|
|
static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
|
2017-07-07 20:44:54 +08:00
|
|
|
|
int64_t start, int64_t end)
|
2013-06-24 23:13:11 +08:00
|
|
|
|
{
|
2017-07-07 20:44:54 +08:00
|
|
|
|
req->start_byte = start;
|
|
|
|
|
req->end_byte = end;
|
2013-06-24 23:13:11 +08:00
|
|
|
|
qemu_co_queue_init(&req->wait_queue);
|
|
|
|
|
QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Forget about a completed request */
|
|
|
|
|
static void cow_request_end(CowRequest *req)
|
|
|
|
|
{
|
|
|
|
|
QLIST_REMOVE(req, list);
|
|
|
|
|
qemu_co_queue_restart_all(&req->wait_queue);
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
/*
 * Release everything owned by @s: the copy bitmap, the references to both
 * block backends and the state structure itself. A NULL @s is a no-op.
 */
static void block_copy_state_free(BlockCopyState *s)
{
    if (s) {
        bdrv_release_dirty_bitmap(blk_bs(s->source), s->copy_bitmap);
        blk_unref(s->source);
        blk_unref(s->target);
        g_free(s);
    }
}
|
|
|
|
|
|
|
|
|
|
static BlockCopyState *block_copy_state_new(
|
|
|
|
|
BlockDriverState *source, BlockDriverState *target,
|
|
|
|
|
int64_t cluster_size, BdrvRequestFlags write_flags,
|
|
|
|
|
ProgressBytesCallbackFunc progress_bytes_callback,
|
|
|
|
|
ProgressResetCallbackFunc progress_reset_callback,
|
|
|
|
|
void *progress_opaque, Error **errp)
|
|
|
|
|
{
|
|
|
|
|
BlockCopyState *s;
|
|
|
|
|
int ret;
|
|
|
|
|
uint64_t no_resize = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
|
|
|
|
|
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD;
|
|
|
|
|
BdrvDirtyBitmap *copy_bitmap;
|
|
|
|
|
|
|
|
|
|
copy_bitmap = bdrv_create_dirty_bitmap(source, cluster_size, NULL, errp);
|
|
|
|
|
if (!copy_bitmap) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
bdrv_disable_dirty_bitmap(copy_bitmap);
|
|
|
|
|
|
|
|
|
|
s = g_new(BlockCopyState, 1);
|
|
|
|
|
*s = (BlockCopyState) {
|
|
|
|
|
.source = blk_new(bdrv_get_aio_context(source),
|
|
|
|
|
BLK_PERM_CONSISTENT_READ, no_resize),
|
|
|
|
|
.target = blk_new(bdrv_get_aio_context(target),
|
|
|
|
|
BLK_PERM_WRITE, no_resize),
|
|
|
|
|
.copy_bitmap = copy_bitmap,
|
|
|
|
|
.cluster_size = cluster_size,
|
|
|
|
|
.len = bdrv_dirty_bitmap_size(copy_bitmap),
|
|
|
|
|
.write_flags = write_flags,
|
|
|
|
|
.progress_bytes_callback = progress_bytes_callback,
|
|
|
|
|
.progress_reset_callback = progress_reset_callback,
|
|
|
|
|
.progress_opaque = progress_opaque,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
s->copy_range_size = QEMU_ALIGN_DOWN(MIN(blk_get_max_transfer(s->source),
|
|
|
|
|
blk_get_max_transfer(s->target)),
|
|
|
|
|
s->cluster_size);
|
|
|
|
|
/*
|
|
|
|
|
* Set use_copy_range, consider the following:
|
|
|
|
|
* 1. Compression is not supported for copy_range.
|
|
|
|
|
* 2. copy_range does not respect max_transfer (it's a TODO), so we factor
|
|
|
|
|
* that in here. If max_transfer is smaller than the job->cluster_size,
|
|
|
|
|
* we do not use copy_range (in that case it's zero after aligning down
|
|
|
|
|
* above).
|
|
|
|
|
*/
|
|
|
|
|
s->use_copy_range =
|
|
|
|
|
!(write_flags & BDRV_REQ_WRITE_COMPRESSED) && s->copy_range_size > 0;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We just allow aio context change on our block backends. block_copy() user
|
|
|
|
|
* (now it's only backup) is responsible for source and target being in same
|
|
|
|
|
* aio context.
|
|
|
|
|
*/
|
|
|
|
|
blk_set_disable_request_queuing(s->source, true);
|
|
|
|
|
blk_set_allow_aio_context_change(s->source, true);
|
|
|
|
|
blk_set_disable_request_queuing(s->target, true);
|
|
|
|
|
blk_set_allow_aio_context_change(s->target, true);
|
|
|
|
|
|
|
|
|
|
ret = blk_insert_bs(s->source, source, errp);
|
|
|
|
|
if (ret < 0) {
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ret = blk_insert_bs(s->target, target, errp);
|
|
|
|
|
if (ret < 0) {
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return s;
|
|
|
|
|
|
|
|
|
|
fail:
|
|
|
|
|
block_copy_state_free(s);
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-20 22:20:47 +08:00
|
|
|
|
/*
|
|
|
|
|
* Copy range to target with a bounce buffer and return the bytes copied. If
|
|
|
|
|
* error occurred, return a negative error number
|
|
|
|
|
*/
|
2019-09-20 22:20:46 +08:00
|
|
|
|
static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
|
2018-07-03 10:37:58 +08:00
|
|
|
|
int64_t start,
|
|
|
|
|
int64_t end,
|
|
|
|
|
bool is_write_notifier,
|
|
|
|
|
bool *error_is_read,
|
|
|
|
|
void **bounce_buffer)
|
|
|
|
|
{
|
|
|
|
|
int ret;
|
|
|
|
|
int nbytes;
|
block/backup: fix fleecing scheme: use serialized writes
Fleecing scheme works as follows: we want a kind of temporary snapshot
of active drive A. We create temporary image B, with B->backing = A.
Then we start backup(sync=none) from A to B. From this point, B reads
as point-in-time snapshot of A (A continues to be active drive,
accepting guest IO).
This scheme needs some additional synchronization between reads from B
and backup COW operations, otherwise, the following situation is
theoretically possible:
(assume B is qcow2, client is NBD client, reading from B)
1. client starts reading and take qcow2 mutex in qcow2_co_preadv, and
goes up to l2 table loading (assume cache miss)
2) guest write => backup COW => qcow2 write =>
try to take qcow2 mutex => waiting
3. l2 table loaded, we see that cluster is UNALLOCATED, go to
"case QCOW2_CLUSTER_UNALLOCATED" and unlock mutex before
bdrv_co_preadv(bs->backing, ...)
4) aha, mutex unlocked, backup COW continues, and we finally finish
guest write and change cluster in our active disk A
5. actually, do bdrv_co_preadv(bs->backing, ...) and read
_new updated_ data.
To avoid this, let's make backup writes serializing, to not intersect
with reads from B.
Note: we expand range of handled cases from (sync=none and
B->backing = A) to just (A in backing chain of B), to finally allow
safe reading from B during backup for all cases when A in backing chain
of B, i.e. B formally looks like point-in-time snapshot of A.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2018-07-10 00:37:19 +08:00
|
|
|
|
int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
|
2018-07-03 10:37:58 +08:00
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
assert(QEMU_IS_ALIGNED(start, s->cluster_size));
|
|
|
|
|
bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
|
|
|
|
|
nbytes = MIN(s->cluster_size, s->len - start);
|
2018-07-03 10:37:58 +08:00
|
|
|
|
if (!*bounce_buffer) {
|
2019-09-20 22:20:46 +08:00
|
|
|
|
*bounce_buffer = blk_blockalign(s->source, s->cluster_size);
|
2018-07-03 10:37:58 +08:00
|
|
|
|
}
|
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
ret = blk_co_pread(s->source, start, nbytes, *bounce_buffer, read_flags);
|
2018-07-03 10:37:58 +08:00
|
|
|
|
if (ret < 0) {
|
2019-09-20 22:20:46 +08:00
|
|
|
|
trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
|
2018-07-03 10:37:58 +08:00
|
|
|
|
if (error_is_read) {
|
|
|
|
|
*error_is_read = true;
|
|
|
|
|
}
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
ret = blk_co_pwrite(s->target, start, nbytes, *bounce_buffer,
|
|
|
|
|
s->write_flags);
|
2018-07-03 10:37:58 +08:00
|
|
|
|
if (ret < 0) {
|
2019-09-20 22:20:46 +08:00
|
|
|
|
trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
|
2018-07-03 10:37:58 +08:00
|
|
|
|
if (error_is_read) {
|
|
|
|
|
*error_is_read = false;
|
|
|
|
|
}
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nbytes;
|
|
|
|
|
fail:
|
2019-09-20 22:20:46 +08:00
|
|
|
|
bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
|
2018-07-03 10:37:58 +08:00
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-20 22:20:47 +08:00
|
|
|
|
/*
|
|
|
|
|
* Copy range to target and return the bytes copied. If error occurred, return a
|
|
|
|
|
* negative error number.
|
|
|
|
|
*/
|
2019-09-20 22:20:46 +08:00
|
|
|
|
static int coroutine_fn block_copy_with_offload(BlockCopyState *s,
|
2018-07-03 10:37:58 +08:00
|
|
|
|
int64_t start,
|
|
|
|
|
int64_t end,
|
|
|
|
|
bool is_write_notifier)
|
|
|
|
|
{
|
|
|
|
|
int ret;
|
|
|
|
|
int nr_clusters;
|
|
|
|
|
int nbytes;
|
block/backup: fix fleecing scheme: use serialized writes
Fleecing scheme works as follows: we want a kind of temporary snapshot
of active drive A. We create temporary image B, with B->backing = A.
Then we start backup(sync=none) from A to B. From this point, B reads
as point-in-time snapshot of A (A continues to be active drive,
accepting guest IO).
This scheme needs some additional synchronization between reads from B
and backup COW operations, otherwise, the following situation is
theoretically possible:
(assume B is qcow2, client is NBD client, reading from B)
1. client starts reading and take qcow2 mutex in qcow2_co_preadv, and
goes up to l2 table loading (assume cache miss)
2) guest write => backup COW => qcow2 write =>
try to take qcow2 mutex => waiting
3. l2 table loaded, we see that cluster is UNALLOCATED, go to
"case QCOW2_CLUSTER_UNALLOCATED" and unlock mutex before
bdrv_co_preadv(bs->backing, ...)
4) aha, mutex unlocked, backup COW continues, and we finally finish
guest write and change cluster in our active disk A
5. actually, do bdrv_co_preadv(bs->backing, ...) and read
_new updated_ data.
To avoid this, let's make backup writes serializing, to not intersect
with reads from B.
Note: we expand range of handled cases from (sync=none and
B->backing = A) to just (A in backing chain of B), to finally allow
safe reading from B during backup for all cases when A in backing chain
of B, i.e. B formally looks like point-in-time snapshot of A.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2018-07-10 00:37:19 +08:00
|
|
|
|
int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
|
2018-07-03 10:37:58 +08:00
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size));
|
|
|
|
|
assert(QEMU_IS_ALIGNED(start, s->cluster_size));
|
|
|
|
|
nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start);
|
|
|
|
|
nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size);
|
|
|
|
|
bdrv_reset_dirty_bitmap(s->copy_bitmap, start,
|
|
|
|
|
s->cluster_size * nr_clusters);
|
|
|
|
|
ret = blk_co_copy_range(s->source, start, s->target, start, nbytes,
|
|
|
|
|
read_flags, s->write_flags);
|
2018-07-03 10:37:58 +08:00
|
|
|
|
if (ret < 0) {
|
2019-09-20 22:20:46 +08:00
|
|
|
|
trace_block_copy_with_offload_fail(s, start, ret);
|
|
|
|
|
bdrv_set_dirty_bitmap(s->copy_bitmap, start,
|
|
|
|
|
s->cluster_size * nr_clusters);
|
2018-07-03 10:37:58 +08:00
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nbytes;
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-30 04:35:55 +08:00
|
|
|
|
/*
|
|
|
|
|
* Check if the cluster starting at offset is allocated or not.
|
|
|
|
|
* return via pnum the number of contiguous clusters sharing this allocation.
|
|
|
|
|
*/
|
2019-09-20 22:20:46 +08:00
|
|
|
|
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
|
|
|
|
|
int64_t *pnum)
|
2019-07-30 04:35:55 +08:00
|
|
|
|
{
|
2019-09-20 22:20:46 +08:00
|
|
|
|
BlockDriverState *bs = blk_bs(s->source);
|
2019-07-30 04:35:55 +08:00
|
|
|
|
int64_t count, total_count = 0;
|
|
|
|
|
int64_t bytes = s->len - offset;
|
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
|
|
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
ret = bdrv_is_allocated(bs, offset, bytes, &count);
|
|
|
|
|
if (ret < 0) {
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
total_count += count;
|
|
|
|
|
|
|
|
|
|
if (ret || count == 0) {
|
|
|
|
|
/*
|
|
|
|
|
* ret: partial segment(s) are considered allocated.
|
|
|
|
|
* otherwise: unallocated tail is treated as an entire segment.
|
|
|
|
|
*/
|
|
|
|
|
*pnum = DIV_ROUND_UP(total_count, s->cluster_size);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Unallocated segment(s) with uncertain following segment(s) */
|
|
|
|
|
if (total_count >= s->cluster_size) {
|
|
|
|
|
*pnum = total_count / s->cluster_size;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
offset += count;
|
|
|
|
|
bytes -= count;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-20 22:20:47 +08:00
|
|
|
|
/*
|
2019-07-30 04:35:55 +08:00
|
|
|
|
* Reset bits in copy_bitmap starting at offset if they represent unallocated
|
|
|
|
|
* data in the image. May reset subsequent contiguous bits.
|
|
|
|
|
* @return 0 when the cluster at @offset was unallocated,
|
|
|
|
|
* 1 otherwise, and -ret on error.
|
|
|
|
|
*/
|
2019-09-20 22:20:46 +08:00
|
|
|
|
static int64_t block_copy_reset_unallocated(BlockCopyState *s,
|
|
|
|
|
int64_t offset, int64_t *count)
|
2019-07-30 04:35:55 +08:00
|
|
|
|
{
|
|
|
|
|
int ret;
|
2019-09-20 22:20:46 +08:00
|
|
|
|
int64_t clusters, bytes;
|
2019-07-30 04:35:55 +08:00
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
ret = block_copy_is_cluster_allocated(s, offset, &clusters);
|
2019-07-30 04:35:55 +08:00
|
|
|
|
if (ret < 0) {
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bytes = clusters * s->cluster_size;
|
|
|
|
|
|
|
|
|
|
if (!ret) {
|
|
|
|
|
bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
|
2019-09-20 22:20:46 +08:00
|
|
|
|
s->progress_reset_callback(s->progress_opaque);
|
2019-07-30 04:35:55 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*count = bytes;
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
/*
 * Copy every cluster in [start, start + bytes) whose bit is still set in
 * copy_bitmap from source to target.
 *
 * @start and @start + @bytes must be aligned to the cluster size. Clusters
 * whose bit is already clear are skipped. With skip_unallocated set,
 * clusters found to be unallocated in the source are cleared from the
 * bitmap and skipped too. Offloaded copy (copy_range) is tried first if
 * enabled; after its first failure it is disabled for this state and the
 * bounce buffer path is used instead.
 *
 * If @error_is_read is not NULL, the bounce buffer path stores there
 * whether a failure was a read (true) or a write (false) error.
 *
 * Returns 0 on success or a negative error number.
 */
static int coroutine_fn block_copy(BlockCopyState *s,
                                   int64_t start, uint64_t bytes,
                                   bool *error_is_read,
                                   bool is_write_notifier)
{
    int ret = 0;
    int64_t end = bytes + start; /* bytes */
    void *bounce_buffer = NULL;
    int64_t status_bytes;

    /*
     * block_copy() user is responsible for keeping source and target in same
     * aio context
     */
    assert(blk_get_aio_context(s->source) == blk_get_aio_context(s->target));

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end, s->cluster_size));

    while (start < end) {
        int64_t dirty_end;

        if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
            trace_block_copy_skip(s, start);
            start += s->cluster_size;
            continue; /* already copied */
        }

        dirty_end = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
                                                (end - start));
        if (dirty_end < 0) {
            dirty_end = end;
        }

        if (s->skip_unallocated) {
            ret = block_copy_reset_unallocated(s, start, &status_bytes);
            if (ret == 0) {
                trace_block_copy_skip_range(s, start, status_bytes);
                start += status_bytes;
                continue;
            }
            if (ret < 0) {
                /*
                 * The allocation status could not be determined and
                 * status_bytes was not written. Fall back to copying just
                 * the current cluster instead of clamping with an
                 * indeterminate value.
                 */
                status_bytes = s->cluster_size;
            }
            /* Clamp to known allocated region */
            dirty_end = MIN(dirty_end, start + status_bytes);
        }

        trace_block_copy_process(s, start);

        if (s->use_copy_range) {
            ret = block_copy_with_offload(s, start, dirty_end,
                                          is_write_notifier);
            if (ret < 0) {
                /* Offload failed: retry below with a bounce buffer */
                s->use_copy_range = false;
            }
        }
        if (!s->use_copy_range) {
            ret = block_copy_with_bounce_buffer(s, start, dirty_end,
                                                is_write_notifier,
                                                error_is_read, &bounce_buffer);
        }
        if (ret < 0) {
            break;
        }

        /* On success ret is the number of bytes that were copied */
        start += ret;
        s->progress_bytes_callback(ret, s->progress_opaque);
        ret = 0;
    }

    if (bounce_buffer) {
        qemu_vfree(bounce_buffer);
    }

    return ret;
}
|
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
static void backup_progress_bytes_callback(int64_t bytes, void *opaque)
|
|
|
|
|
{
|
|
|
|
|
BackupBlockJob *s = opaque;
|
|
|
|
|
|
|
|
|
|
s->bytes_read += bytes;
|
|
|
|
|
job_progress_update(&s->common.job, bytes);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void backup_progress_reset_callback(void *opaque)
|
|
|
|
|
{
|
|
|
|
|
BackupBlockJob *s = opaque;
|
|
|
|
|
uint64_t estimate = bdrv_get_dirty_count(s->bcs->copy_bitmap);
|
|
|
|
|
|
|
|
|
|
job_progress_set_remaining(&s->common.job, estimate);
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-20 22:20:44 +08:00
|
|
|
|
/*
 * Copy the clusters touched by [offset, offset + bytes) to the target.
 *
 * The range is widened to cluster boundaries. While flush_rwlock is held as
 * reader, we wait for overlapping in-flight requests, register our own
 * request, run block_copy() on the widened range and unregister again.
 *
 * @error_is_read and @is_write_notifier are passed through to block_copy().
 * Returns the result of block_copy().
 */
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                      int64_t offset, uint64_t bytes,
                                      bool *error_is_read,
                                      bool is_write_notifier)
{
    CowRequest cow_request;
    int64_t cow_start, cow_end; /* bytes */
    int ret;

    qemu_co_rwlock_rdlock(&job->flush_rwlock);

    cow_start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
    cow_end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);

    trace_backup_do_cow_enter(job, cow_start, offset, bytes);

    wait_for_overlapping_requests(job, cow_start, cow_end);
    cow_request_begin(&cow_request, job, cow_start, cow_end);

    ret = block_copy(job->bcs, cow_start, cow_end - cow_start, error_is_read,
                     is_write_notifier);

    cow_request_end(&cow_request);

    trace_backup_do_cow_return(job, offset, bytes, ret);

    qemu_co_rwlock_unlock(&job->flush_rwlock);

    return ret;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Before-write notifier: @opaque is the tracked request that is about to
 * write to the source node. The affected range is copied to the target
 * first via backup_do_cow(), whose result is returned.
 */
static int coroutine_fn backup_before_write_notify(
        NotifierWithReturn *notifier,
        void *opaque)
{
    BdrvTrackedRequest *req = opaque;
    BackupBlockJob *job;

    job = container_of(notifier, BackupBlockJob, before_write);

    assert(req->bs == job->source_bs);
    assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE));
    assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE));

    return backup_do_cow(job, req->offset, req->bytes, NULL, true);
}
|
|
|
|
|
|
2015-11-06 07:13:10 +08:00
|
|
|
|
/*
 * Finalize the job's sync bitmap according to bitmap_mode and the job
 * result @ret (0 for success, negative for failure).
 *
 * The bitmap is synced when the mode is not "never" and either the job
 * succeeded or the mode is "always". If a failed job synced anyway, the
 * bits that were not copied are merged back in from copy_bitmap.
 */
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
{
    BdrvDirtyBitmap *bm;
    bool sync;

    if (job->bitmap_mode == BITMAP_SYNC_MODE_NEVER) {
        sync = false;
    } else {
        sync = ret == 0 || job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS;
    }

    if (!sync) {
        /*
         * We failed, or we never intended to sync the bitmap anyway.
         * Merge the successor back into the parent, keeping all data.
         */
        bm = bdrv_reclaim_dirty_bitmap(job->source_bs, job->sync_bitmap, NULL);
    } else {
        /*
         * We succeeded, or we always intended to sync the bitmap.
         * Delete this bitmap and install the child.
         */
        bm = bdrv_dirty_bitmap_abdicate(job->source_bs, job->sync_bitmap, NULL);
    }

    assert(bm);

    if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) {
        /* If we failed and synced, merge in the bits we didn't copy: */
        bdrv_dirty_bitmap_merge_internal(bm, job->bcs->copy_bitmap,
                                         NULL, true);
    }
}
|
|
|
|
|
|
2018-04-19 23:30:16 +08:00
|
|
|
|
/* Job commit hook: finalize the sync bitmap, if any, as for a success. */
static void backup_commit(Job *job)
{
    BackupBlockJob *backup = container_of(job, BackupBlockJob, common.job);

    if (!backup->sync_bitmap) {
        return;
    }
    backup_cleanup_sync_bitmap(backup, 0);
}
|
|
|
|
|
|
2018-04-19 23:30:16 +08:00
|
|
|
|
/* Job abort hook: finalize the sync bitmap, if any, as for a failure. */
static void backup_abort(Job *job)
{
    BackupBlockJob *backup = container_of(job, BackupBlockJob, common.job);

    if (!backup->sync_bitmap) {
        return;
    }
    backup_cleanup_sync_bitmap(backup, -1);
}
|
|
|
|
|
|
2018-04-19 23:30:16 +08:00
|
|
|
|
/* Job clean hook: release the block-copy state owned by the job. */
static void backup_clean(Job *job)
{
    BackupBlockJob *backup = container_of(job, BackupBlockJob, common.job);

    block_copy_state_free(backup->bcs);
}
|
|
|
|
|
|
2016-07-27 15:01:43 +08:00
|
|
|
|
/*
 * Mark the whole device as still to be copied again by setting every bit of
 * the copy bitmap. Only valid for backup jobs running with sync=none; for
 * any other sync mode @errp is set and nothing is changed.
 */
void backup_do_checkpoint(BlockJob *job, Error **errp)
{
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);

    assert(block_job_driver(job) == &backup_job_driver);

    if (backup_job->sync_mode == MIRROR_SYNC_MODE_NONE) {
        bdrv_set_dirty_bitmap(backup_job->bcs->copy_bitmap, 0,
                              backup_job->len);
        return;
    }

    error_setg(errp, "The backup job only supports block checkpoint in"
               " sync=none mode");
}
|
|
|
|
|
|
2013-06-24 23:13:11 +08:00
|
|
|
|
/*
 * Translate an I/O error into the action the block job layer should take.
 *
 * @read selects which configured policy applies: the source error policy
 * for failed reads, the target error policy otherwise.  @error is passed
 * through to block_job_error_action() unchanged.
 */
static BlockErrorAction backup_error_action(BackupBlockJob *job,
                                            bool read, int error)
{
    BlockdevOnError policy = read ? job->on_source_error
                                  : job->on_target_error;

    return block_job_error_action(&job->common, policy, read, error);
}
|
|
|
|
|
|
2015-04-18 07:49:58 +08:00
|
|
|
|
/*
 * Sleep for the rate-limit delay computed from the bytes read so far and
 * report whether the job has been cancelled.
 *
 * Returns true if the job is cancelled, either before or after sleeping;
 * false otherwise.  The bytes_read counter is reset before sleeping.
 */
static bool coroutine_fn yield_and_check(BackupBlockJob *job)
{
    Job *j = &job->common.job;
    uint64_t wait_ns;

    if (job_is_cancelled(j)) {
        return true;
    }

    /*
     * The sleep must happen even when the delay is 0 so that
     * bdrv_drain_all() can return.  Without a yield, the VM would not
     * reboot.
     */
    wait_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read);
    job->bytes_read = 0;
    job_sleep_ns(j, wait_ns);

    return job_is_cancelled(j);
}
|
|
|
|
|
|
2019-04-29 17:08:41 +08:00
|
|
|
|
/*
 * Walk the dirty offsets of the copy bitmap and copy one cluster_size
 * region for each of them via backup_do_cow().
 *
 * A failed copy is retried until it succeeds, unless the error policy
 * answers BLOCK_ERROR_ACTION_REPORT, in which case the negative error
 * code is returned.  The loop also stops as soon as yield_and_check()
 * reports cancellation; the value returned then is whatever the last
 * copy attempt produced (0 if none was made).
 */
static int coroutine_fn backup_loop(BackupBlockJob *job)
{
    BdrvDirtyBitmapIter *iter = bdrv_dirty_iter_new(job->bcs->copy_bitmap);
    bool error_is_read;
    int64_t offset;
    int ret = 0;

    for (offset = bdrv_dirty_iter_next(iter);
         offset != -1;
         offset = bdrv_dirty_iter_next(iter)) {
        /* Retry this offset until the copy succeeds or we must give up. */
        for (;;) {
            if (yield_and_check(job)) {
                goto done;
            }

            ret = backup_do_cow(job, offset, job->cluster_size,
                                &error_is_read, false);
            if (ret >= 0) {
                break;
            }

            if (backup_error_action(job, error_is_read, -ret) ==
                BLOCK_ERROR_ACTION_REPORT) {
                goto done;
            }
        }
    }

 done:
    bdrv_dirty_iter_free(iter);
    return ret;
}
|
|
|
|
|
|
2019-07-30 04:35:55 +08:00
|
|
|
|
/*
 * Populate the copy bitmap according to the job's sync mode and publish
 * the resulting dirty count as the job's remaining work.
 *
 *  - sync=bitmap: merge the user-supplied sync bitmap into the copy bitmap.
 *  - sync=top:    set the skip_unallocated flag, then mark everything dirty.
 *  - otherwise:   mark the whole range [0, len) dirty.
 */
static void backup_init_copy_bitmap(BackupBlockJob *job)
{
    bool merged;

    switch (job->sync_mode) {
    case MIRROR_SYNC_MODE_BITMAP:
        merged = bdrv_dirty_bitmap_merge_internal(job->bcs->copy_bitmap,
                                                  job->sync_bitmap,
                                                  NULL, true);
        assert(merged);
        break;

    case MIRROR_SYNC_MODE_TOP:
        /*
         * We can't hog the coroutine to initialize this thoroughly.
         * Set a flag and resume work when we are able to yield safely.
         */
        job->bcs->skip_unallocated = true;
        /* fall through */
    default:
        bdrv_set_dirty_bitmap(job->bcs->copy_bitmap, 0, job->len);
        break;
    }

    job_progress_set_remaining(&job->common.job,
                               bdrv_get_dirty_count(job->bcs->copy_bitmap));
}
|
|
|
|
|
|
2018-08-30 09:57:32 +08:00
|
|
|
|
/*
 * Main coroutine of the backup job.
 *
 * Initializes the per-run state and the copy bitmap, installs the
 * before-write notifier on the source, and then:
 *  - for sync=top, first calls block_copy_reset_unallocated() across
 *    [0, len) before clearing skip_unallocated;
 *  - for sync=none, yields until the job is cancelled;
 *  - for every other mode, runs backup_loop().
 *
 * On every exit path the notifier is removed and pending backup_do_cow()
 * calls are waited for.  Returns 0 or a negative errno value
 * (-ECANCELED if cancelled during the sync=top pre-pass).
 */
static int coroutine_fn backup_run(Job *job, Error **errp)
{
    BackupBlockJob *backup = container_of(job, BackupBlockJob, common.job);
    int ret = 0;

    QLIST_INIT(&backup->inflight_reqs);
    qemu_co_rwlock_init(&backup->flush_rwlock);

    backup_init_copy_bitmap(backup);

    backup->before_write.notify = backup_before_write_notify;
    bdrv_add_before_write_notifier(backup->source_bs, &backup->before_write);

    if (backup->sync_mode == MIRROR_SYNC_MODE_TOP) {
        int64_t pos = 0;
        int64_t step;

        while (pos < backup->len) {
            if (yield_and_check(backup)) {
                ret = -ECANCELED;
                goto cleanup;
            }

            ret = block_copy_reset_unallocated(backup->bcs, pos, &step);
            if (ret < 0) {
                goto cleanup;
            }

            pos += step;
        }
        backup->bcs->skip_unallocated = false;
    }

    if (backup->sync_mode != MIRROR_SYNC_MODE_NONE) {
        ret = backup_loop(backup);
    } else {
        /*
         * All bits are set in copy_bitmap to allow any cluster to be copied.
         * This does not actually require them to be copied.
         */
        while (!job_is_cancelled(job)) {
            /*
             * Yield until the job is cancelled.  We just let our
             * before_write notify callback service CoW requests.
             */
            job_yield(job);
        }
    }

 cleanup:
    notifier_with_return_remove(&backup->before_write);

    /* wait until pending backup_do_cow() calls have completed */
    qemu_co_rwlock_wrlock(&backup->flush_rwlock);
    qemu_co_rwlock_unlock(&backup->flush_rwlock);

    return ret;
}
|
|
|
|
|
|
2016-11-08 14:50:36 +08:00
|
|
|
|
static const BlockJobDriver backup_job_driver = {
|
2018-04-12 23:29:59 +08:00
|
|
|
|
.job_driver = {
|
|
|
|
|
.instance_size = sizeof(BackupBlockJob),
|
2018-04-12 23:57:08 +08:00
|
|
|
|
.job_type = JOB_TYPE_BACKUP,
|
2018-04-14 00:50:05 +08:00
|
|
|
|
.free = block_job_free,
|
2018-04-18 23:10:26 +08:00
|
|
|
|
.user_resume = block_job_user_resume,
|
2018-08-30 09:57:26 +08:00
|
|
|
|
.run = backup_run,
|
2018-04-19 23:30:16 +08:00
|
|
|
|
.commit = backup_commit,
|
|
|
|
|
.abort = backup_abort,
|
|
|
|
|
.clean = backup_clean,
|
2019-08-29 17:09:53 +08:00
|
|
|
|
}
|
2016-11-08 14:50:36 +08:00
|
|
|
|
};
|
|
|
|
|
|
2019-04-29 17:08:42 +08:00
|
|
|
|
static int64_t backup_calculate_cluster_size(BlockDriverState *target,
|
|
|
|
|
Error **errp)
|
|
|
|
|
{
|
|
|
|
|
int ret;
|
|
|
|
|
BlockDriverInfo bdi;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If there is no backing file on the target, we cannot rely on COW if our
|
|
|
|
|
* backup cluster size is smaller than the target cluster size. Even for
|
|
|
|
|
* targets with a backing file, try to avoid COW if possible.
|
|
|
|
|
*/
|
|
|
|
|
ret = bdrv_get_info(target, &bdi);
|
|
|
|
|
if (ret == -ENOTSUP && !target->backing) {
|
|
|
|
|
/* Cluster size is not defined */
|
|
|
|
|
warn_report("The target block device doesn't provide "
|
|
|
|
|
"information about the block size and it doesn't have a "
|
|
|
|
|
"backing file. The default block size of %u bytes is "
|
|
|
|
|
"used. If the actual block size of the target exceeds "
|
|
|
|
|
"this default, the backup may be unusable",
|
|
|
|
|
BACKUP_CLUSTER_SIZE_DEFAULT);
|
|
|
|
|
return BACKUP_CLUSTER_SIZE_DEFAULT;
|
|
|
|
|
} else if (ret < 0 && !target->backing) {
|
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
|
"Couldn't determine the cluster size of the target image, "
|
|
|
|
|
"which has no backing file");
|
|
|
|
|
error_append_hint(errp,
|
|
|
|
|
"Aborting, since this may create an unusable destination image\n");
|
|
|
|
|
return ret;
|
|
|
|
|
} else if (ret < 0 && target->backing) {
|
|
|
|
|
/* Not fatal; just trudge on ahead. */
|
|
|
|
|
return BACKUP_CLUSTER_SIZE_DEFAULT;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-08 14:50:38 +08:00
|
|
|
|
BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
|
2016-07-05 22:28:58 +08:00
|
|
|
|
BlockDriverState *target, int64_t speed,
|
|
|
|
|
MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
|
2019-07-30 04:35:52 +08:00
|
|
|
|
BitmapSyncMode bitmap_mode,
|
2016-07-22 16:17:52 +08:00
|
|
|
|
bool compress,
|
2013-06-24 23:13:11 +08:00
|
|
|
|
BlockdevOnError on_source_error,
|
|
|
|
|
BlockdevOnError on_target_error,
|
2016-10-28 00:06:57 +08:00
|
|
|
|
int creation_flags,
|
2014-10-07 19:59:15 +08:00
|
|
|
|
BlockCompletionFunc *cb, void *opaque,
|
2018-04-19 22:09:52 +08:00
|
|
|
|
JobTxn *txn, Error **errp)
|
2013-06-24 23:13:11 +08:00
|
|
|
|
{
|
|
|
|
|
int64_t len;
|
2016-04-14 18:59:55 +08:00
|
|
|
|
BackupBlockJob *job = NULL;
|
2019-04-29 17:08:42 +08:00
|
|
|
|
int64_t cluster_size;
|
2019-09-20 22:20:46 +08:00
|
|
|
|
BdrvRequestFlags write_flags;
|
2013-06-24 23:13:11 +08:00
|
|
|
|
|
|
|
|
|
assert(bs);
|
|
|
|
|
assert(target);
|
|
|
|
|
|
2019-07-30 04:35:55 +08:00
|
|
|
|
/* QMP interface protects us from these cases */
|
|
|
|
|
assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
|
|
|
|
|
assert(sync_bitmap || sync_mode != MIRROR_SYNC_MODE_BITMAP);
|
|
|
|
|
|
2014-12-18 18:37:05 +08:00
|
|
|
|
if (bs == target) {
|
|
|
|
|
error_setg(errp, "Source and target cannot be the same");
|
2016-11-08 14:50:38 +08:00
|
|
|
|
return NULL;
|
2014-12-18 18:37:05 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!bdrv_is_inserted(bs)) {
|
|
|
|
|
error_setg(errp, "Device is not inserted: %s",
|
|
|
|
|
bdrv_get_device_name(bs));
|
2016-11-08 14:50:38 +08:00
|
|
|
|
return NULL;
|
2014-12-18 18:37:05 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!bdrv_is_inserted(target)) {
|
|
|
|
|
error_setg(errp, "Device is not inserted: %s",
|
|
|
|
|
bdrv_get_device_name(target));
|
2016-11-08 14:50:38 +08:00
|
|
|
|
return NULL;
|
2014-12-18 18:37:05 +08:00
|
|
|
|
}
|
|
|
|
|
|
2019-06-05 00:15:06 +08:00
|
|
|
|
if (compress && !block_driver_can_compress(target->drv)) {
|
2016-07-22 16:17:52 +08:00
|
|
|
|
error_setg(errp, "Compression is not supported for this drive %s",
|
|
|
|
|
bdrv_get_device_name(target));
|
2016-11-08 14:50:38 +08:00
|
|
|
|
return NULL;
|
2016-07-22 16:17:52 +08:00
|
|
|
|
}
|
|
|
|
|
|
2014-12-18 18:37:05 +08:00
|
|
|
|
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
|
2016-11-08 14:50:38 +08:00
|
|
|
|
return NULL;
|
2014-12-18 18:37:05 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
|
2016-11-08 14:50:38 +08:00
|
|
|
|
return NULL;
|
2014-12-18 18:37:05 +08:00
|
|
|
|
}
|
|
|
|
|
|
2019-07-30 04:35:55 +08:00
|
|
|
|
if (sync_bitmap) {
|
2019-07-30 04:35:54 +08:00
|
|
|
|
/* If we need to write to this bitmap, check that we can: */
|
|
|
|
|
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER &&
|
|
|
|
|
bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-18 07:49:58 +08:00
|
|
|
|
/* Create a new bitmap, and freeze/disable this one. */
|
|
|
|
|
if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
|
2016-11-08 14:50:38 +08:00
|
|
|
|
return NULL;
|
2015-04-18 07:49:58 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-24 23:13:11 +08:00
|
|
|
|
len = bdrv_getlength(bs);
|
|
|
|
|
if (len < 0) {
|
|
|
|
|
error_setg_errno(errp, -len, "unable to get length for '%s'",
|
|
|
|
|
bdrv_get_device_name(bs));
|
2015-04-18 07:49:58 +08:00
|
|
|
|
goto error;
|
2013-06-24 23:13:11 +08:00
|
|
|
|
}
|
|
|
|
|
|
2019-04-29 17:08:42 +08:00
|
|
|
|
cluster_size = backup_calculate_cluster_size(target, errp);
|
|
|
|
|
if (cluster_size < 0) {
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2018-01-19 01:08:22 +08:00
|
|
|
|
/* job->len is fixed, so we can't allow resize */
|
2019-09-20 22:20:46 +08:00
|
|
|
|
job = block_job_create(job_id, &backup_job_driver, txn, bs, 0, BLK_PERM_ALL,
|
2017-01-17 00:18:09 +08:00
|
|
|
|
speed, creation_flags, cb, opaque, errp);
|
2013-06-24 23:13:11 +08:00
|
|
|
|
if (!job) {
|
2015-04-18 07:49:58 +08:00
|
|
|
|
goto error;
|
2013-06-24 23:13:11 +08:00
|
|
|
|
}
|
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
job->source_bs = bs;
|
2013-06-24 23:13:11 +08:00
|
|
|
|
job->on_source_error = on_source_error;
|
|
|
|
|
job->on_target_error = on_target_error;
|
2013-07-27 02:39:04 +08:00
|
|
|
|
job->sync_mode = sync_mode;
|
2019-07-30 04:35:52 +08:00
|
|
|
|
job->sync_bitmap = sync_bitmap;
|
|
|
|
|
job->bitmap_mode = bitmap_mode;
|
block/backup: avoid copying less than full target clusters
During incremental backups, if the target has a cluster size that is
larger than the backup cluster size and we are backing up to a target
that cannot (for whichever reason) pull clusters up from a backing image,
we may inadvertantly create unusable incremental backup images.
For example:
If the bitmap tracks changes at a 64KB granularity and we transmit 64KB
of data at a time but the target uses a 128KB cluster size, it is
possible that only half of a target cluster will be recognized as dirty
by the backup block job. When the cluster is allocated on the target
image but only half populated with data, we lose the ability to
distinguish between zero padding and uninitialized data.
This does not happen if the target image has a backing file that points
to the last known good backup.
Even if we have a backing file, though, it's likely going to be faster
to just buffer the redundant data ourselves from the live image than
fetching it from the backing file, so let's just always round up to the
target granularity.
The same logic applies to backup modes top, none, and full. Copying
fractional clusters without the guarantee of COW is dangerous, but even
if we can rely on COW, it's likely better to just re-copy the data.
Reported-by: Fam Zheng <famz@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1456433911-24718-3-git-send-email-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-26 04:58:30 +08:00
|
|
|
|
|
2019-07-31 00:32:51 +08:00
|
|
|
|
/*
|
2019-09-20 22:20:45 +08:00
|
|
|
|
* If source is in backing chain of target assume that target is going to be
|
|
|
|
|
* used for "image fleecing", i.e. it should represent a kind of snapshot of
|
|
|
|
|
* source at backup-start point in time. And target is going to be read by
|
|
|
|
|
* somebody (for example, used as NBD export) during backup job.
|
|
|
|
|
*
|
|
|
|
|
* In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid
|
|
|
|
|
* intersection of backup writes and third party reads from target,
|
|
|
|
|
* otherwise reading from target we may occasionally read already updated by
|
|
|
|
|
* guest data.
|
|
|
|
|
*
|
|
|
|
|
* For more information see commit f8d59dfb40bb and test
|
|
|
|
|
* tests/qemu-iotests/222
|
2019-07-31 00:32:51 +08:00
|
|
|
|
*/
|
2019-09-20 22:20:46 +08:00
|
|
|
|
write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) |
|
|
|
|
|
(compress ? BDRV_REQ_WRITE_COMPRESSED : 0),
|
|
|
|
|
|
|
|
|
|
job->bcs = block_copy_state_new(bs, target, cluster_size, write_flags,
|
|
|
|
|
backup_progress_bytes_callback,
|
|
|
|
|
backup_progress_reset_callback, job, errp);
|
|
|
|
|
if (!job->bcs) {
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
2019-07-31 00:32:51 +08:00
|
|
|
|
|
2019-04-29 17:08:42 +08:00
|
|
|
|
job->cluster_size = cluster_size;
|
block/backup: avoid copying less than full target clusters
During incremental backups, if the target has a cluster size that is
larger than the backup cluster size and we are backing up to a target
that cannot (for whichever reason) pull clusters up from a backing image,
we may inadvertantly create unusable incremental backup images.
For example:
If the bitmap tracks changes at a 64KB granularity and we transmit 64KB
of data at a time but the target uses a 128KB cluster size, it is
possible that only half of a target cluster will be recognized as dirty
by the backup block job. When the cluster is allocated on the target
image but only half populated with data, we lose the ability to
distinguish between zero padding and uninitialized data.
This does not happen if the target image has a backing file that points
to the last known good backup.
Even if we have a backing file, though, it's likely going to be faster
to just buffer the redundant data ourselves from the live image than
fetching it from the backing file, so let's just always round up to the
target granularity.
The same logic applies to backup modes top, none, and full. Copying
fractional clusters without the guarantee of COW is dangerous, but even
if we can rely on COW, it's likely better to just re-copy the data.
Reported-by: Fam Zheng <famz@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1456433911-24718-3-git-send-email-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-26 04:58:30 +08:00
|
|
|
|
|
2019-09-20 22:20:46 +08:00
|
|
|
|
/* Required permissions are already taken by block-copy-state target */
|
2017-01-17 18:56:42 +08:00
|
|
|
|
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
|
|
|
|
|
&error_abort);
|
2018-01-19 01:08:22 +08:00
|
|
|
|
job->len = len;
|
2016-11-08 14:50:38 +08:00
|
|
|
|
|
|
|
|
|
return &job->common;
|
2015-04-18 07:49:58 +08:00
|
|
|
|
|
|
|
|
|
error:
|
|
|
|
|
if (sync_bitmap) {
|
|
|
|
|
bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
|
|
|
|
|
}
|
2016-04-14 18:59:55 +08:00
|
|
|
|
if (job) {
|
2018-04-19 23:30:16 +08:00
|
|
|
|
backup_clean(&job->common.job);
|
|
|
|
|
job_early_fail(&job->common.job);
|
2016-04-14 18:59:55 +08:00
|
|
|
|
}
|
2016-11-08 14:50:38 +08:00
|
|
|
|
|
|
|
|
|
return NULL;
|
2013-06-24 23:13:11 +08:00
|
|
|
|
}
|