Merge tag 'pull-block-2023-06-05' of https://gitlab.com/hreitz/qemu into staging

Block patches:

- Fix padding of unaligned vectored requests to match the host alignment
  for vectors with 1023 or 1024 buffers
- Refactor and fix bugs in parallels's image check functionality
- Add an option to the qcow2 driver to retain (qcow2-level) allocations on
  discard requests from the guest (while still forwarding the discard to
  the lower level and marking the range as zero)

# gpg: Signature made Mon 05 Jun 2023 08:37:34 AM PDT
# gpg:                using RSA key CB62D7A0EE3829E45F004D34A1FA40D098019CDF
# gpg:                issuer "hreitz@redhat.com"
# gpg: Good signature from "Hanna Reitz <hreitz@redhat.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: CB62 D7A0 EE38 29E4 5F00 4D34 A1FA 40D0 9801 9CDF

* tag 'pull-block-2023-06-05' of https://gitlab.com/hreitz/qemu:
  qcow2: add discard-no-unref option
  parallels: Incorrect condition in out-of-image check
  parallels: Replace qemu_co_mutex_lock by WITH_QEMU_LOCK_GUARD
  parallels: Move statistic collection to a separate function
  parallels: Move check of leaks to a separate function
  parallels: Fix statistics calculation
  parallels: Move check of cluster outside image to a separate function
  parallels: Move check of unclean image to a separate function
  parallels: Use generic infrastructure for BAT writing in parallels_co_check()
  parallels: create parallels_set_bat_entry_helper() to assign BAT value
  parallels: Fix image_end_offset and data_end after out-of-image check
  parallels: Fix high_off calculation in parallels_co_check()
  parallels: Out of image offset in BAT leads to image inflation
  iotests/iov-padding: New test
  util/iov: Remove qemu_iovec_init_extended()
  block: Collapse padded I/O vecs exceeding IOV_MAX
  util/iov: Make qiov_slice() public

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
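The arithmetic behind the IOV_MAX fix can be checked by hand. Below is a minimal standalone sketch (plain C, not QEMU code; all names are illustrative) of how a 1024-element guest vector that needs both head and tail padding is brought back under the limit by collapsing surplus + 1 leading elements into one bounce buffer:

    #include <stdio.h>

    #define IOV_MAX 1024   /* POSIX limit on iovec elements per request */

    int main(void)
    {
        int niov = 1024;           /* guest-supplied buffers */
        int head = 1, tail = 1;    /* both ends need alignment padding */

        int padded_niov = head + niov + tail;   /* 1026 > IOV_MAX */
        int surplus = padded_niov - IOV_MAX;    /* 2 elements too many */
        int collapse = surplus + 1;             /* merge 3 elements into 1 */

        printf("padded=%d surplus=%d final=%d\n", padded_niov, surplus,
               padded_niov - (collapse - 1));   /* final = 1024 == IOV_MAX */
        return 0;
    }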
commit b52daaf2c8

block/io.c (166 lines changed):
@@ -1441,6 +1441,14 @@ out:
  * @merge_reads is true for small requests,
  * if @buf_len == @head + bytes + @tail. In this case it is possible that both
  * head and tail exist but @buf_len == align and @tail_buf == @buf.
+ *
+ * @write is true for write requests, false for read requests.
+ *
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
+ * I/O vector elements, so for read requests, the data can be copied back after
+ * the read is done.
  */
 typedef struct BdrvRequestPadding {
     uint8_t *buf;
@@ -1449,11 +1457,17 @@ typedef struct BdrvRequestPadding {
     size_t head;
     size_t tail;
     bool merge_reads;
+    bool write;
     QEMUIOVector local_qiov;
+
+    uint8_t *collapse_bounce_buf;
+    size_t collapse_len;
+    QEMUIOVector pre_collapse_qiov;
 } BdrvRequestPadding;
 
 static bool bdrv_init_padding(BlockDriverState *bs,
                               int64_t offset, int64_t bytes,
+                              bool write,
                               BdrvRequestPadding *pad)
 {
     int64_t align = bs->bl.request_alignment;
@@ -1485,6 +1499,8 @@ static bool bdrv_init_padding(BlockDriverState *bs,
         pad->tail_buf = pad->buf + pad->buf_len - align;
     }
 
+    pad->write = write;
+
     return true;
 }
@@ -1549,8 +1565,23 @@ zero_mem:
     return 0;
 }
 
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
+/**
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
+ */
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
 {
+    if (pad->collapse_bounce_buf) {
+        if (!pad->write) {
+            /*
+             * If padding required elements in the vector to be collapsed into a
+             * bounce buffer, copy the bounce buffer content back
+             */
+            qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
+                                pad->collapse_bounce_buf, pad->collapse_len);
+        }
+        qemu_vfree(pad->collapse_bounce_buf);
+        qemu_iovec_destroy(&pad->pre_collapse_qiov);
+    }
     if (pad->buf) {
         qemu_vfree(pad->buf);
         qemu_iovec_destroy(&pad->local_qiov);
@@ -1558,6 +1589,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
     memset(pad, 0, sizeof(*pad));
 }
 
+/*
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
+ *
+ * To ensure this, when necessary, the first two or three elements of @iov are
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
+ * bounce buffer in pad->local_qiov.
+ *
+ * After performing a read request, the data from the bounce buffer must be
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
+ */
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
+                                   BdrvRequestPadding *pad,
+                                   struct iovec *iov, int niov,
+                                   size_t iov_offset, size_t bytes)
+{
+    int padded_niov, surplus_count, collapse_count;
+
+    /* Assert this invariant */
+    assert(niov <= IOV_MAX);
+
+    /*
+     * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
+     * to the guest is not ideal, but there is little else we can do. At least
+     * this will practically never happen on 64-bit systems.
+     */
+    if (SIZE_MAX - pad->head < bytes ||
+        SIZE_MAX - pad->head - bytes < pad->tail)
+    {
+        return -EINVAL;
+    }
+
+    /* Length of the resulting IOV if we just concatenated everything */
+    padded_niov = !!pad->head + niov + !!pad->tail;
+
+    qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
+
+    if (pad->head) {
+        qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
+    }
+
+    /*
+     * If padded_niov > IOV_MAX, we cannot just concatenate everything.
+     * Instead, merge the first two or three elements of @iov to reduce the
+     * number of vector elements as necessary.
+     */
+    if (padded_niov > IOV_MAX) {
+        /*
+         * Only head and tail can lead to the number of entries exceeding
+         * IOV_MAX, so we can exceed it by the head and tail at most. We need
+         * to reduce the number of elements by `surplus_count`, so we merge that
+         * many elements plus one into one element.
+         */
+        surplus_count = padded_niov - IOV_MAX;
+        assert(surplus_count <= !!pad->head + !!pad->tail);
+        collapse_count = surplus_count + 1;
+
+        /*
+         * Move the elements to collapse into `pad->pre_collapse_qiov`, then
+         * advance `iov` (and associated variables) by those elements.
+         */
+        qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
+        qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
+                              collapse_count, iov_offset, SIZE_MAX);
+        iov += collapse_count;
+        iov_offset = 0;
+        niov -= collapse_count;
+        bytes -= pad->pre_collapse_qiov.size;
+
+        /*
+         * Construct the bounce buffer to match the length of the to-collapse
+         * vector elements, and for write requests, initialize it with the data
+         * from those elements. Then add it to `pad->local_qiov`.
+         */
+        pad->collapse_len = pad->pre_collapse_qiov.size;
+        pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
+        if (pad->write) {
+            qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
+                              pad->collapse_bounce_buf, pad->collapse_len);
+        }
+        qemu_iovec_add(&pad->local_qiov,
+                       pad->collapse_bounce_buf, pad->collapse_len);
+    }
+
+    qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
+
+    if (pad->tail) {
+        qemu_iovec_add(&pad->local_qiov,
+                       pad->buf + pad->buf_len - pad->tail, pad->tail);
+    }
+
+    assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
+    return 0;
+}
+
 /*
  * bdrv_pad_request
  *
@@ -1565,6 +1691,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
  * read of padding, bdrv_padding_rmw_read() should be called separately if
  * needed.
  *
+ * @write is true for write requests, false for read requests.
+ *
 * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
 * - on function start they represent original request
 * - on failure or when padding is not needed they are unchanged
@@ -1573,26 +1701,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
 static int bdrv_pad_request(BlockDriverState *bs,
                             QEMUIOVector **qiov, size_t *qiov_offset,
                             int64_t *offset, int64_t *bytes,
+                            bool write,
                             BdrvRequestPadding *pad, bool *padded,
                             BdrvRequestFlags *flags)
 {
     int ret;
+    struct iovec *sliced_iov;
+    int sliced_niov;
+    size_t sliced_head, sliced_tail;
 
     bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
 
-    if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
+    if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
         if (padded) {
             *padded = false;
         }
         return 0;
     }
 
-    ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
-                                   *qiov, *qiov_offset, *bytes,
-                                   pad->buf + pad->buf_len - pad->tail,
-                                   pad->tail);
+    sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
+                                  &sliced_head, &sliced_tail,
+                                  &sliced_niov);
+
+    /* Guaranteed by bdrv_check_qiov_request() */
+    assert(*bytes <= SIZE_MAX);
+    ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
+                                  sliced_head, *bytes);
     if (ret < 0) {
-        bdrv_padding_destroy(pad);
+        bdrv_padding_finalize(pad);
         return ret;
     }
     *bytes += pad->head + pad->tail;
@@ -1659,8 +1795,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
         flags |= BDRV_REQ_COPY_ON_READ;
     }
 
-    ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
-                           NULL, &flags);
+    ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
+                           &pad, NULL, &flags);
     if (ret < 0) {
         goto fail;
     }
@@ -1670,7 +1806,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
                               bs->bl.request_alignment,
                               qiov, qiov_offset, flags);
     tracked_request_end(&req);
-    bdrv_padding_destroy(&pad);
+    bdrv_padding_finalize(&pad);
 
 fail:
     bdrv_dec_in_flight(bs);
@@ -2002,7 +2138,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
     /* This flag doesn't make sense for padding or zero writes */
     flags &= ~BDRV_REQ_REGISTERED_BUF;
 
-    padding = bdrv_init_padding(bs, offset, bytes, &pad);
+    padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
     if (padding) {
         assert(!(flags & BDRV_REQ_NO_WAIT));
         bdrv_make_request_serialising(req, align);
@@ -2050,7 +2186,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
     }
 
 out:
-    bdrv_padding_destroy(&pad);
+    bdrv_padding_finalize(&pad);
 
     return ret;
 }
@@ -2118,8 +2254,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
          * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
          * alignment only if there is no ZERO flag.
          */
-        ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
-                               &padded, &flags);
+        ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
+                               &pad, &padded, &flags);
         if (ret < 0) {
             return ret;
         }
@@ -2149,7 +2285,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
     ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
                                qiov, qiov_offset, flags);
 
-    bdrv_padding_destroy(&pad);
+    bdrv_padding_finalize(&pad);
 
 out:
     tracked_request_end(&req);
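To make the collapse step concrete, here is a self-contained sketch (plain POSIX iovec plus malloc/memcpy; the helper name and signature are invented for illustration and are not the QEMU API) of merging the first few vector elements into a single bounce buffer, which is what bdrv_create_padded_qiov() does through pad->pre_collapse_qiov and pad->collapse_bounce_buf:

    #include <stdlib.h>
    #include <string.h>
    #include <sys/uio.h>

    /* Merge iov[0..collapse-1] into one malloc'ed bounce buffer; the caller
     * replaces those elements with *out in the final vector. */
    static void *collapse_head(const struct iovec *iov, int collapse,
                               struct iovec *out)
    {
        size_t total = 0;
        for (int i = 0; i < collapse; i++) {
            total += iov[i].iov_len;
        }

        char *bounce = malloc(total);
        if (!bounce) {
            return NULL;
        }

        /* Write path: gather the data up front. On the read path, the request
         * fills the bounce buffer and the data is scattered back afterwards,
         * which is what bdrv_padding_finalize() does for reads. */
        size_t off = 0;
        for (int i = 0; i < collapse; i++) {
            memcpy(bounce + off, iov[i].iov_base, iov[i].iov_len);
            off += iov[i].iov_len;
        }

        out->iov_base = bounce;
        out->iov_len = total;
        return bounce;
    }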
block/parallels.c:

@@ -165,6 +165,13 @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
     return start_off;
 }
 
+static void parallels_set_bat_entry(BDRVParallelsState *s,
+                                    uint32_t index, uint32_t offset)
+{
+    s->bat_bitmap[index] = cpu_to_le32(offset);
+    bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1);
+}
+
 static int64_t coroutine_fn GRAPH_RDLOCK
 allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                   int nb_sectors, int *pnum)
@@ -251,10 +258,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
     }
 
     for (i = 0; i < to_allocate; i++) {
-        s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
+        parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier);
         s->data_end += s->tracks;
-        bitmap_set(s->bat_dirty_bmap,
-                   bat_entry_off(idx + i) / s->bat_dirty_block, 1);
     }
 
     return bat2sect(s, idx) + sector_num % s->tracks;
@@ -415,16 +420,33 @@ parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
     return ret;
 }
 
-
-static int coroutine_fn GRAPH_RDLOCK
-parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
-                   BdrvCheckMode fix)
+static void parallels_check_unclean(BlockDriverState *bs,
+                                    BdrvCheckResult *res,
+                                    BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+
+    if (!s->header_unclean) {
+        return;
+    }
+
+    fprintf(stderr, "%s image was not closed correctly\n",
+            fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+    res->corruptions++;
+    if (fix & BDRV_FIX_ERRORS) {
+        /* parallels_close will do the job right */
+        res->corruptions_fixed++;
+        s->header_unclean = false;
+    }
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
+                              BdrvCheckMode fix)
 {
     BDRVParallelsState *s = bs->opaque;
-    int64_t size, prev_off, high_off;
-    int ret;
     uint32_t i;
-    bool flush_bat = false;
+    int64_t off, high_off, size;
 
     size = bdrv_getlength(bs->file->bs);
     if (size < 0) {
@@ -432,65 +454,48 @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
         return size;
     }
 
-    qemu_co_mutex_lock(&s->lock);
-    if (s->header_unclean) {
-        fprintf(stderr, "%s image was not closed correctly\n",
-                fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
-        res->corruptions++;
-        if (fix & BDRV_FIX_ERRORS) {
-            /* parallels_close will do the job right */
-            res->corruptions_fixed++;
-            s->header_unclean = false;
-        }
-    }
-
-    res->bfi.total_clusters = s->bat_size;
-    res->bfi.compressed_clusters = 0; /* compression is not supported */
-
     high_off = 0;
-    prev_off = 0;
     for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
-        if (off == 0) {
-            prev_off = 0;
-            continue;
-        }
-
-        /* cluster outside the image */
-        if (off > size) {
+        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        if (off + s->cluster_size > size) {
             fprintf(stderr, "%s cluster %u is outside image\n",
                     fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
             res->corruptions++;
             if (fix & BDRV_FIX_ERRORS) {
-                prev_off = 0;
-                s->bat_bitmap[i] = 0;
+                parallels_set_bat_entry(s, i, 0);
                 res->corruptions_fixed++;
-                flush_bat = true;
-                continue;
             }
+            continue;
         }
 
-        res->bfi.allocated_clusters++;
-        if (off > high_off) {
+        if (high_off < off) {
             high_off = off;
         }
-
-        if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
-            res->bfi.fragmented_clusters++;
-        }
-        prev_off = off;
     }
 
-    ret = 0;
-    if (flush_bat) {
-        ret = bdrv_co_pwrite_sync(bs->file, 0, s->header_size, s->header, 0);
-        if (ret < 0) {
-            res->check_errors++;
-            goto out;
-        }
+    if (high_off == 0) {
+        res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
+    } else {
+        res->image_end_offset = high_off + s->cluster_size;
+        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
     }
 
+    return 0;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
+                     BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t size;
+    int ret;
+
+    size = bdrv_getlength(bs->file->bs);
+    if (size < 0) {
+        res->check_errors++;
+        return size;
+    }
+
-    res->image_end_offset = high_off + s->cluster_size;
     if (size > res->image_end_offset) {
         int64_t count;
         count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
@@ -510,14 +515,74 @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
             if (ret < 0) {
                 error_report_err(local_err);
                 res->check_errors++;
-                goto out;
+                return ret;
             }
             res->leaks_fixed += count;
         }
     }
 
-out:
-    qemu_co_mutex_unlock(&s->lock);
     return 0;
 }
 
+static void parallels_collect_statistics(BlockDriverState *bs,
+                                         BdrvCheckResult *res,
+                                         BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t off, prev_off;
+    uint32_t i;
+
+    res->bfi.total_clusters = s->bat_size;
+    res->bfi.compressed_clusters = 0; /* compression is not supported */
+
+    prev_off = 0;
+    for (i = 0; i < s->bat_size; i++) {
+        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        /*
+         * If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not
+         * fixed. Skip not allocated and out-of-image BAT entries.
+         */
+        if (off == 0 || off + s->cluster_size > res->image_end_offset) {
+            prev_off = 0;
+            continue;
+        }
+
+        if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
+            res->bfi.fragmented_clusters++;
+        }
+        prev_off = off;
+        res->bfi.allocated_clusters++;
+    }
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
+                   BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret;
+
+    WITH_QEMU_LOCK_GUARD(&s->lock) {
+        parallels_check_unclean(bs, res, fix);
+
+        ret = parallels_check_outside_image(bs, res, fix);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = parallels_check_leak(bs, res, fix);
+        if (ret < 0) {
+            return ret;
+        }
+
+        parallels_collect_statistics(bs, res, fix);
+    }
+
+    ret = bdrv_co_flush(bs);
+    if (ret < 0) {
+        res->check_errors++;
+    }
+
+    return ret;
+}
@@ -733,6 +798,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVParallelsState *s = bs->opaque;
     ParallelsHeader ph;
     int ret, size, i;
+    int64_t file_nb_sectors;
     QemuOpts *opts = NULL;
     Error *local_err = NULL;
     char *buf;
@@ -742,6 +808,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         return ret;
     }
 
+    file_nb_sectors = bdrv_nb_sectors(bs->file->bs);
+    if (file_nb_sectors < 0) {
+        return -EINVAL;
+    }
+
     ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0);
     if (ret < 0) {
         goto fail;
@@ -806,6 +877,17 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
 
     for (i = 0; i < s->bat_size; i++) {
         int64_t off = bat2sect(s, i);
+        if (off >= file_nb_sectors) {
+            if (flags & BDRV_O_CHECK) {
+                continue;
+            }
+            error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry "
+                       "is larger than file size (%" PRIi64 ")",
+                       off << BDRV_SECTOR_BITS, i,
+                       file_nb_sectors << BDRV_SECTOR_BITS);
+            ret = -EINVAL;
+            goto fail;
+        }
         if (off >= s->data_end) {
             s->data_end = off + s->tracks;
         }
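The hunks above also show the heart of the "Incorrect condition in out-of-image check" fix: the old test compared only the cluster's start offset against the file size. A tiny sketch (hypothetical helper, not the driver code) of why the cluster's end offset has to be checked instead:

    #include <stdbool.h>
    #include <stdint.h>

    static bool cluster_outside_image(uint64_t off, uint64_t cluster_size,
                                      uint64_t file_size)
    {
        /* Old, buggy form: `off > file_size`. It accepted clusters that
         * start inside the file but extend past its end. */
        return off + cluster_size > file_size;
    }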
block/qcow2-cluster.c:

@@ -1925,6 +1925,10 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
         uint64_t new_l2_bitmap = old_l2_bitmap;
         QCow2ClusterType cluster_type =
             qcow2_get_cluster_type(bs, old_l2_entry);
+        bool keep_reference = (cluster_type != QCOW2_CLUSTER_COMPRESSED) &&
+                              !full_discard &&
+                              (s->discard_no_unref &&
+                               type == QCOW2_DISCARD_REQUEST);
 
         /*
          * If full_discard is true, the cluster should not read back as zeroes,
@@ -1943,10 +1947,22 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
             new_l2_entry = new_l2_bitmap = 0;
         } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) {
             if (has_subclusters(s)) {
-                new_l2_entry = 0;
+                if (keep_reference) {
+                    new_l2_entry = old_l2_entry;
+                } else {
+                    new_l2_entry = 0;
+                }
                 new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
             } else {
-                new_l2_entry = s->qcow_version >= 3 ? QCOW_OFLAG_ZERO : 0;
+                if (s->qcow_version >= 3) {
+                    if (keep_reference) {
+                        new_l2_entry |= QCOW_OFLAG_ZERO;
+                    } else {
+                        new_l2_entry = QCOW_OFLAG_ZERO;
+                    }
+                } else {
+                    new_l2_entry = 0;
+                }
             }
         }
@@ -1960,8 +1976,16 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
         if (has_subclusters(s)) {
             set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
         }
-        /* Then decrease the refcount */
-        qcow2_free_any_cluster(bs, old_l2_entry, type);
+        if (!keep_reference) {
+            /* Then decrease the refcount */
+            qcow2_free_any_cluster(bs, old_l2_entry, type);
+        } else if (s->discard_passthrough[type] &&
+                   (cluster_type == QCOW2_CLUSTER_NORMAL ||
+                    cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+            /* If we keep the reference, pass on the discard still */
+            bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
+                          s->cluster_size);
+        }
     }
 
     qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
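The retention rule introduced above reads naturally as a pure predicate. A sketch (assumed signature, not the QEMU function) of when discard_in_l2_slice() now keeps the qcow2-level allocation:

    #include <stdbool.h>

    /* Keep the cluster's refcount only for guest-initiated discard requests,
     * only when discard-no-unref is enabled, never on a full discard, and
     * never for compressed clusters. */
    static bool keep_reference(bool discard_no_unref, bool full_discard,
                               bool is_compressed, bool is_guest_request)
    {
        return !is_compressed && !full_discard &&
               discard_no_unref && is_guest_request;
    }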
block/qcow2.c:

@@ -682,6 +682,7 @@ static const char *const mutable_opts[] = {
     QCOW2_OPT_DISCARD_REQUEST,
     QCOW2_OPT_DISCARD_SNAPSHOT,
     QCOW2_OPT_DISCARD_OTHER,
+    QCOW2_OPT_DISCARD_NO_UNREF,
     QCOW2_OPT_OVERLAP,
     QCOW2_OPT_OVERLAP_TEMPLATE,
     QCOW2_OPT_OVERLAP_MAIN_HEADER,
@@ -726,6 +727,11 @@ static QemuOptsList qcow2_runtime_opts = {
         .type = QEMU_OPT_BOOL,
         .help = "Generate discard requests when other clusters are freed",
     },
+    {
+        .name = QCOW2_OPT_DISCARD_NO_UNREF,
+        .type = QEMU_OPT_BOOL,
+        .help = "Do not unreference discarded clusters",
+    },
     {
         .name = QCOW2_OPT_OVERLAP,
         .type = QEMU_OPT_STRING,
@@ -969,6 +975,7 @@ typedef struct Qcow2ReopenState {
     bool use_lazy_refcounts;
     int overlap_check;
     bool discard_passthrough[QCOW2_DISCARD_MAX];
+    bool discard_no_unref;
     uint64_t cache_clean_interval;
     QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
 } Qcow2ReopenState;
@@ -1140,6 +1147,15 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
     r->discard_passthrough[QCOW2_DISCARD_OTHER] =
         qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
 
+    r->discard_no_unref = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_NO_UNREF,
+                                            false);
+    if (r->discard_no_unref && s->qcow_version < 3) {
+        error_setg(errp,
+                   "discard-no-unref is only supported since qcow2 version 3");
+        ret = -EINVAL;
+        goto fail;
+    }
+
     switch (s->crypt_method_header) {
     case QCOW_CRYPT_NONE:
         if (encryptfmt) {
@@ -1220,6 +1236,8 @@ static void qcow2_update_options_commit(BlockDriverState *bs,
         s->discard_passthrough[i] = r->discard_passthrough[i];
     }
 
+    s->discard_no_unref = r->discard_no_unref;
+
     if (s->cache_clean_interval != r->cache_clean_interval) {
         cache_clean_timer_del(bs);
         s->cache_clean_interval = r->cache_clean_interval;
block/qcow2.h:

@@ -133,6 +133,7 @@
 #define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
 #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
 #define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
+#define QCOW2_OPT_DISCARD_NO_UNREF "discard-no-unref"
 #define QCOW2_OPT_OVERLAP "overlap-check"
 #define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
 #define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
@@ -385,6 +386,8 @@ typedef struct BDRVQcow2State {
 
     bool discard_passthrough[QCOW2_DISCARD_MAX];
 
+    bool discard_no_unref;
+
     int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
     bool signaled_corruption;
 
include/qemu/iov.h:

@@ -222,13 +222,11 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov)
 
 void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
 void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
-int qemu_iovec_init_extended(
-        QEMUIOVector *qiov,
-        void *head_buf, size_t head_len,
-        QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
-        void *tail_buf, size_t tail_len);
 void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
                            size_t offset, size_t len);
+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
+                               size_t offset, size_t len,
+                               size_t *head, size_t *tail, int *niov);
 int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len);
 void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
 void qemu_iovec_concat(QEMUIOVector *dst,
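A short usage sketch of the newly exported qemu_iovec_slice() (QEMU-internal API; the wrapper function and the include set here are assumptions for illustration). Unlike the removed qemu_iovec_init_extended(), it copies nothing: it returns a window into the existing element array plus the byte counts to trim at each end:

    #include "qemu/osdep.h"
    #include "qemu/iov.h"

    static void example_slice(QEMUIOVector *qiov)
    {
        size_t head, tail;
        int niov;

        /* Elements covering bytes [512, 512 + 4096) of the vector: */
        struct iovec *first = qemu_iovec_slice(qiov, 512, 4096,
                                               &head, &tail, &niov);

        /* first[0].iov_base + head is the first byte in range; the last
         * element, first[niov - 1], has tail surplus bytes past the range. */
        (void)first;
    }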
qapi/block-core.json:

@@ -3478,6 +3478,17 @@
 # @pass-discard-other: whether discard requests for the data source
 #     should be issued on other occasions where a cluster gets freed
 #
+# @discard-no-unref: when enabled, discards from the guest will not
+#     cause cluster allocations to be relinquished.  This prevents
+#     qcow2 fragmentation that would be caused by such discards.
+#     Besides potential performance degradation, such fragmentation
+#     can lead to increased allocation of clusters past the end of the
+#     image file, resulting in image files whose file length can grow
+#     much larger than their guest disk size would suggest.  If image
+#     file length is of concern (e.g. when storing qcow2 images
+#     directly on block devices), you should consider enabling this
+#     option.  (since 8.1)
+#
 # @overlap-check: which overlap checks to perform for writes to the
 #     image, defaults to 'cached' (since 2.2)
 #
@@ -3516,6 +3527,7 @@
             '*pass-discard-request': 'bool',
             '*pass-discard-snapshot': 'bool',
             '*pass-discard-other': 'bool',
+            '*discard-no-unref': 'bool',
             '*overlap-check': 'Qcow2OverlapChecks',
             '*cache-size': 'int',
             '*l2-cache-size': 'int',
qemu-options.hx:

@@ -1431,6 +1431,18 @@ SRST
         issued on other occasions where a cluster gets freed
         (on/off; default: off)
 
+    ``discard-no-unref``
+        When enabled, discards from the guest will not cause cluster
+        allocations to be relinquished.  This prevents qcow2
+        fragmentation that would be caused by such discards.  Besides
+        potential performance degradation, such fragmentation can lead
+        to increased allocation of clusters past the end of the image
+        file, resulting in image files whose file length can grow much
+        larger than their guest disk size would suggest.
+        If image file length is of concern (e.g. when storing qcow2
+        images directly on block devices), you should consider enabling
+        this option.
+
     ``overlap-check``
         Which overlap checks to perform for writes to the image
         (none/constant/cached/all; default: cached). For details or
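As a usage note: with this option merged, a hypothetical invocation enabling it on a qcow2 node could look like ``-blockdev driver=qcow2,node-name=disk0,file.driver=file,file.filename=disk.qcow2,discard-no-unref=on`` (the exact command-line spelling follows the QAPI option name above; treat this example as illustrative, not authoritative).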
tests/qemu-iotests/tests/iov-padding (new file, executable, 85 lines):

@@ -0,0 +1,85 @@
#!/usr/bin/env bash
# group: rw quick
#
# Check the interaction of request padding (to fit alignment restrictions) with
# vectored I/O from the guest
#
# Copyright Red Hat
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

seq=$(basename $0)
echo "QA output created by $seq"

status=1  # failure is the default!

_cleanup()
{
    _cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15

# get standard environment, filters and checks
cd ..
. ./common.rc
. ./common.filter

_supported_fmt raw
_supported_proto file

_make_test_img 1M

IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG"

# Four combinations:
# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k
# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not
# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned
# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not
for start_offset in 4096 512; do
    for last_element_length in 512 4096; do
        length=$((1023 * 512 + $last_element_length))

        echo
        echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) =="

        # Fill with data for testing
        $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io

        # 1023 512-byte buffers, and then one with length $last_element_length
        cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length"
        QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
            -c "writev $cmd_params" \
            --image-opts \
            "$IMGSPEC" \
            | _filter_qemu_io

        # Read all patterns -- read the part we just wrote with writev twice,
        # once "normally", and once with a readv, so we see that that works, too
        QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
            -c "read -P 1 0 $start_offset" \
            -c "read -P 2 $start_offset $length" \
            -c "readv $cmd_params" \
            -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \
            --image-opts \
            "$IMGSPEC" \
            | _filter_qemu_io
    done
done

# success, all done
echo "*** done"
rm -f $seq.full
status=0
tests/qemu-iotests/tests/iov-padding.out (new file, 59 lines):

@@ -0,0 +1,59 @@
QA output created by iov-padding
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576

== performing 1024-element vectored requests to image (offset: 4096; length: 524288) ==
wrote 1048576/1048576 bytes at offset 0
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 524288/524288 bytes at offset 4096
512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4096/4096 bytes at offset 0
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 524288/524288 bytes at offset 4096
512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 524288/524288 bytes at offset 4096
512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 520192/520192 bytes at offset 528384
508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

== performing 1024-element vectored requests to image (offset: 4096; length: 527872) ==
wrote 1048576/1048576 bytes at offset 0
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 527872/527872 bytes at offset 4096
515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4096/4096 bytes at offset 0
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 527872/527872 bytes at offset 4096
515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 527872/527872 bytes at offset 4096
515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 516608/516608 bytes at offset 531968
504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

== performing 1024-element vectored requests to image (offset: 512; length: 524288) ==
wrote 1048576/1048576 bytes at offset 0
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 524288/524288 bytes at offset 512
512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 524288/524288 bytes at offset 512
512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 524288/524288 bytes at offset 512
512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 523776/523776 bytes at offset 524800
511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

== performing 1024-element vectored requests to image (offset: 512; length: 527872) ==
wrote 1048576/1048576 bytes at offset 0
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 527872/527872 bytes at offset 512
515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 527872/527872 bytes at offset 512
515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 527872/527872 bytes at offset 512
515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 520192/520192 bytes at offset 528384
508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
*** done
util/iov.c (89 lines changed):

@@ -378,15 +378,15 @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
 }
 
 /*
- * qiov_slice
+ * qemu_iovec_slice
  *
  * Find subarray of iovec's, containing requested range. @head would
 * be offset in first iov (returned by the function), @tail would be
 * count of extra bytes in last iovec (returned iov + @niov - 1).
 */
-static struct iovec *qiov_slice(QEMUIOVector *qiov,
-                                size_t offset, size_t len,
-                                size_t *head, size_t *tail, int *niov)
+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
+                               size_t offset, size_t len,
+                               size_t *head, size_t *tail, int *niov)
 {
     struct iovec *iov, *end_iov;
 
@@ -411,75 +411,11 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len)
     size_t head, tail;
     int niov;
 
-    qiov_slice(qiov, offset, len, &head, &tail, &niov);
+    qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov);
 
     return niov;
 }
 
-/*
- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
- * and @tail_buf buffer into new qiov.
- */
-int qemu_iovec_init_extended(
-        QEMUIOVector *qiov,
-        void *head_buf, size_t head_len,
-        QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
-        void *tail_buf, size_t tail_len)
-{
-    size_t mid_head, mid_tail;
-    int total_niov, mid_niov = 0;
-    struct iovec *p, *mid_iov = NULL;
-
-    assert(mid_qiov->niov <= IOV_MAX);
-
-    if (SIZE_MAX - head_len < mid_len ||
-        SIZE_MAX - head_len - mid_len < tail_len)
-    {
-        return -EINVAL;
-    }
-
-    if (mid_len) {
-        mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
-                             &mid_head, &mid_tail, &mid_niov);
-    }
-
-    total_niov = !!head_len + mid_niov + !!tail_len;
-    if (total_niov > IOV_MAX) {
-        return -EINVAL;
-    }
-
-    if (total_niov == 1) {
-        qemu_iovec_init_buf(qiov, NULL, 0);
-        p = &qiov->local_iov;
-    } else {
-        qiov->niov = qiov->nalloc = total_niov;
-        qiov->size = head_len + mid_len + tail_len;
-        p = qiov->iov = g_new(struct iovec, qiov->niov);
-    }
-
-    if (head_len) {
-        p->iov_base = head_buf;
-        p->iov_len = head_len;
-        p++;
-    }
-
-    assert(!mid_niov == !mid_len);
-    if (mid_niov) {
-        memcpy(p, mid_iov, mid_niov * sizeof(*p));
-        p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
-        p[0].iov_len -= mid_head;
-        p[mid_niov - 1].iov_len -= mid_tail;
-        p += mid_niov;
-    }
-
-    if (tail_len) {
-        p->iov_base = tail_buf;
-        p->iov_len = tail_len;
-    }
-
-    return 0;
-}
-
 /*
  * Check if the contents of subrange of qiov data is all zeroes.
  */
@@ -511,14 +447,21 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes)
 void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
                            size_t offset, size_t len)
 {
-    int ret;
+    struct iovec *slice_iov;
+    int slice_niov;
+    size_t slice_head, slice_tail;
 
     assert(source->size >= len);
     assert(source->size - len >= offset);
 
-    /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */
-    ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
-    assert(ret == 0);
+    slice_iov = qemu_iovec_slice(source, offset, len,
+                                 &slice_head, &slice_tail, &slice_niov);
+    if (slice_niov == 1) {
+        qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len);
+    } else {
+        qemu_iovec_init(qiov, slice_niov);
+        qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len);
+    }
 }
 
 void qemu_iovec_destroy(QEMUIOVector *qiov)