qed: protect table cache with CoMutex

This makes the driver thread-safe.  The CoMutex is dropped temporarily
while accessing the data clusters or the backing file.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20170629132749.997-10-pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: 2017-06-29 15:27:47 +02:00
Committed by: Fam Zheng <famz@redhat.com>
Commit: 1f01e50b83 (parent: 61c7887e0f)
5 changed files with 129 additions and 54 deletions
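The locking discipline is uniform across the hunks below: a request coroutine takes table_lock before touching the L1/L2 table cache or the allocation state, and drops it around blocking file I/O so that other coroutines can make progress. A minimal sketch of that pattern; the helper name is hypothetical and merely stands in for real call sites such as qed_read_table() and qed_aio_write_cow():

    #include "qemu/osdep.h"
    #include "qemu/coroutine.h"

    /* Illustrative sketch only; not part of the patch. */
    static int coroutine_fn sketch_locked_table_op(CoMutex *table_lock)
    {
        int ret;

        qemu_co_mutex_lock(table_lock);
        /* ... inspect or update cached table metadata under the lock ... */

        qemu_co_mutex_unlock(table_lock);   /* never hold the lock across I/O */
        ret = 0;                            /* stand-in for a blocking read or
                                             * write that may yield the coroutine */
        qemu_co_mutex_lock(table_lock);

        /* ... publish the result to the cache, still under the lock ... */
        qemu_co_mutex_unlock(table_lock);
        return ret;
    }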

diff --git a/block/qed-cluster.c b/block/qed-cluster.c
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -85,6 +85,8 @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
  *
  * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1
  * table offset, respectively. len is number of contiguous unallocated bytes.
+ *
+ * Called with table_lock held.
  */
 int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
                                   uint64_t pos, size_t *len,
@@ -112,7 +114,6 @@ int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
     }
 
     ret = qed_read_l2_table(s, request, l2_offset);
-    qed_acquire(s);
     if (ret) {
         goto out;
     }
@@ -137,6 +138,5 @@ int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
 
 out:
     *img_offset = offset;
-    qed_release(s);
     return ret;
 }

diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -101,6 +101,8 @@ CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
 /**
  * Decrease an entry's reference count and free if necessary when the reference
  * count drops to zero.
+ *
+ * Called with table_lock held.
  */
 void qed_unref_l2_cache_entry(CachedL2Table *entry)
 {
@@ -122,6 +124,8 @@ void qed_unref_l2_cache_entry(CachedL2Table *entry)
  *
  * For a cached entry, this function increases the reference count and returns
  * the entry.
+ *
+ * Called with table_lock held.
  */
 CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
 {
@@ -150,6 +154,8 @@ CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
  * N.B. This function steals a reference to the l2_table from the caller so the
  * caller must obtain a new reference by issuing a call to
  * qed_find_l2_cache_entry().
+ *
+ * Called with table_lock held.
  */
 void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
 {

diff --git a/block/qed-table.c b/block/qed-table.c
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -18,6 +18,7 @@
 #include "qed.h"
 #include "qemu/bswap.h"
 
+/* Called either from qed_check or with table_lock held. */
 static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
 {
     QEMUIOVector qiov;
@@ -32,18 +33,22 @@ static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
 
     trace_qed_read_table(s, offset, table);
 
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
     ret = bdrv_preadv(s->bs->file, offset, &qiov);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
     if (ret < 0) {
         goto out;
     }
 
     /* Byteswap offsets */
-    qed_acquire(s);
     noffsets = qiov.size / sizeof(uint64_t);
     for (i = 0; i < noffsets; i++) {
         table->offsets[i] = le64_to_cpu(table->offsets[i]);
     }
-    qed_release(s);
 
     ret = 0;
 out:
@@ -61,6 +66,8 @@ out:
  * @index: Index of first element
  * @n: Number of elements
  * @flush: Whether or not to sync to disk
+ *
+ * Called either from qed_check or with table_lock held.
  */
 static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
                            unsigned int index, unsigned int n, bool flush)
@@ -97,16 +104,20 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
     /* Adjust for offset into table */
     offset += start * sizeof(uint64_t);
 
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
     ret = bdrv_pwritev(s->bs->file, offset, &qiov);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
     trace_qed_write_table_cb(s, table, flush, ret);
     if (ret < 0) {
         goto out;
     }
 
     if (flush) {
-        qed_acquire(s);
         ret = bdrv_flush(s->bs);
-        qed_release(s);
         if (ret < 0) {
             goto out;
         }
@@ -123,6 +134,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
     return qed_read_table(s, s->header.l1_table_offset, s->l1_table);
 }
 
+/* Called either from qed_check or with table_lock held. */
 int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n)
 {
     BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
@@ -136,6 +148,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
     return qed_write_l1_table(s, index, n);
 }
 
+/* Called either from qed_check or with table_lock held. */
int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
 {
     int ret;
@@ -154,7 +167,6 @@ int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
     BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
     ret = qed_read_table(s, offset, request->l2_table->table);
 
-    qed_acquire(s);
     if (ret) {
         /* can't trust loaded L2 table anymore */
         qed_unref_l2_cache_entry(request->l2_table);
@@ -170,7 +182,6 @@ int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
         request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
         assert(request->l2_table != NULL);
     }
-    qed_release(s);
 
     return ret;
 }
@@ -180,6 +191,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
     return qed_read_l2_table(s, request, offset);
 }
 
+/* Called either from qed_check or with table_lock held. */
 int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                        unsigned int index, unsigned int n, bool flush)
 {
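Note the conditional unlock/lock pairs in qed_read_table() and qed_write_table() above: these helpers are reached both from request coroutines, which hold table_lock, and synchronously from qed_check(), which runs outside coroutine context and holds no lock. A condensed view of the guard, assuming the BDRVQEDState *s from the patch:

    /* Drop the table lock around raw file I/O, but only when running in a
     * coroutine; on the qed_check() path there is no lock to drop.
     */
    if (qemu_in_coroutine()) {
        qemu_co_mutex_unlock(&s->table_lock);
    }
    ret = bdrv_preadv(s->bs->file, offset, &qiov);  /* blocking read */
    if (qemu_in_coroutine()) {
        qemu_co_mutex_lock(&s->table_lock);
    }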

diff --git a/block/qed.c b/block/qed.c
--- a/block/qed.c
+++ b/block/qed.c
@@ -93,6 +93,8 @@ int qed_write_header_sync(BDRVQEDState *s)
  *
  * This function only updates known header fields in-place and does not affect
  * extra data after the QED header.
+ *
+ * No new allocating reqs can start while this function runs.
  */
 static int coroutine_fn qed_write_header(BDRVQEDState *s)
 {
@@ -109,6 +111,8 @@ static int coroutine_fn qed_write_header(BDRVQEDState *s)
     QEMUIOVector qiov;
     int ret;
 
+    assert(s->allocating_acb || s->allocating_write_reqs_plugged);
+
     buf = qemu_blockalign(s->bs, len);
     iov = (struct iovec) {
         .iov_base = buf,
@@ -219,6 +223,8 @@ static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
  * This function only produces the offset where the new clusters should be
  * written. It updates BDRVQEDState but does not make any changes to the image
  * file.
+ *
+ * Called with table_lock held.
  */
 static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
 {
@@ -236,6 +242,8 @@ QEDTable *qed_alloc_table(BDRVQEDState *s)
 
 /**
  * Allocate a new zeroed L2 table
+ *
+ * Called with table_lock held.
  */
 static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
 {
@@ -249,19 +257,32 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
     return l2_table;
 }
 
-static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+static bool qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
+    qemu_co_mutex_lock(&s->table_lock);
+
+    /* No reentrancy is allowed. */
     assert(!s->allocating_write_reqs_plugged);
+    if (s->allocating_acb != NULL) {
+        /* Another allocating write came concurrently. This cannot happen
+         * from bdrv_qed_co_drain, but it can happen when the timer runs.
+         */
+        qemu_co_mutex_unlock(&s->table_lock);
+        return false;
+    }
 
     s->allocating_write_reqs_plugged = true;
+    qemu_co_mutex_unlock(&s->table_lock);
+    return true;
 }
 
 static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
 {
+    qemu_co_mutex_lock(&s->table_lock);
+
     assert(s->allocating_write_reqs_plugged);
     s->allocating_write_reqs_plugged = false;
-    qemu_co_enter_next(&s->allocating_write_reqs);
+
+    qemu_co_queue_next(&s->allocating_write_reqs);
+    qemu_co_mutex_unlock(&s->table_lock);
 }
 
 static void coroutine_fn qed_need_check_timer_entry(void *opaque)
@@ -269,17 +290,14 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque)
     BDRVQEDState *s = opaque;
     int ret;
 
-    /* The timer should only fire when allocating writes have drained */
-    assert(!s->allocating_acb);
-
     trace_qed_need_check_timer_cb(s);
 
-    qed_acquire(s);
-    qed_plug_allocating_write_reqs(s);
+    if (!qed_plug_allocating_write_reqs(s)) {
+        return;
+    }
 
     /* Ensure writes are on disk before clearing flag */
     ret = bdrv_co_flush(s->bs->file->bs);
-    qed_release(s);
     if (ret < 0) {
         qed_unplug_allocating_write_reqs(s);
         return;
@@ -301,16 +319,6 @@ static void qed_need_check_timer_cb(void *opaque)
     qemu_coroutine_enter(co);
 }
 
-void qed_acquire(BDRVQEDState *s)
-{
-    aio_context_acquire(bdrv_get_aio_context(s->bs));
-}
-
-void qed_release(BDRVQEDState *s)
-{
-    aio_context_release(bdrv_get_aio_context(s->bs));
-}
-
 static void qed_start_need_check_timer(BDRVQEDState *s)
 {
     trace_qed_start_need_check_timer(s);
@@ -369,6 +377,7 @@ static void bdrv_qed_init_state(BlockDriverState *bs)
 
     memset(s, 0, sizeof(BDRVQEDState));
     s->bs = bs;
+    qemu_co_mutex_init(&s->table_lock);
     qemu_co_queue_init(&s->allocating_write_reqs);
 }
 
@@ -688,6 +697,7 @@ typedef struct {
     BlockDriverState **file;
 } QEDIsAllocatedCB;
 
+/* Called with table_lock held. */
 static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
 {
     QEDIsAllocatedCB *cb = opaque;
@@ -735,6 +745,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
     uint64_t offset;
     int ret;
 
+    qemu_co_mutex_lock(&s->table_lock);
     ret = qed_find_cluster(s, &request, cb.pos, &len, &offset);
     qed_is_allocated_cb(&cb, ret, offset, len);
 
@@ -742,6 +753,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
     assert(cb.status != BDRV_BLOCK_OFFSET_MASK);
 
     qed_unref_l2_cache_entry(request.l2_table);
+    qemu_co_mutex_unlock(&s->table_lock);
 
     return cb.status;
 }
@@ -872,6 +884,8 @@ out:
  *
  * The cluster offset may be an allocated byte offset in the image file, the
  * zero cluster marker, or the unallocated cluster marker.
+ *
+ * Called with table_lock held.
  */
 static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
                                              int index, unsigned int n,
@@ -887,6 +901,7 @@ static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
     }
 }
 
+/* Called with table_lock held. */
 static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
@@ -910,7 +925,7 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
     if (acb == s->allocating_acb) {
         s->allocating_acb = NULL;
         if (!qemu_co_queue_empty(&s->allocating_write_reqs)) {
-            qemu_co_enter_next(&s->allocating_write_reqs);
+            qemu_co_queue_next(&s->allocating_write_reqs);
         } else if (s->header.features & QED_F_NEED_CHECK) {
             qed_start_need_check_timer(s);
         }
@@ -919,6 +934,8 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
 
 /**
  * Update L1 table with new L2 table offset and write it out
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
 {
@@ -947,6 +964,8 @@ static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
 
 /**
  * Update L2 table with new cluster offsets and write them out
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
 {
@@ -983,6 +1002,8 @@ static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
 
 /**
  * Write data to the image file
+ *
+ * Called with table_lock *not* held.
  */
 static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
 {
@@ -999,6 +1020,8 @@ static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
 
 /**
  * Populate untouched regions of new data cluster
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
 {
@@ -1006,6 +1029,8 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
     uint64_t start, len, offset;
     int ret;
 
+    qemu_co_mutex_unlock(&s->table_lock);
+
     /* Populate front untouched region of new data cluster */
     start = qed_start_of_cluster(s, acb->cur_pos);
     len = qed_offset_into_cluster(s, acb->cur_pos);
@@ -1013,7 +1038,7 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
     trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
     ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
 
     /* Populate back untouched region of new data cluster */
@@ -1026,12 +1051,12 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
     trace_qed_aio_write_postfill(s, acb, start, len, offset);
     ret = qed_copy_from_backing_file(s, start, len, offset);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
 
     ret = qed_aio_write_main(acb);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
 
     if (s->bs->backing) {
@@ -1046,12 +1071,11 @@ static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
          * cluster and before updating the L2 table.
          */
         ret = bdrv_co_flush(s->bs->file->bs);
-        if (ret < 0) {
-            return ret;
-        }
     }
 
-    return 0;
+out:
+    qemu_co_mutex_lock(&s->table_lock);
+    return ret;
 }
 
 /**
@@ -1074,6 +1098,8 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
  * @len: Length in bytes
  *
  * This path is taken when writing to previously unallocated clusters.
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
@@ -1088,7 +1114,7 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
     /* Freeze this request if another allocating write is in progress */
     if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) {
         if (s->allocating_acb != NULL) {
-            qemu_co_queue_wait(&s->allocating_write_reqs, NULL);
+            qemu_co_queue_wait(&s->allocating_write_reqs, &s->table_lock);
             assert(s->allocating_acb == NULL);
         }
         s->allocating_acb = acb;
@@ -1135,10 +1161,17 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
  * @len: Length in bytes
  *
  * This path is taken when writing to already allocated clusters.
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
                                               size_t len)
 {
+    BDRVQEDState *s = acb_to_s(acb);
+    int r;
+
+    qemu_co_mutex_unlock(&s->table_lock);
+
     /* Allocate buffer for zero writes */
     if (acb->flags & QED_AIOCB_ZERO) {
         struct iovec *iov = acb->qiov->iov;
@@ -1146,7 +1179,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
         if (!iov->iov_base) {
             iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len);
             if (iov->iov_base == NULL) {
-                return -ENOMEM;
+                r = -ENOMEM;
+                goto out;
             }
             memset(iov->iov_base, 0, iov->iov_len);
         }
@@ -1156,8 +1190,11 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
     acb->cur_cluster = offset;
     qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Do the actual write */
-    return qed_aio_write_main(acb);
+    /* Do the actual write. */
+    r = qed_aio_write_main(acb);
+out:
+    qemu_co_mutex_lock(&s->table_lock);
+    return r;
 }
 
 /**
@@ -1167,6 +1204,8 @@ static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
  * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
  * @offset: Cluster offset in bytes
  * @len: Length in bytes
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
                                            uint64_t offset, size_t len)
@@ -1198,6 +1237,8 @@ static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
  * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2 or QED_CLUSTER_L1
  * @offset: Cluster offset in bytes
  * @len: Length in bytes
+ *
+ * Called with table_lock held.
  */
 static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
                                           uint64_t offset, size_t len)
@@ -1205,6 +1246,9 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
     QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
     BlockDriverState *bs = acb->bs;
+    int r;
+
+    qemu_co_mutex_unlock(&s->table_lock);
 
     /* Adjust offset into cluster */
     offset += qed_offset_into_cluster(s, acb->cur_pos);
@@ -1213,22 +1257,23 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
 
     qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Handle zero cluster and backing file reads */
+    /* Handle zero cluster and backing file reads, otherwise read
+     * data cluster directly.
+     */
     if (ret == QED_CLUSTER_ZERO) {
         qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
-        return 0;
+        r = 0;
     } else if (ret != QED_CLUSTER_FOUND) {
-        return qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                                     &acb->backing_qiov);
+        r = qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
+                                  &acb->backing_qiov);
+    } else {
+        BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+        r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
+                           &acb->cur_qiov, 0);
     }
 
-    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    ret = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
-                         &acb->cur_qiov, 0);
-    if (ret < 0) {
-        return ret;
-    }
-    return 0;
+    qemu_co_mutex_lock(&s->table_lock);
+    return r;
 }
 
 /**
@@ -1241,6 +1286,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
     size_t len;
     int ret;
 
+    qemu_co_mutex_lock(&s->table_lock);
     while (1) {
         trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size);
 
@@ -1280,6 +1326,7 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
 
     trace_qed_aio_complete(s, acb, ret);
     qed_aio_complete(acb);
+    qemu_co_mutex_unlock(&s->table_lock);
     return ret;
 }
 
@@ -1469,13 +1516,20 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
 
 static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
+    BDRVQEDState *s = bs->opaque;
     Error *local_err = NULL;
     int ret;
 
     bdrv_qed_close(bs);
 
     bdrv_qed_init_state(bs);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
     ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
     if (local_err) {
         error_propagate(errp, local_err);
         error_prepend(errp, "Could not reopen qed layer: ");
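Serialization of allocating writes now rides on the same mutex: qemu_co_queue_wait() releases table_lock atomically while the coroutine sleeps and retakes it on wakeup, and the finishing request wakes exactly one waiter with qemu_co_queue_next() while still holding the lock. Condensed from the qed.c hunks above, surrounding logic elided:

    /* Waiter side (qed_aio_write_alloc): park until the current allocating
     * request finishes; table_lock is dropped while queued and reacquired
     * before this call returns.
     */
    qemu_co_queue_wait(&s->allocating_write_reqs, &s->table_lock);

    /* Owner side (qed_aio_complete): wake the next waiter; table_lock is
     * still held here, so the waiter runs only after it is released.
     */
    if (!qemu_co_queue_empty(&s->allocating_write_reqs)) {
        qemu_co_queue_next(&s->allocating_write_reqs);
    }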

diff --git a/block/qed.h b/block/qed.h
--- a/block/qed.h
+++ b/block/qed.h
@@ -151,15 +151,21 @@ typedef struct QEDAIOCB {
 
 typedef struct {
     BlockDriverState *bs;           /* device */
-    uint64_t file_size;             /* length of image file, in bytes */
 
+    /* Written only by an allocating write or the timer handler (the latter
+     * while allocating reqs are plugged).
+     */
     QEDHeader header;               /* always cpu-endian */
+
+    /* Protected by table_lock. */
+    CoMutex table_lock;
     QEDTable *l1_table;
     L2TableCache l2_cache;          /* l2 table cache */
     uint32_t table_nelems;
     uint32_t l1_shift;
     uint32_t l2_shift;
     uint32_t l2_mask;
+    uint64_t file_size;             /* length of image file, in bytes */
 
     /* Allocating write request queue */
     QEDAIOCB *allocating_acb;
@@ -177,9 +183,6 @@ enum {
     QED_CLUSTER_L1,              /* cluster missing in L1 */
 };
 
-void qed_acquire(BDRVQEDState *s);
-void qed_release(BDRVQEDState *s);
-
 /**
  * Header functions
  */