mirror of
https://github.com/qemu/qemu.git
synced 2024-11-25 20:03:37 +08:00
5fb0a6b5e7
A Windows guest sometimes makes DMA requests with overlapping target addresses. This leads to the following structure of iov for the block driver: addr size1 addr size2 addr size3. It means that three adjacent disk blocks should be read into the same memory buffer. Windows does not expect anything from these bytes (should it be data from the first block, or the last one, or some mix), but uses them somehow. It leads to non-determinism of the guest execution, because the block driver does not preserve any order of reading. This situation was discussed on the mailing list at least twice: https://lists.gnu.org/archive/html/qemu-devel/2010-09/msg01996.html https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg05185.html This patch makes such disk reads deterministic in icount mode. It splits the whole request into several parts. Parts may overlap, but SGs inside one part do not overlap. Parts that are processed later overwrite the prior ones in case of overlapping. Examples for different SG part sequences: 1) A1 1000 A2 1000 A1 1000 A3 1000 -> One request is split into two. A1 1000 A2 1000 -- A1 1000 A3 1000 2) A1 800 A2 1000 A1 1000 -> A1 800 A2 1000 -- A1 1000 Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru> Message-Id: <159117972206.12193.12939621311413561779.stgit@pasha-ThinkPad-X280> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
332 lines
9.0 KiB
C
332 lines
9.0 KiB
C
/*
|
|
* DMA helper functions
|
|
*
|
|
* Copyright (c) 2009 Red Hat
|
|
*
|
|
* This work is licensed under the terms of the GNU General Public License
|
|
* (GNU GPL), version 2 or later.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "sysemu/block-backend.h"
|
|
#include "sysemu/dma.h"
|
|
#include "trace-root.h"
|
|
#include "qemu/thread.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "sysemu/cpus.h"
|
|
#include "qemu/range.h"
|
|
|
|
/* #define DEBUG_IOMMU */
|
|
|
|
/*
 * Fill @len bytes of address space @as, starting at @addr, with byte @c.
 *
 * A dma_barrier() is issued first, then the range is written in chunks of
 * at most FILLBUF_SIZE bytes from a pre-filled stack buffer using
 * address_space_write().
 *
 * Returns non-zero if any chunk write returned a non-zero result, and zero
 * otherwise.  All chunks are attempted even after a failure.
 */
int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t len)
{
    dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);

#define FILLBUF_SIZE 512
    uint8_t pattern[FILLBUF_SIZE];
    bool failed = false;

    memset(pattern, c, FILLBUF_SIZE);

    while (len > 0) {
        /* Size of this chunk: the remaining length, capped at the buffer. */
        int chunk = FILLBUF_SIZE;

        if (len < FILLBUF_SIZE) {
            chunk = len;
        }

        if (address_space_write(as, addr, MEMTXATTRS_UNSPECIFIED,
                                pattern, chunk)) {
            failed = true;
        }

        addr += chunk;
        len -= chunk;
    }

    return failed;
}
|
|
|
|
void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
|
|
AddressSpace *as)
|
|
{
|
|
qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
|
|
qsg->nsg = 0;
|
|
qsg->nalloc = alloc_hint;
|
|
qsg->size = 0;
|
|
qsg->as = as;
|
|
qsg->dev = dev;
|
|
object_ref(OBJECT(dev));
|
|
}
|
|
|
|
/*
 * Append the region [@base, @base + @len) to the scatter/gather list @qsg.
 *
 * When the entry array is full it is grown to 2 * nalloc + 1 entries before
 * the new entry is stored.  The total byte count in @qsg->size is increased
 * by @len.
 */
void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
    if (qsg->nsg == qsg->nalloc) {
        qsg->nalloc = 2 * qsg->nalloc + 1;
        /*
         * g_renew() checks the count * element-size multiplication for
         * overflow, unlike an open-coded g_realloc(p, n * sizeof(...)).
         */
        qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc);
    }
    qsg->sg[qsg->nsg].base = base;
    qsg->sg[qsg->nsg].len = len;
    qsg->size += len;
    ++qsg->nsg;
}
|
|
|
|
/*
 * Release everything held by the scatter/gather list @qsg.
 *
 * Drops the reference on the recorded device, frees the entry array and
 * then zeroes the whole structure.
 */
void qemu_sglist_destroy(QEMUSGList *qsg)
{
    /* Drop the device reference recorded in the list. */
    object_unref(OBJECT(qsg->dev));

    /* Free the entry array. */
    g_free(qsg->sg);

    /* Leave no stale pointers or counters behind. */
    memset(qsg, 0, sizeof *qsg);
}
|
|
|
|
typedef struct {
|
|
BlockAIOCB common;
|
|
AioContext *ctx;
|
|
BlockAIOCB *acb;
|
|
QEMUSGList *sg;
|
|
uint32_t align;
|
|
uint64_t offset;
|
|
DMADirection dir;
|
|
int sg_cur_index;
|
|
dma_addr_t sg_cur_byte;
|
|
QEMUIOVector iov;
|
|
QEMUBH *bh;
|
|
DMAIOFunc *io_func;
|
|
void *io_func_opaque;
|
|
} DMAAIOCB;
|
|
|
|
static void dma_blk_cb(void *opaque, int ret);
|
|
|
|
static void reschedule_dma(void *opaque)
|
|
{
|
|
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
|
|
|
|
assert(!dbs->acb && dbs->bh);
|
|
qemu_bh_delete(dbs->bh);
|
|
dbs->bh = NULL;
|
|
dma_blk_cb(dbs, 0);
|
|
}
|
|
|
|
/*
 * Unmap every segment currently held in @dbs->iov and empty the vector.
 *
 * Each segment is unmapped with its full length passed both as the mapped
 * length and as the accessed length.
 */
static void dma_blk_unmap(DMAAIOCB *dbs)
{
    int seg;

    for (seg = 0; seg < dbs->iov.niov; seg++) {
        dma_memory_unmap(dbs->sg->as,
                         dbs->iov.iov[seg].iov_base,
                         dbs->iov.iov[seg].iov_len,
                         dbs->dir,
                         dbs->iov.iov[seg].iov_len);
    }

    qemu_iovec_reset(&dbs->iov);
}
|
|
|
|
/*
 * Finish the transfer @dbs with result @ret.
 *
 * Unmaps any segments still mapped, invokes the caller's completion
 * callback if one was supplied, then destroys the iovec and drops the
 * AIOCB reference.
 */
static void dma_complete(DMAAIOCB *dbs, int ret)
{
    trace_dma_complete(dbs, ret, dbs->common.cb);

    /* Neither a request nor a retry bottom half may still be pending. */
    assert(!dbs->acb && !dbs->bh);

    dma_blk_unmap(dbs);

    if (dbs->common.cb != NULL) {
        dbs->common.cb(dbs->common.opaque, ret);
    }

    qemu_iovec_destroy(&dbs->iov);
    qemu_aio_unref(dbs);
}
|
|
|
|
static void dma_blk_cb(void *opaque, int ret)
|
|
{
|
|
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
|
|
dma_addr_t cur_addr, cur_len;
|
|
void *mem;
|
|
|
|
trace_dma_blk_cb(dbs, ret);
|
|
|
|
dbs->acb = NULL;
|
|
dbs->offset += dbs->iov.size;
|
|
|
|
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
|
|
dma_complete(dbs, ret);
|
|
return;
|
|
}
|
|
dma_blk_unmap(dbs);
|
|
|
|
while (dbs->sg_cur_index < dbs->sg->nsg) {
|
|
cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
|
|
cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
|
|
mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir);
|
|
/*
|
|
* Make reads deterministic in icount mode. Windows sometimes issues
|
|
* disk read requests with overlapping SGs. It leads
|
|
* to non-determinism, because resulting buffer contents may be mixed
|
|
* from several sectors. This code splits all SGs into several
|
|
* groups. SGs in every group do not overlap.
|
|
*/
|
|
if (mem && use_icount && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
|
|
int i;
|
|
for (i = 0 ; i < dbs->iov.niov ; ++i) {
|
|
if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
|
|
dbs->iov.iov[i].iov_len, (intptr_t)mem,
|
|
cur_len)) {
|
|
dma_memory_unmap(dbs->sg->as, mem, cur_len,
|
|
dbs->dir, cur_len);
|
|
mem = NULL;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (!mem)
|
|
break;
|
|
qemu_iovec_add(&dbs->iov, mem, cur_len);
|
|
dbs->sg_cur_byte += cur_len;
|
|
if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
|
|
dbs->sg_cur_byte = 0;
|
|
++dbs->sg_cur_index;
|
|
}
|
|
}
|
|
|
|
if (dbs->iov.size == 0) {
|
|
trace_dma_map_wait(dbs);
|
|
dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
|
|
cpu_register_map_client(dbs->bh);
|
|
return;
|
|
}
|
|
|
|
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
|
|
qemu_iovec_discard_back(&dbs->iov,
|
|
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
|
|
}
|
|
|
|
aio_context_acquire(dbs->ctx);
|
|
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
|
|
dma_blk_cb, dbs, dbs->io_func_opaque);
|
|
aio_context_release(dbs->ctx);
|
|
assert(dbs->acb);
|
|
}
|
|
|
|
/*
 * cancel_async handler for DMA transfers.
 *
 * If a request is in flight, its cancellation is requested and nothing
 * else is done here.  Otherwise any pending retry bottom half is
 * unregistered and deleted, and the caller's completion callback (if any)
 * is invoked with -ECANCELED.
 */
static void dma_aio_cancel(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    trace_dma_aio_cancel(dbs);

    /* A transfer has either a request in flight or a retry BH, not both. */
    assert(!(dbs->acb && dbs->bh));

    if (dbs->acb != NULL) {
        /* This will invoke dma_blk_cb. */
        blk_aio_cancel_async(dbs->acb);
        return;
    }

    if (dbs->bh != NULL) {
        cpu_unregister_map_client(dbs->bh);
        qemu_bh_delete(dbs->bh);
        dbs->bh = NULL;
    }

    if (dbs->common.cb != NULL) {
        dbs->common.cb(dbs->common.opaque, -ECANCELED);
    }
}
|
|
|
|
/*
 * get_aio_context handler: return the AioContext stored in the DMAAIOCB
 * that embeds @acb.
 */
static AioContext *dma_get_aio_context(BlockAIOCB *acb)
{
    DMAAIOCB *owner = container_of(acb, DMAAIOCB, common);
    AioContext *ctx = owner->ctx;

    return ctx;
}
|
|
|
|
static const AIOCBInfo dma_aiocb_info = {
|
|
.aiocb_size = sizeof(DMAAIOCB),
|
|
.cancel_async = dma_aio_cancel,
|
|
.get_aio_context = dma_get_aio_context,
|
|
};
|
|
|
|
BlockAIOCB *dma_blk_io(AioContext *ctx,
|
|
QEMUSGList *sg, uint64_t offset, uint32_t align,
|
|
DMAIOFunc *io_func, void *io_func_opaque,
|
|
BlockCompletionFunc *cb,
|
|
void *opaque, DMADirection dir)
|
|
{
|
|
DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);
|
|
|
|
trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));
|
|
|
|
dbs->acb = NULL;
|
|
dbs->sg = sg;
|
|
dbs->ctx = ctx;
|
|
dbs->offset = offset;
|
|
dbs->align = align;
|
|
dbs->sg_cur_index = 0;
|
|
dbs->sg_cur_byte = 0;
|
|
dbs->dir = dir;
|
|
dbs->io_func = io_func;
|
|
dbs->io_func_opaque = io_func_opaque;
|
|
dbs->bh = NULL;
|
|
qemu_iovec_init(&dbs->iov, sg->nsg);
|
|
dma_blk_cb(dbs, 0);
|
|
return &dbs->common;
|
|
}
|
|
|
|
|
|
static
|
|
BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
|
|
BlockCompletionFunc *cb, void *cb_opaque,
|
|
void *opaque)
|
|
{
|
|
BlockBackend *blk = opaque;
|
|
return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
|
|
}
|
|
|
|
BlockAIOCB *dma_blk_read(BlockBackend *blk,
|
|
QEMUSGList *sg, uint64_t offset, uint32_t align,
|
|
void (*cb)(void *opaque, int ret), void *opaque)
|
|
{
|
|
return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
|
|
dma_blk_read_io_func, blk, cb, opaque,
|
|
DMA_DIRECTION_FROM_DEVICE);
|
|
}
|
|
|
|
static
|
|
BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
|
|
BlockCompletionFunc *cb, void *cb_opaque,
|
|
void *opaque)
|
|
{
|
|
BlockBackend *blk = opaque;
|
|
return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
|
|
}
|
|
|
|
BlockAIOCB *dma_blk_write(BlockBackend *blk,
|
|
QEMUSGList *sg, uint64_t offset, uint32_t align,
|
|
void (*cb)(void *opaque, int ret), void *opaque)
|
|
{
|
|
return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
|
|
dma_blk_write_io_func, blk, cb, opaque,
|
|
DMA_DIRECTION_TO_DEVICE);
|
|
}
|
|
|
|
|
|
static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg,
|
|
DMADirection dir)
|
|
{
|
|
uint64_t resid;
|
|
int sg_cur_index;
|
|
|
|
resid = sg->size;
|
|
sg_cur_index = 0;
|
|
len = MIN(len, resid);
|
|
while (len > 0) {
|
|
ScatterGatherEntry entry = sg->sg[sg_cur_index++];
|
|
int32_t xfer = MIN(len, entry.len);
|
|
dma_memory_rw(sg->as, entry.base, ptr, xfer, dir);
|
|
ptr += xfer;
|
|
len -= xfer;
|
|
resid -= xfer;
|
|
}
|
|
|
|
return resid;
|
|
}
|
|
|
|
/*
 * Run dma_buf_rw() on @ptr, @len and @sg with direction
 * DMA_DIRECTION_FROM_DEVICE and return its result (the number of bytes of
 * @sg left untransferred).
 */
uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg)
{
    uint64_t residual = dma_buf_rw(ptr, len, sg, DMA_DIRECTION_FROM_DEVICE);

    return residual;
}
|
|
|
|
/*
 * Run dma_buf_rw() on @ptr, @len and @sg with direction
 * DMA_DIRECTION_TO_DEVICE and return its result (the number of bytes of
 * @sg left untransferred).
 */
uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg)
{
    uint64_t residual = dma_buf_rw(ptr, len, sg, DMA_DIRECTION_TO_DEVICE);

    return residual;
}
|
|
|
|
void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
|
|
QEMUSGList *sg, enum BlockAcctType type)
|
|
{
|
|
block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
|
|
}
|