mirror of
https://github.com/qemu/qemu.git
synced 2024-11-24 19:33:39 +08:00
Align file accesses with cache=off (O_DIRECT) (Kevin Wolf, Laurent Vivier)
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4599 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
parent
0ac087f1f3
commit
bed5cc5207
@ -70,6 +70,8 @@
|
||||
#define FTYPE_CD 1
|
||||
#define FTYPE_FD 2
|
||||
|
||||
#define ALIGNED_BUFFER_SIZE (32 * 512)
|
||||
|
||||
/* if the FD is not accessed during that time (in ms), we try to
|
||||
reopen it to see if the disk has been changed */
|
||||
#define FD_OPEN_TIMEOUT 1000
|
||||
@ -86,6 +88,9 @@ typedef struct BDRVRawState {
|
||||
int fd_got_error;
|
||||
int fd_media_changed;
|
||||
#endif
|
||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
|
||||
uint8_t* aligned_buf;
|
||||
#endif
|
||||
} BDRVRawState;
|
||||
|
||||
static int fd_open(BlockDriverState *bs);
|
||||
@ -121,6 +126,17 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
|
||||
return ret;
|
||||
}
|
||||
s->fd = fd;
|
||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
|
||||
s->aligned_buf = NULL;
|
||||
if (flags & BDRV_O_DIRECT) {
|
||||
s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
|
||||
if (s->aligned_buf == NULL) {
|
||||
ret = -errno;
|
||||
close(fd);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -141,7 +157,14 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
|
||||
#endif
|
||||
*/
|
||||
|
||||
static int raw_pread(BlockDriverState *bs, int64_t offset,
|
||||
/*
|
||||
* offset and count are in bytes, but must be multiples of 512 for files
|
||||
* opened with O_DIRECT. buf must be aligned to 512 bytes then.
|
||||
*
|
||||
* This function may be called without alignment if the caller ensures
|
||||
* that O_DIRECT is not in effect.
|
||||
*/
|
||||
static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
|
||||
uint8_t *buf, int count)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
@ -194,7 +217,14 @@ label__raw_read__success:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
|
||||
/*
|
||||
* offset and count are in bytes, but must be multiples of 512 for files
|
||||
* opened with O_DIRECT. buf must be aligned to 512 bytes then.
|
||||
*
|
||||
* This function may be called without alignment if the caller ensures
|
||||
* that O_DIRECT is not in effect.
|
||||
*/
|
||||
static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
|
||||
const uint8_t *buf, int count)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
@ -230,6 +260,164 @@ label__raw_write__success:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
|
||||
/*
|
||||
* offset and count are in bytes and possibly not aligned. For files opened
|
||||
* with O_DIRECT, necessary alignments are ensured before calling
|
||||
* raw_pread_aligned to do the actual read.
|
||||
*/
|
||||
static int raw_pread(BlockDriverState *bs, int64_t offset,
|
||||
uint8_t *buf, int count)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
int size, ret, shift, sum;
|
||||
|
||||
sum = 0;
|
||||
|
||||
if (s->aligned_buf != NULL) {
|
||||
|
||||
if (offset & 0x1ff) {
|
||||
/* align offset on a 512 bytes boundary */
|
||||
|
||||
shift = offset & 0x1ff;
|
||||
size = (shift + count + 0x1ff) & ~0x1ff;
|
||||
if (size > ALIGNED_BUFFER_SIZE)
|
||||
size = ALIGNED_BUFFER_SIZE;
|
||||
ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
size = 512 - shift;
|
||||
if (size > count)
|
||||
size = count;
|
||||
memcpy(buf, s->aligned_buf + shift, size);
|
||||
|
||||
buf += size;
|
||||
offset += size;
|
||||
count -= size;
|
||||
sum += size;
|
||||
|
||||
if (count == 0)
|
||||
return sum;
|
||||
}
|
||||
if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
|
||||
|
||||
/* read on aligned buffer */
|
||||
|
||||
while (count) {
|
||||
|
||||
size = (count + 0x1ff) & ~0x1ff;
|
||||
if (size > ALIGNED_BUFFER_SIZE)
|
||||
size = ALIGNED_BUFFER_SIZE;
|
||||
|
||||
ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
size = ret;
|
||||
if (size > count)
|
||||
size = count;
|
||||
|
||||
memcpy(buf, s->aligned_buf, size);
|
||||
|
||||
buf += size;
|
||||
offset += size;
|
||||
count -= size;
|
||||
sum += size;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
||||
return raw_pread_aligned(bs, offset, buf, count) + sum;
|
||||
}
|
||||
|
||||
/*
|
||||
* offset and count are in bytes and possibly not aligned. For files opened
|
||||
* with O_DIRECT, necessary alignments are ensured before calling
|
||||
* raw_pwrite_aligned to do the actual write.
|
||||
*/
|
||||
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
|
||||
const uint8_t *buf, int count)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
int size, ret, shift, sum;
|
||||
|
||||
sum = 0;
|
||||
|
||||
if (s->aligned_buf != NULL) {
|
||||
|
||||
if (offset & 0x1ff) {
|
||||
/* align offset on a 512 bytes boundary */
|
||||
shift = offset & 0x1ff;
|
||||
ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
size = 512 - shift;
|
||||
if (size > count)
|
||||
size = count;
|
||||
memcpy(s->aligned_buf + shift, buf, size);
|
||||
|
||||
ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
buf += size;
|
||||
offset += size;
|
||||
count -= size;
|
||||
sum += size;
|
||||
|
||||
if (count == 0)
|
||||
return sum;
|
||||
}
|
||||
if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
|
||||
|
||||
while ((size = (count & ~0x1ff)) != 0) {
|
||||
|
||||
if (size > ALIGNED_BUFFER_SIZE)
|
||||
size = ALIGNED_BUFFER_SIZE;
|
||||
|
||||
memcpy(s->aligned_buf, buf, size);
|
||||
|
||||
ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
buf += ret;
|
||||
offset += ret;
|
||||
count -= ret;
|
||||
sum += ret;
|
||||
}
|
||||
/* here, count < 512 because (count & ~0x1ff) == 0 */
|
||||
if (count) {
|
||||
ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
memcpy(s->aligned_buf, buf, count);
|
||||
|
||||
ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (count < ret)
|
||||
ret = count;
|
||||
|
||||
sum += ret;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
return raw_pwrite_aligned(bs, offset, buf, count) + sum;
|
||||
}
|
||||
|
||||
#else
|
||||
#define raw_pread raw_pread_aligned
|
||||
#define raw_pwrite raw_pwrite_aligned
|
||||
#endif
|
||||
|
||||
|
||||
/***********************************************************/
|
||||
/* Unix AIO using POSIX AIO */
|
||||
|
||||
@ -237,6 +425,7 @@ typedef struct RawAIOCB {
|
||||
BlockDriverAIOCB common;
|
||||
struct aiocb aiocb;
|
||||
struct RawAIOCB *next;
|
||||
int ret;
|
||||
} RawAIOCB;
|
||||
|
||||
static int aio_sig_num = SIGUSR2;
|
||||
@ -397,12 +586,38 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
|
||||
return acb;
|
||||
}
|
||||
|
||||
#ifndef QEMU_IMG
|
||||
static void raw_aio_em_cb(void* opaque)
|
||||
{
|
||||
RawAIOCB *acb = opaque;
|
||||
acb->common.cb(acb->common.opaque, acb->ret);
|
||||
qemu_aio_release(acb);
|
||||
}
|
||||
#endif
|
||||
|
||||
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
|
||||
int64_t sector_num, uint8_t *buf, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
RawAIOCB *acb;
|
||||
|
||||
/*
|
||||
* If O_DIRECT is used and the buffer is not aligned fall back
|
||||
* to synchronous IO.
|
||||
*/
|
||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
|
||||
BDRVRawState *s = bs->opaque;
|
||||
|
||||
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
|
||||
QEMUBH *bh;
|
||||
acb = qemu_aio_get(bs, cb, opaque);
|
||||
acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
|
||||
bh = qemu_bh_new(raw_aio_em_cb, acb);
|
||||
qemu_bh_schedule(bh);
|
||||
return &acb->common;
|
||||
}
|
||||
#endif
|
||||
|
||||
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
|
||||
if (!acb)
|
||||
return NULL;
|
||||
@ -419,6 +634,23 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
|
||||
{
|
||||
RawAIOCB *acb;
|
||||
|
||||
/*
|
||||
* If O_DIRECT is used and the buffer is not aligned fall back
|
||||
* to synchronous IO.
|
||||
*/
|
||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
|
||||
BDRVRawState *s = bs->opaque;
|
||||
|
||||
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
|
||||
QEMUBH *bh;
|
||||
acb = qemu_aio_get(bs, cb, opaque);
|
||||
acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
|
||||
bh = qemu_bh_new(raw_aio_em_cb, acb);
|
||||
qemu_bh_schedule(bh);
|
||||
return &acb->common;
|
||||
}
|
||||
#endif
|
||||
|
||||
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
|
||||
if (!acb)
|
||||
return NULL;
|
||||
@ -462,6 +694,10 @@ static void raw_close(BlockDriverState *bs)
|
||||
if (s->fd >= 0) {
|
||||
close(s->fd);
|
||||
s->fd = -1;
|
||||
#if defined(O_DIRECT) && !defined(QEMU_IMG)
|
||||
if (s->aligned_buf != NULL)
|
||||
qemu_free(s->aligned_buf);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user