Align file accesses with cache=off (O_DIRECT) (Kevin Wolf, Laurent Vivier)

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4599 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
bellard 2008-05-28 09:51:09 +00:00
parent 0ac087f1f3
commit bed5cc5207

View File

@ -70,6 +70,8 @@
#define FTYPE_CD 1
#define FTYPE_FD 2
#define ALIGNED_BUFFER_SIZE (32 * 512)
/* if the FD is not accessed during that time (in ms), we try to
reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT 1000
@ -86,6 +88,9 @@ typedef struct BDRVRawState {
int fd_got_error;
int fd_media_changed;
#endif
#if defined(O_DIRECT) && !defined(QEMU_IMG)
uint8_t* aligned_buf;
#endif
} BDRVRawState;
static int fd_open(BlockDriverState *bs);
@ -121,6 +126,17 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
return ret;
}
s->fd = fd;
#if defined(O_DIRECT) && !defined(QEMU_IMG)
s->aligned_buf = NULL;
if (flags & BDRV_O_DIRECT) {
s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
if (s->aligned_buf == NULL) {
ret = -errno;
close(fd);
return ret;
}
}
#endif
return 0;
}
@ -141,7 +157,14 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
#endif
*/
static int raw_pread(BlockDriverState *bs, int64_t offset,
/*
* offset and count are in bytes, but must be multiples of 512 for files
* opened with O_DIRECT. buf must be aligned to 512 bytes then.
*
* This function may be called without alignment if the caller ensures
* that O_DIRECT is not in effect.
*/
static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@ -194,7 +217,14 @@ label__raw_read__success:
return ret;
}
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
/*
* offset and count are in bytes, but must be multiples of 512 for files
* opened with O_DIRECT. buf must be aligned to 512 bytes then.
*
* This function may be called without alignment if the caller ensures
* that O_DIRECT is not in effect.
*/
static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
const uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@ -230,6 +260,164 @@ label__raw_write__success:
return ret;
}
#if defined(O_DIRECT) && !defined(QEMU_IMG)
/*
* offset and count are in bytes and possibly not aligned. For files opened
* with O_DIRECT, necessary alignments are ensured before calling
* raw_pread_aligned to do the actual read.
*/
static int raw_pread(BlockDriverState *bs, int64_t offset,
uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
int size, ret, shift, sum;
sum = 0;
if (s->aligned_buf != NULL) {
if (offset & 0x1ff) {
/* align offset on a 512 bytes boundary */
shift = offset & 0x1ff;
size = (shift + count + 0x1ff) & ~0x1ff;
if (size > ALIGNED_BUFFER_SIZE)
size = ALIGNED_BUFFER_SIZE;
ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
if (ret < 0)
return ret;
size = 512 - shift;
if (size > count)
size = count;
memcpy(buf, s->aligned_buf + shift, size);
buf += size;
offset += size;
count -= size;
sum += size;
if (count == 0)
return sum;
}
if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
/* read on aligned buffer */
while (count) {
size = (count + 0x1ff) & ~0x1ff;
if (size > ALIGNED_BUFFER_SIZE)
size = ALIGNED_BUFFER_SIZE;
ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
if (ret < 0)
return ret;
size = ret;
if (size > count)
size = count;
memcpy(buf, s->aligned_buf, size);
buf += size;
offset += size;
count -= size;
sum += size;
}
return sum;
}
}
return raw_pread_aligned(bs, offset, buf, count) + sum;
}
/*
* offset and count are in bytes and possibly not aligned. For files opened
* with O_DIRECT, necessary alignments are ensured before calling
* raw_pwrite_aligned to do the actual write.
*/
static int raw_pwrite(BlockDriverState *bs, int64_t offset,
const uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
int size, ret, shift, sum;
sum = 0;
if (s->aligned_buf != NULL) {
if (offset & 0x1ff) {
/* align offset on a 512 bytes boundary */
shift = offset & 0x1ff;
ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
if (ret < 0)
return ret;
size = 512 - shift;
if (size > count)
size = count;
memcpy(s->aligned_buf + shift, buf, size);
ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
if (ret < 0)
return ret;
buf += size;
offset += size;
count -= size;
sum += size;
if (count == 0)
return sum;
}
if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
while ((size = (count & ~0x1ff)) != 0) {
if (size > ALIGNED_BUFFER_SIZE)
size = ALIGNED_BUFFER_SIZE;
memcpy(s->aligned_buf, buf, size);
ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
if (ret < 0)
return ret;
buf += ret;
offset += ret;
count -= ret;
sum += ret;
}
/* here, count < 512 because (count & ~0x1ff) == 0 */
if (count) {
ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
if (ret < 0)
return ret;
memcpy(s->aligned_buf, buf, count);
ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
if (ret < 0)
return ret;
if (count < ret)
ret = count;
sum += ret;
}
return sum;
}
}
return raw_pwrite_aligned(bs, offset, buf, count) + sum;
}
#else
#define raw_pread raw_pread_aligned
#define raw_pwrite raw_pwrite_aligned
#endif
/***********************************************************/
/* Unix AIO using POSIX AIO */
@ -237,6 +425,7 @@ typedef struct RawAIOCB {
BlockDriverAIOCB common;
struct aiocb aiocb;
struct RawAIOCB *next;
int ret;
} RawAIOCB;
static int aio_sig_num = SIGUSR2;
@ -397,12 +586,38 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
return acb;
}
#ifndef QEMU_IMG
static void raw_aio_em_cb(void* opaque)
{
RawAIOCB *acb = opaque;
acb->common.cb(acb->common.opaque, acb->ret);
qemu_aio_release(acb);
}
#endif
static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
int64_t sector_num, uint8_t *buf, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
/*
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
#if defined(O_DIRECT) && !defined(QEMU_IMG)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
QEMUBH *bh;
acb = qemu_aio_get(bs, cb, opaque);
acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
bh = qemu_bh_new(raw_aio_em_cb, acb);
qemu_bh_schedule(bh);
return &acb->common;
}
#endif
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
@ -419,6 +634,23 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
{
RawAIOCB *acb;
/*
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
#if defined(O_DIRECT) && !defined(QEMU_IMG)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
QEMUBH *bh;
acb = qemu_aio_get(bs, cb, opaque);
acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
bh = qemu_bh_new(raw_aio_em_cb, acb);
qemu_bh_schedule(bh);
return &acb->common;
}
#endif
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
@ -462,6 +694,10 @@ static void raw_close(BlockDriverState *bs)
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
#if defined(O_DIRECT) && !defined(QEMU_IMG)
if (s->aligned_buf != NULL)
qemu_free(s->aligned_buf);
#endif
}
}