libext2fs: fix unix_io's Direct I/O support

The previous Direct I/O support worked on HDD's with 512 byte logical
sector sizes, and on FreeBSD which required 4k aligned memory buffers.
However, it was incomplete and was not correctly working on HDD's with
4k logical sector sizes (aka Advanced Format Disks).

Based on a patch from Alexey Lyashkov <alexey.lyashkov@hpe.com> but
rewritten to work with the latest e2fsprogs and to minimize changes to
make it easier to review.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reported-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
This commit is contained in:
Theodore Ts'o 2021-02-26 17:41:06 -05:00
parent c176c240cb
commit c001596110
2 changed files with 70 additions and 23 deletions

View File

@ -134,9 +134,11 @@ errcode_t io_channel_alloc_buf(io_channel io, int count, void *ptr)
else else
size = -count; size = -count;
if (io->align) if (io->align) {
if (io->align > size)
size = io->align;
return ext2fs_get_memalign(size, io->align, ptr); return ext2fs_get_memalign(size, io->align, ptr);
else } else
return ext2fs_get_mem(size, ptr); return ext2fs_get_mem(size, ptr);
} }

View File

@ -218,6 +218,8 @@ static errcode_t raw_read_blk(io_channel channel,
int actual = 0; int actual = 0;
unsigned char *buf = bufv; unsigned char *buf = bufv;
ssize_t really_read = 0; ssize_t really_read = 0;
unsigned long long aligned_blk;
int align_size, offset;
size = (count < 0) ? -count : (ext2_loff_t) count * channel->block_size; size = (count < 0) ? -count : (ext2_loff_t) count * channel->block_size;
mutex_lock(data, STATS_MTX); mutex_lock(data, STATS_MTX);
@ -226,7 +228,7 @@ static errcode_t raw_read_blk(io_channel channel,
location = ((ext2_loff_t) block * channel->block_size) + data->offset; location = ((ext2_loff_t) block * channel->block_size) + data->offset;
if (data->flags & IO_FLAG_FORCE_BOUNCE) { if (data->flags & IO_FLAG_FORCE_BOUNCE) {
if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) {
retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
goto error_out; goto error_out;
} }
@ -237,6 +239,7 @@ static errcode_t raw_read_blk(io_channel channel,
/* Try an aligned pread */ /* Try an aligned pread */
if ((channel->align == 0) || if ((channel->align == 0) ||
(IS_ALIGNED(buf, channel->align) && (IS_ALIGNED(buf, channel->align) &&
IS_ALIGNED(location, channel->align) &&
IS_ALIGNED(size, channel->align))) { IS_ALIGNED(size, channel->align))) {
actual = pread64(data->dev, buf, size, location); actual = pread64(data->dev, buf, size, location);
if (actual == size) if (actual == size)
@ -248,6 +251,7 @@ static errcode_t raw_read_blk(io_channel channel,
if ((sizeof(off_t) >= sizeof(ext2_loff_t)) && if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
((channel->align == 0) || ((channel->align == 0) ||
(IS_ALIGNED(buf, channel->align) && (IS_ALIGNED(buf, channel->align) &&
IS_ALIGNED(location, channel->align) &&
IS_ALIGNED(size, channel->align)))) { IS_ALIGNED(size, channel->align)))) {
actual = pread(data->dev, buf, size, location); actual = pread(data->dev, buf, size, location);
if (actual == size) if (actual == size)
@ -256,12 +260,13 @@ static errcode_t raw_read_blk(io_channel channel,
} }
#endif /* HAVE_PREAD */ #endif /* HAVE_PREAD */
if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) {
retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
goto error_out; goto error_out;
} }
if ((channel->align == 0) || if ((channel->align == 0) ||
(IS_ALIGNED(buf, channel->align) && (IS_ALIGNED(buf, channel->align) &&
IS_ALIGNED(location, channel->align) &&
IS_ALIGNED(size, channel->align))) { IS_ALIGNED(size, channel->align))) {
actual = read(data->dev, buf, size); actual = read(data->dev, buf, size);
if (actual != size) { if (actual != size) {
@ -286,23 +291,39 @@ static errcode_t raw_read_blk(io_channel channel,
* to the O_DIRECT rules, so we need to do this the hard way... * to the O_DIRECT rules, so we need to do this the hard way...
*/ */
bounce_read: bounce_read:
if ((channel->block_size > channel->align) &&
(channel->block_size % channel->align) == 0)
align_size = channel->block_size;
else
align_size = channel->align;
aligned_blk = location / align_size;
offset = location % align_size;
if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) {
retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
goto error_out;
}
while (size > 0) { while (size > 0) {
mutex_lock(data, BOUNCE_MTX); mutex_lock(data, BOUNCE_MTX);
actual = read(data->dev, data->bounce, channel->block_size); actual = read(data->dev, data->bounce, align_size);
if (actual != channel->block_size) { if (actual != align_size) {
mutex_unlock(data, BOUNCE_MTX); mutex_unlock(data, BOUNCE_MTX);
actual = really_read; actual = really_read;
buf -= really_read; buf -= really_read;
size += really_read; size += really_read;
goto short_read; goto short_read;
} }
if ((actual + offset) > align_size)
actual = align_size - offset;
if (actual > size)
actual = size; actual = size;
if (size > channel->block_size) memcpy(buf, data->bounce + offset, actual);
actual = channel->block_size;
memcpy(buf, data->bounce, actual);
really_read += actual; really_read += actual;
size -= actual; size -= actual;
buf += actual; buf += actual;
offset = 0;
aligned_blk++;
mutex_unlock(data, BOUNCE_MTX); mutex_unlock(data, BOUNCE_MTX);
} }
return 0; return 0;
@ -326,6 +347,8 @@ static errcode_t raw_write_blk(io_channel channel,
int actual = 0; int actual = 0;
errcode_t retval; errcode_t retval;
const unsigned char *buf = bufv; const unsigned char *buf = bufv;
unsigned long long aligned_blk;
int align_size, offset;
if (count == 1) if (count == 1)
size = channel->block_size; size = channel->block_size;
@ -342,7 +365,7 @@ static errcode_t raw_write_blk(io_channel channel,
location = ((ext2_loff_t) block * channel->block_size) + data->offset; location = ((ext2_loff_t) block * channel->block_size) + data->offset;
if (data->flags & IO_FLAG_FORCE_BOUNCE) { if (data->flags & IO_FLAG_FORCE_BOUNCE) {
if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) {
retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
goto error_out; goto error_out;
} }
@ -353,6 +376,7 @@ static errcode_t raw_write_blk(io_channel channel,
/* Try an aligned pwrite */ /* Try an aligned pwrite */
if ((channel->align == 0) || if ((channel->align == 0) ||
(IS_ALIGNED(buf, channel->align) && (IS_ALIGNED(buf, channel->align) &&
IS_ALIGNED(location, channel->align) &&
IS_ALIGNED(size, channel->align))) { IS_ALIGNED(size, channel->align))) {
actual = pwrite64(data->dev, buf, size, location); actual = pwrite64(data->dev, buf, size, location);
if (actual == size) if (actual == size)
@ -363,6 +387,7 @@ static errcode_t raw_write_blk(io_channel channel,
if ((sizeof(off_t) >= sizeof(ext2_loff_t)) && if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
((channel->align == 0) || ((channel->align == 0) ||
(IS_ALIGNED(buf, channel->align) && (IS_ALIGNED(buf, channel->align) &&
IS_ALIGNED(location, channel->align) &&
IS_ALIGNED(size, channel->align)))) { IS_ALIGNED(size, channel->align)))) {
actual = pwrite(data->dev, buf, size, location); actual = pwrite(data->dev, buf, size, location);
if (actual == size) if (actual == size)
@ -370,13 +395,14 @@ static errcode_t raw_write_blk(io_channel channel,
} }
#endif /* HAVE_PWRITE */ #endif /* HAVE_PWRITE */
if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) {
retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
goto error_out; goto error_out;
} }
if ((channel->align == 0) || if ((channel->align == 0) ||
(IS_ALIGNED(buf, channel->align) && (IS_ALIGNED(buf, channel->align) &&
IS_ALIGNED(location, channel->align) &&
IS_ALIGNED(size, channel->align))) { IS_ALIGNED(size, channel->align))) {
actual = write(data->dev, buf, size); actual = write(data->dev, buf, size);
if (actual < 0) { if (actual < 0) {
@ -400,40 +426,59 @@ static errcode_t raw_write_blk(io_channel channel,
* to the O_DIRECT rules, so we need to do this the hard way... * to the O_DIRECT rules, so we need to do this the hard way...
*/ */
bounce_write: bounce_write:
if ((channel->block_size > channel->align) &&
(channel->block_size % channel->align) == 0)
align_size = channel->block_size;
else
align_size = channel->align;
aligned_blk = location / align_size;
offset = location % align_size;
while (size > 0) { while (size > 0) {
int actual_w;
mutex_lock(data, BOUNCE_MTX); mutex_lock(data, BOUNCE_MTX);
if (size < channel->block_size) { if (size < align_size || offset) {
if (ext2fs_llseek(data->dev, aligned_blk * align_size,
SEEK_SET) < 0) {
retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
goto error_out;
}
actual = read(data->dev, data->bounce, actual = read(data->dev, data->bounce,
channel->block_size); align_size);
if (actual != channel->block_size) { if (actual != align_size) {
if (actual < 0) { if (actual < 0) {
mutex_unlock(data, BOUNCE_MTX); mutex_unlock(data, BOUNCE_MTX);
retval = errno; retval = errno;
goto error_out; goto error_out;
} }
memset((char *) data->bounce + actual, 0, memset((char *) data->bounce + actual, 0,
channel->block_size - actual); align_size - actual);
} }
} }
actual = size; actual = size;
if (size > channel->block_size) if ((actual + offset) > align_size)
actual = channel->block_size; actual = align_size - offset;
memcpy(data->bounce, buf, actual); if (actual > size)
if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { actual = size;
memcpy(((char *)data->bounce) + offset, buf, actual);
if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) {
retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
goto error_out; goto error_out;
} }
actual = write(data->dev, data->bounce, channel->block_size); actual_w = write(data->dev, data->bounce, align_size);
mutex_unlock(data, BOUNCE_MTX); mutex_unlock(data, BOUNCE_MTX);
if (actual < 0) { if (actual_w < 0) {
retval = errno; retval = errno;
goto error_out; goto error_out;
} }
if (actual != channel->block_size) if (actual_w != align_size)
goto short_write; goto short_write;
size -= actual; size -= actual;
buf += actual; buf += actual;
location += actual; location += actual;
aligned_blk++;
offset = 0;
} }
return 0; return 0;