2007-10-22 09:03:38 +08:00
|
|
|
//#define DEBUG
|
|
|
|
#include <linux/spinlock.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the followings.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build test were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable easily on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/slab.h>
|
2007-10-22 09:03:38 +08:00
|
|
|
#include <linux/blkdev.h>
|
|
|
|
#include <linux/hdreg.h>
|
2011-07-02 03:56:05 +08:00
|
|
|
#include <linux/module.h>
|
2012-01-12 13:14:44 +08:00
|
|
|
#include <linux/mutex.h>
|
2007-10-22 09:03:38 +08:00
|
|
|
#include <linux/virtio.h>
|
|
|
|
#include <linux/virtio_blk.h>
|
2007-10-24 19:21:21 +08:00
|
|
|
#include <linux/scatterlist.h>
|
2011-02-02 04:43:48 +08:00
|
|
|
#include <linux/string_helpers.h>
|
2011-04-24 02:49:26 +08:00
|
|
|
#include <scsi/scsi_cmnd.h>
|
2011-10-31 03:29:59 +08:00
|
|
|
#include <linux/idr.h>
|
2013-11-02 00:52:52 +08:00
|
|
|
#include <linux/blk-mq.h>
|
|
|
|
#include <linux/numa.h>
|
2007-10-24 19:21:21 +08:00
|
|
|
|
2008-01-31 22:53:53 +08:00
|
|
|
#define PART_BITS 4
|
2014-06-26 17:41:48 +08:00
|
|
|
#define VQ_NAME_LEN 16
|
2007-10-22 09:03:38 +08:00
|
|
|
|
2011-10-31 03:29:59 +08:00
|
|
|
static int major;
|
|
|
|
static DEFINE_IDA(vd_index_ida);
|
|
|
|
|
2013-05-20 08:55:39 +08:00
|
|
|
static struct workqueue_struct *virtblk_wq;
|
2008-01-31 22:53:53 +08:00
|
|
|
|
2014-06-26 17:41:48 +08:00
|
|
|
struct virtio_blk_vq {
|
|
|
|
struct virtqueue *vq;
|
|
|
|
spinlock_t lock;
|
|
|
|
char name[VQ_NAME_LEN];
|
|
|
|
} ____cacheline_aligned_in_smp;
|
|
|
|
|
2015-01-15 19:33:31 +08:00
|
|
|
struct virtio_blk {
|
2007-10-22 09:03:38 +08:00
|
|
|
struct virtio_device *vdev;
|
|
|
|
|
|
|
|
/* The disk structure for the kernel. */
|
|
|
|
struct gendisk *disk;
|
|
|
|
|
2014-04-16 04:14:00 +08:00
|
|
|
/* Block layer tags. */
|
|
|
|
struct blk_mq_tag_set tag_set;
|
|
|
|
|
2011-02-02 04:43:48 +08:00
|
|
|
/* Process context for config space updates */
|
|
|
|
struct work_struct config_work;
|
|
|
|
|
2008-12-30 23:26:05 +08:00
|
|
|
/* What host tells us, plus 2 for header & tailer. */
|
|
|
|
unsigned int sg_elems;
|
|
|
|
|
2011-10-31 03:29:59 +08:00
|
|
|
/* Ida index - used to track minor number allocations. */
|
|
|
|
int index;
|
2014-06-26 17:41:48 +08:00
|
|
|
|
|
|
|
/* num of vqs */
|
|
|
|
int num_vqs;
|
|
|
|
struct virtio_blk_vq *vqs;
|
2007-10-22 09:03:38 +08:00
|
|
|
};
|
|
|
|
|
2015-01-15 19:33:31 +08:00
|
|
|
struct virtblk_req {
|
2007-10-22 09:03:38 +08:00
|
|
|
struct request *req;
|
|
|
|
struct virtio_blk_outhdr out_hdr;
|
2009-05-18 20:41:30 +08:00
|
|
|
struct virtio_scsi_inhdr in_hdr;
|
2008-05-03 10:50:45 +08:00
|
|
|
u8 status;
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
struct scatterlist sg[];
|
2007-10-22 09:03:38 +08:00
|
|
|
};
|
|
|
|
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
static inline int virtblk_result(struct virtblk_req *vbr)
|
|
|
|
{
|
|
|
|
switch (vbr->status) {
|
|
|
|
case VIRTIO_BLK_S_OK:
|
|
|
|
return 0;
|
|
|
|
case VIRTIO_BLK_S_UNSUPP:
|
|
|
|
return -ENOTTY;
|
|
|
|
default:
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-03-20 13:14:27 +08:00
|
|
|
static int __virtblk_add_req(struct virtqueue *vq,
|
2013-03-20 13:14:27 +08:00
|
|
|
struct virtblk_req *vbr,
|
|
|
|
struct scatterlist *data_sg,
|
2013-03-20 13:14:27 +08:00
|
|
|
bool have_data)
|
2012-08-08 16:07:05 +08:00
|
|
|
{
|
2013-03-20 13:14:27 +08:00
|
|
|
struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
|
2013-03-20 13:14:27 +08:00
|
|
|
unsigned int num_out = 0, num_in = 0;
|
2014-10-07 22:39:49 +08:00
|
|
|
__virtio32 type = vbr->out_hdr.type & ~cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT);
|
2013-03-20 13:14:27 +08:00
|
|
|
|
|
|
|
sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
|
|
|
|
sgs[num_out++] = &hdr;
|
|
|
|
|
2013-03-20 13:14:27 +08:00
|
|
|
/*
|
|
|
|
* If this is a packet command we need a couple of additional headers.
|
|
|
|
* Behind the normal outhdr we put a segment with the scsi command
|
|
|
|
* block, and before the normal inhdr we put the sense data and the
|
|
|
|
* inhdr with additional status information.
|
|
|
|
*/
|
2014-10-07 22:39:49 +08:00
|
|
|
if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
|
2013-03-20 13:14:27 +08:00
|
|
|
sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len);
|
|
|
|
sgs[num_out++] = &cmd;
|
|
|
|
}
|
|
|
|
|
2013-03-20 13:14:27 +08:00
|
|
|
if (have_data) {
|
2014-10-07 22:39:49 +08:00
|
|
|
if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
|
2013-03-20 13:14:27 +08:00
|
|
|
sgs[num_out++] = data_sg;
|
2013-03-20 13:14:27 +08:00
|
|
|
else
|
2013-03-20 13:14:27 +08:00
|
|
|
sgs[num_out + num_in++] = data_sg;
|
|
|
|
}
|
|
|
|
|
2014-10-07 22:39:49 +08:00
|
|
|
if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
|
2013-03-20 13:14:27 +08:00
|
|
|
sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
|
|
|
|
sgs[num_out + num_in++] = &sense;
|
|
|
|
sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
|
|
|
|
sgs[num_out + num_in++] = &inhdr;
|
2013-03-20 13:14:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
sg_init_one(&status, &vbr->status, sizeof(vbr->status));
|
|
|
|
sgs[num_out + num_in++] = &status;
|
|
|
|
|
|
|
|
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
|
2013-03-20 13:14:27 +08:00
|
|
|
}
|
|
|
|
|
2014-02-10 19:24:39 +08:00
|
|
|
static inline void virtblk_request_done(struct request *req)
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
{
|
2014-04-14 16:30:07 +08:00
|
|
|
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
|
2014-10-07 22:39:49 +08:00
|
|
|
struct virtio_blk *vblk = req->q->queuedata;
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
int error = virtblk_result(vbr);
|
|
|
|
|
|
|
|
if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
|
2014-10-07 22:39:49 +08:00
|
|
|
req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
|
|
|
|
req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
|
|
|
|
req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
|
2015-04-18 04:37:16 +08:00
|
|
|
} else if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
req->errors = (error != 0);
|
|
|
|
}
|
|
|
|
|
2014-09-14 07:40:10 +08:00
|
|
|
blk_mq_end_request(req, error);
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void virtblk_done(struct virtqueue *vq)
|
2007-10-22 09:03:38 +08:00
|
|
|
{
|
|
|
|
struct virtio_blk *vblk = vq->vdev->priv;
|
2013-11-02 00:52:52 +08:00
|
|
|
bool req_done = false;
|
2014-06-26 17:41:48 +08:00
|
|
|
int qid = vq->index;
|
2007-10-22 09:03:38 +08:00
|
|
|
struct virtblk_req *vbr;
|
|
|
|
unsigned long flags;
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
unsigned int len;
|
2007-10-22 09:03:38 +08:00
|
|
|
|
2014-06-26 17:41:48 +08:00
|
|
|
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
|
virtio-blk: Disable callback in virtblk_done()
This reduces unnecessary interrupts that host could send to guest while
guest is in the progress of irq handling.
If one vcpu is handling the irq, while another interrupt comes, in
handle_edge_irq(), the guest will mask the interrupt via mask_msi_irq()
which is a very heavy operation that goes all the way down to host.
Here are some performance numbers on qemu:
Before:
-------------------------------------
seq-read : io=0 B, bw=269730KB/s, iops=67432 , runt= 62200msec
seq-write : io=0 B, bw=339716KB/s, iops=84929 , runt= 49386msec
rand-read : io=0 B, bw=270435KB/s, iops=67608 , runt= 62038msec
rand-write: io=0 B, bw=354436KB/s, iops=88608 , runt= 47335msec
clat (usec): min=101 , max=138052 , avg=14822.09, stdev=11771.01
clat (usec): min=96 , max=81543 , avg=11798.94, stdev=7735.60
clat (usec): min=128 , max=140043 , avg=14835.85, stdev=11765.33
clat (usec): min=109 , max=147207 , avg=11337.09, stdev=5990.35
cpu : usr=15.93%, sys=60.37%, ctx=7764972, majf=0, minf=54
cpu : usr=32.73%, sys=120.49%, ctx=7372945, majf=0, minf=1
cpu : usr=18.84%, sys=58.18%, ctx=7775420, majf=0, minf=1
cpu : usr=24.20%, sys=59.85%, ctx=8307886, majf=0, minf=0
vdb: ios=8389107/8368136, merge=0/0, ticks=19457874/14616506,
in_queue=34206098, util=99.68%
43: interrupt in total: 887320
fio --exec_prerun="echo 3 > /proc/sys/vm/drop_caches" --group_reporting
--ioscheduler=noop --thread --bs=4k --size=512MB --direct=1 --numjobs=16
--ioengine=libaio --iodepth=64 --loops=3 --ramp_time=0
--filename=/dev/vdb --name=seq-read --stonewall --rw=read
--name=seq-write --stonewall --rw=write --name=rnd-read --stonewall
--rw=randread --name=rnd-write --stonewall --rw=randwrite
After:
-------------------------------------
seq-read : io=0 B, bw=309503KB/s, iops=77375 , runt= 54207msec
seq-write : io=0 B, bw=448205KB/s, iops=112051 , runt= 37432msec
rand-read : io=0 B, bw=311254KB/s, iops=77813 , runt= 53902msec
rand-write: io=0 B, bw=377152KB/s, iops=94287 , runt= 44484msec
clat (usec): min=81 , max=90588 , avg=12946.06, stdev=9085.94
clat (usec): min=57 , max=72264 , avg=8967.97, stdev=5951.04
clat (usec): min=29 , max=101046 , avg=12889.95, stdev=9067.91
clat (usec): min=52 , max=106152 , avg=10660.56, stdev=4778.19
cpu : usr=15.05%, sys=57.92%, ctx=7710941, majf=0, minf=54
cpu : usr=26.78%, sys=101.40%, ctx=7387891, majf=0, minf=2
cpu : usr=19.03%, sys=58.17%, ctx=7681976, majf=0, minf=8
cpu : usr=24.65%, sys=58.34%, ctx=8442632, majf=0, minf=4
vdb: ios=8389086/8361888, merge=0/0, ticks=17243780/12742010,
in_queue=30078377, util=99.59%
43: interrupt in total: 1259639
fio --exec_prerun="echo 3 > /proc/sys/vm/drop_caches" --group_reporting
--ioscheduler=noop --thread --bs=4k --size=512MB --direct=1 --numjobs=16
--ioengine=libaio --iodepth=64 --loops=3 --ramp_time=0
--filename=/dev/vdb --name=seq-read --stonewall --rw=read
--name=seq-write --stonewall --rw=write --name=rnd-read --stonewall
--rw=randread --name=rnd-write --stonewall --rw=randwrite
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-09-25 10:36:17 +08:00
|
|
|
do {
|
|
|
|
virtqueue_disable_cb(vq);
|
2014-06-26 17:41:48 +08:00
|
|
|
while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
|
2015-09-28 03:01:50 +08:00
|
|
|
blk_mq_complete_request(vbr->req, vbr->req->errors);
|
2013-11-02 00:52:52 +08:00
|
|
|
req_done = true;
|
2010-08-08 00:17:56 +08:00
|
|
|
}
|
2013-10-29 07:10:30 +08:00
|
|
|
if (unlikely(virtqueue_is_broken(vq)))
|
|
|
|
break;
|
virtio-blk: Disable callback in virtblk_done()
This reduces unnecessary interrupts that host could send to guest while
guest is in the progress of irq handling.
If one vcpu is handling the irq, while another interrupt comes, in
handle_edge_irq(), the guest will mask the interrupt via mask_msi_irq()
which is a very heavy operation that goes all the way down to host.
Here are some performance numbers on qemu:
Before:
-------------------------------------
seq-read : io=0 B, bw=269730KB/s, iops=67432 , runt= 62200msec
seq-write : io=0 B, bw=339716KB/s, iops=84929 , runt= 49386msec
rand-read : io=0 B, bw=270435KB/s, iops=67608 , runt= 62038msec
rand-write: io=0 B, bw=354436KB/s, iops=88608 , runt= 47335msec
clat (usec): min=101 , max=138052 , avg=14822.09, stdev=11771.01
clat (usec): min=96 , max=81543 , avg=11798.94, stdev=7735.60
clat (usec): min=128 , max=140043 , avg=14835.85, stdev=11765.33
clat (usec): min=109 , max=147207 , avg=11337.09, stdev=5990.35
cpu : usr=15.93%, sys=60.37%, ctx=7764972, majf=0, minf=54
cpu : usr=32.73%, sys=120.49%, ctx=7372945, majf=0, minf=1
cpu : usr=18.84%, sys=58.18%, ctx=7775420, majf=0, minf=1
cpu : usr=24.20%, sys=59.85%, ctx=8307886, majf=0, minf=0
vdb: ios=8389107/8368136, merge=0/0, ticks=19457874/14616506,
in_queue=34206098, util=99.68%
43: interrupt in total: 887320
fio --exec_prerun="echo 3 > /proc/sys/vm/drop_caches" --group_reporting
--ioscheduler=noop --thread --bs=4k --size=512MB --direct=1 --numjobs=16
--ioengine=libaio --iodepth=64 --loops=3 --ramp_time=0
--filename=/dev/vdb --name=seq-read --stonewall --rw=read
--name=seq-write --stonewall --rw=write --name=rnd-read --stonewall
--rw=randread --name=rnd-write --stonewall --rw=randwrite
After:
-------------------------------------
seq-read : io=0 B, bw=309503KB/s, iops=77375 , runt= 54207msec
seq-write : io=0 B, bw=448205KB/s, iops=112051 , runt= 37432msec
rand-read : io=0 B, bw=311254KB/s, iops=77813 , runt= 53902msec
rand-write: io=0 B, bw=377152KB/s, iops=94287 , runt= 44484msec
clat (usec): min=81 , max=90588 , avg=12946.06, stdev=9085.94
clat (usec): min=57 , max=72264 , avg=8967.97, stdev=5951.04
clat (usec): min=29 , max=101046 , avg=12889.95, stdev=9067.91
clat (usec): min=52 , max=106152 , avg=10660.56, stdev=4778.19
cpu : usr=15.05%, sys=57.92%, ctx=7710941, majf=0, minf=54
cpu : usr=26.78%, sys=101.40%, ctx=7387891, majf=0, minf=2
cpu : usr=19.03%, sys=58.17%, ctx=7681976, majf=0, minf=8
cpu : usr=24.65%, sys=58.34%, ctx=8442632, majf=0, minf=4
vdb: ios=8389086/8361888, merge=0/0, ticks=17243780/12742010,
in_queue=30078377, util=99.59%
43: interrupt in total: 1259639
fio --exec_prerun="echo 3 > /proc/sys/vm/drop_caches" --group_reporting
--ioscheduler=noop --thread --bs=4k --size=512MB --direct=1 --numjobs=16
--ioengine=libaio --iodepth=64 --loops=3 --ramp_time=0
--filename=/dev/vdb --name=seq-read --stonewall --rw=read
--name=seq-write --stonewall --rw=write --name=rnd-read --stonewall
--rw=randread --name=rnd-write --stonewall --rw=randwrite
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-09-25 10:36:17 +08:00
|
|
|
} while (!virtqueue_enable_cb(vq));
|
2013-11-02 00:52:52 +08:00
|
|
|
|
2007-10-22 09:03:38 +08:00
|
|
|
/* In case queue is stopped waiting for more buffers. */
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
if (req_done)
|
2014-04-16 15:44:54 +08:00
|
|
|
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
|
2014-06-26 17:41:48 +08:00
|
|
|
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
}
|
|
|
|
|
2014-10-30 01:14:52 +08:00
|
|
|
static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|
|
|
const struct blk_mq_queue_data *bd)
|
2007-10-22 09:03:38 +08:00
|
|
|
{
|
2013-11-02 00:52:52 +08:00
|
|
|
struct virtio_blk *vblk = hctx->queue->queuedata;
|
2014-10-30 01:14:52 +08:00
|
|
|
struct request *req = bd->rq;
|
2014-04-14 16:30:07 +08:00
|
|
|
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
|
2013-11-02 00:52:52 +08:00
|
|
|
unsigned long flags;
|
2013-03-20 13:14:27 +08:00
|
|
|
unsigned int num;
|
2014-06-26 17:41:48 +08:00
|
|
|
int qid = hctx->queue_num;
|
2014-03-13 08:53:39 +08:00
|
|
|
int err;
|
2014-05-30 10:49:29 +08:00
|
|
|
bool notify = false;
|
2007-10-22 09:03:38 +08:00
|
|
|
|
2013-11-02 00:52:52 +08:00
|
|
|
BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
|
2007-10-22 09:03:38 +08:00
|
|
|
|
|
|
|
vbr->req = req;
|
2016-06-06 03:32:23 +08:00
|
|
|
if (req_op(req) == REQ_OP_FLUSH) {
|
2014-10-07 22:39:49 +08:00
|
|
|
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_FLUSH);
|
2010-03-25 13:33:33 +08:00
|
|
|
vbr->out_hdr.sector = 0;
|
2014-10-07 22:39:49 +08:00
|
|
|
vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
|
2010-07-03 16:45:38 +08:00
|
|
|
} else {
|
|
|
|
switch (req->cmd_type) {
|
|
|
|
case REQ_TYPE_FS:
|
|
|
|
vbr->out_hdr.type = 0;
|
2014-10-07 22:39:49 +08:00
|
|
|
vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, blk_rq_pos(vbr->req));
|
|
|
|
vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
|
2010-07-03 16:45:38 +08:00
|
|
|
break;
|
|
|
|
case REQ_TYPE_BLOCK_PC:
|
2014-10-07 22:39:49 +08:00
|
|
|
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_SCSI_CMD);
|
2009-09-18 01:57:42 +08:00
|
|
|
vbr->out_hdr.sector = 0;
|
2014-10-07 22:39:49 +08:00
|
|
|
vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
|
2009-09-18 01:57:42 +08:00
|
|
|
break;
|
2015-04-18 04:37:16 +08:00
|
|
|
case REQ_TYPE_DRV_PRIV:
|
2014-10-07 22:39:49 +08:00
|
|
|
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
|
2010-07-03 16:45:38 +08:00
|
|
|
vbr->out_hdr.sector = 0;
|
2014-10-07 22:39:49 +08:00
|
|
|
vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
|
2010-07-03 16:45:38 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* We don't put anything else in the queue. */
|
|
|
|
BUG();
|
2009-09-18 01:57:42 +08:00
|
|
|
}
|
2007-10-22 09:03:38 +08:00
|
|
|
}
|
|
|
|
|
2014-09-14 07:40:09 +08:00
|
|
|
blk_mq_start_request(req);
|
|
|
|
|
2013-11-02 00:52:52 +08:00
|
|
|
num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
|
2009-05-18 20:41:30 +08:00
|
|
|
if (num) {
|
2013-03-20 13:14:27 +08:00
|
|
|
if (rq_data_dir(vbr->req) == WRITE)
|
2014-10-07 22:39:49 +08:00
|
|
|
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
|
2013-03-20 13:14:27 +08:00
|
|
|
else
|
2014-10-07 22:39:49 +08:00
|
|
|
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
|
2007-10-22 09:03:38 +08:00
|
|
|
}
|
|
|
|
|
2014-06-26 17:41:48 +08:00
|
|
|
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
|
|
|
|
err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
|
2014-03-13 08:53:39 +08:00
|
|
|
if (err) {
|
2014-06-26 17:41:48 +08:00
|
|
|
virtqueue_kick(vblk->vqs[qid].vq);
|
2013-11-02 00:52:52 +08:00
|
|
|
blk_mq_stop_hw_queue(hctx);
|
2014-06-26 17:41:48 +08:00
|
|
|
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
|
2014-03-13 08:53:39 +08:00
|
|
|
/* Out of mem doesn't actually happen, since we fall back
|
|
|
|
* to direct descriptors */
|
|
|
|
if (err == -ENOMEM || err == -ENOSPC)
|
|
|
|
return BLK_MQ_RQ_QUEUE_BUSY;
|
|
|
|
return BLK_MQ_RQ_QUEUE_ERROR;
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
}
|
|
|
|
|
2014-10-30 01:14:52 +08:00
|
|
|
if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
|
2014-05-30 10:49:29 +08:00
|
|
|
notify = true;
|
2014-06-26 17:41:48 +08:00
|
|
|
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
|
2014-05-30 10:49:29 +08:00
|
|
|
|
|
|
|
if (notify)
|
2014-06-26 17:41:48 +08:00
|
|
|
virtqueue_notify(vblk->vqs[qid].vq);
|
2013-11-02 00:52:52 +08:00
|
|
|
return BLK_MQ_RQ_QUEUE_OK;
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
}
|
|
|
|
|
2010-03-25 13:33:33 +08:00
|
|
|
/* return id (s/n) string for *disk to *id_str
|
|
|
|
*/
|
|
|
|
static int virtblk_get_id(struct gendisk *disk, char *id_str)
|
|
|
|
{
|
|
|
|
struct virtio_blk *vblk = disk->private_data;
|
|
|
|
struct request *req;
|
|
|
|
struct bio *bio;
|
2010-10-09 09:42:13 +08:00
|
|
|
int err;
|
2010-03-25 13:33:33 +08:00
|
|
|
|
|
|
|
bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (IS_ERR(bio))
|
|
|
|
return PTR_ERR(bio);
|
|
|
|
|
|
|
|
req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
|
|
|
|
if (IS_ERR(req)) {
|
|
|
|
bio_put(bio);
|
|
|
|
return PTR_ERR(req);
|
|
|
|
}
|
|
|
|
|
2015-04-18 04:37:16 +08:00
|
|
|
req->cmd_type = REQ_TYPE_DRV_PRIV;
|
2010-10-09 09:42:13 +08:00
|
|
|
err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
|
|
|
|
blk_put_request(req);
|
|
|
|
|
|
|
|
return err;
|
2010-03-25 13:33:33 +08:00
|
|
|
}
|
|
|
|
|
2010-09-15 07:27:23 +08:00
|
|
|
static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
|
|
|
|
unsigned int cmd, unsigned long data)
|
2007-10-22 09:03:38 +08:00
|
|
|
{
|
2009-05-18 20:41:30 +08:00
|
|
|
struct gendisk *disk = bdev->bd_disk;
|
|
|
|
struct virtio_blk *vblk = disk->private_data;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Only allow the generic SCSI ioctls if the host can support it.
|
|
|
|
*/
|
|
|
|
if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
|
2009-06-21 03:29:41 +08:00
|
|
|
return -ENOTTY;
|
2009-05-18 20:41:30 +08:00
|
|
|
|
2012-01-12 23:01:27 +08:00
|
|
|
return scsi_cmd_blk_ioctl(bdev, mode, cmd,
|
|
|
|
(void __user *)data);
|
2007-10-22 09:03:38 +08:00
|
|
|
}
|
|
|
|
|
2008-01-24 00:56:50 +08:00
|
|
|
/* We provide getgeo only to please some old bootloader/partitioning tools */
|
|
|
|
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
|
|
|
|
{
|
2008-04-17 02:56:37 +08:00
|
|
|
struct virtio_blk *vblk = bd->bd_disk->private_data;
|
|
|
|
|
|
|
|
/* see if the host passed in geometry config */
|
2013-10-14 15:41:51 +08:00
|
|
|
if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
|
|
|
|
virtio_cread(vblk->vdev, struct virtio_blk_config,
|
|
|
|
geometry.cylinders, &geo->cylinders);
|
|
|
|
virtio_cread(vblk->vdev, struct virtio_blk_config,
|
|
|
|
geometry.heads, &geo->heads);
|
|
|
|
virtio_cread(vblk->vdev, struct virtio_blk_config,
|
|
|
|
geometry.sectors, &geo->sectors);
|
2008-04-17 02:56:37 +08:00
|
|
|
} else {
|
|
|
|
/* some standard values, similar to sd */
|
|
|
|
geo->heads = 1 << 6;
|
|
|
|
geo->sectors = 1 << 5;
|
|
|
|
geo->cylinders = get_capacity(bd->bd_disk) >> 11;
|
|
|
|
}
|
2008-01-24 00:56:50 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-09-22 08:01:13 +08:00
|
|
|
static const struct block_device_operations virtblk_fops = {
|
2010-07-08 16:18:46 +08:00
|
|
|
.ioctl = virtblk_ioctl,
|
2008-01-24 00:56:50 +08:00
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.getgeo = virtblk_getgeo,
|
2007-10-22 09:03:38 +08:00
|
|
|
};
|
|
|
|
|
2008-02-01 16:05:00 +08:00
|
|
|
static int index_to_minor(int index)
|
|
|
|
{
|
|
|
|
return index << PART_BITS;
|
|
|
|
}
|
|
|
|
|
2011-10-31 03:29:59 +08:00
|
|
|
static int minor_to_index(int minor)
|
|
|
|
{
|
|
|
|
return minor >> PART_BITS;
|
|
|
|
}
|
|
|
|
|
2010-06-24 11:19:57 +08:00
|
|
|
static ssize_t virtblk_serial_show(struct device *dev,
|
|
|
|
struct device_attribute *attr, char *buf)
|
|
|
|
{
|
|
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/* sysfs gives us a PAGE_SIZE buffer */
|
|
|
|
BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
|
|
|
|
|
|
|
|
buf[VIRTIO_BLK_ID_BYTES] = '\0';
|
|
|
|
err = virtblk_get_id(disk, buf);
|
|
|
|
if (!err)
|
|
|
|
return strlen(buf);
|
|
|
|
|
|
|
|
if (err == -EIO) /* Unsupported? Make it empty. */
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
2014-10-23 21:08:44 +08:00
|
|
|
|
|
|
|
static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
|
2010-06-24 11:19:57 +08:00
|
|
|
|
2011-02-02 04:43:48 +08:00
|
|
|
static void virtblk_config_changed_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct virtio_blk *vblk =
|
|
|
|
container_of(work, struct virtio_blk, config_work);
|
|
|
|
struct virtio_device *vdev = vblk->vdev;
|
|
|
|
struct request_queue *q = vblk->disk->queue;
|
|
|
|
char cap_str_2[10], cap_str_10[10];
|
2013-03-12 13:04:40 +08:00
|
|
|
char *envp[] = { "RESIZE=1", NULL };
|
sd, mmc, virtio_blk, string_helpers: fix block size units
The current string_get_size() overflows when the device size goes over
2^64 bytes because the string helper routine computes the suffix from
the size in bytes. However, the entirety of Linux thinks in terms of
blocks, not bytes, so this will artificially induce an overflow on very
large devices. Fix this by making the function string_get_size() take
blocks and the block size instead of bytes. This should allow us to
keep working until the current SCSI standard overflows.
Also fix virtio_blk and mmc (both of which were also artificially
multiplying by the block size to pass a byte side to string_get_size()).
The mathematics of this is pretty simple: we're taking a product of
size in blocks (S) and block size (B) and trying to re-express this in
exponential form: S*B = R*N^E (where N, the exponent is either 1000 or
1024) and R < N. Mathematically, S = RS*N^ES and B=RB*N^EB, so if RS*RB
< N it's easy to see that S*B = RS*RB*N^(ES+EB). However, if RS*BS > N,
we can see that this can be re-expressed as RS*BS = R*N (where R =
RS*BS/N < N) so the whole exponent becomes R*N^(ES+EB+1)
[jejb: fix incorrect 32 bit do_div spotted by kbuild test robot <fengguang.wu@intel.com>]
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: James Bottomley <JBottomley@Odin.com>
2015-03-06 10:47:01 +08:00
|
|
|
u64 capacity;
|
2011-02-02 04:43:48 +08:00
|
|
|
|
|
|
|
/* Host must always specify the capacity. */
|
2013-10-14 15:41:51 +08:00
|
|
|
virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);
|
2011-02-02 04:43:48 +08:00
|
|
|
|
|
|
|
/* If capacity is too big, truncate with warning. */
|
|
|
|
if ((sector_t)capacity != capacity) {
|
|
|
|
dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
|
|
|
|
(unsigned long long)capacity);
|
|
|
|
capacity = (sector_t)-1;
|
|
|
|
}
|
|
|
|
|
sd, mmc, virtio_blk, string_helpers: fix block size units
The current string_get_size() overflows when the device size goes over
2^64 bytes because the string helper routine computes the suffix from
the size in bytes. However, the entirety of Linux thinks in terms of
blocks, not bytes, so this will artificially induce an overflow on very
large devices. Fix this by making the function string_get_size() take
blocks and the block size instead of bytes. This should allow us to
keep working until the current SCSI standard overflows.
Also fix virtio_blk and mmc (both of which were also artificially
multiplying by the block size to pass a byte side to string_get_size()).
The mathematics of this is pretty simple: we're taking a product of
size in blocks (S) and block size (B) and trying to re-express this in
exponential form: S*B = R*N^E (where N, the exponent is either 1000 or
1024) and R < N. Mathematically, S = RS*N^ES and B=RB*N^EB, so if RS*RB
< N it's easy to see that S*B = RS*RB*N^(ES+EB). However, if RS*BS > N,
we can see that this can be re-expressed as RS*BS = R*N (where R =
RS*BS/N < N) so the whole exponent becomes R*N^(ES+EB+1)
[jejb: fix incorrect 32 bit do_div spotted by kbuild test robot <fengguang.wu@intel.com>]
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: James Bottomley <JBottomley@Odin.com>
2015-03-06 10:47:01 +08:00
|
|
|
string_get_size(capacity, queue_logical_block_size(q),
|
|
|
|
STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
|
|
|
|
string_get_size(capacity, queue_logical_block_size(q),
|
|
|
|
STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
|
2011-02-02 04:43:48 +08:00
|
|
|
|
|
|
|
dev_notice(&vdev->dev,
|
|
|
|
"new size: %llu %d-byte logical blocks (%s/%s)\n",
|
|
|
|
(unsigned long long)capacity,
|
|
|
|
queue_logical_block_size(q),
|
|
|
|
cap_str_10, cap_str_2);
|
|
|
|
|
|
|
|
set_capacity(vblk->disk, capacity);
|
2012-03-29 16:09:44 +08:00
|
|
|
revalidate_disk(vblk->disk);
|
2013-03-12 13:04:40 +08:00
|
|
|
kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
|
2011-02-02 04:43:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void virtblk_config_changed(struct virtio_device *vdev)
|
|
|
|
{
|
|
|
|
struct virtio_blk *vblk = vdev->priv;
|
|
|
|
|
|
|
|
queue_work(virtblk_wq, &vblk->config_work);
|
|
|
|
}
|
|
|
|
|
2011-12-22 19:28:29 +08:00
|
|
|
static int init_vq(struct virtio_blk *vblk)
|
|
|
|
{
|
|
|
|
int err = 0;
|
2014-06-26 17:41:48 +08:00
|
|
|
int i;
|
|
|
|
vq_callback_t **callbacks;
|
|
|
|
const char **names;
|
|
|
|
struct virtqueue **vqs;
|
|
|
|
unsigned short num_vqs;
|
|
|
|
struct virtio_device *vdev = vblk->vdev;
|
|
|
|
|
|
|
|
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
|
|
|
|
struct virtio_blk_config, num_queues,
|
|
|
|
&num_vqs);
|
|
|
|
if (err)
|
|
|
|
num_vqs = 1;
|
|
|
|
|
|
|
|
vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
|
|
|
|
if (!vblk->vqs) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
|
|
|
|
if (!names)
|
|
|
|
goto err_names;
|
|
|
|
|
|
|
|
callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
|
|
|
|
if (!callbacks)
|
|
|
|
goto err_callbacks;
|
|
|
|
|
|
|
|
vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
|
|
|
|
if (!vqs)
|
|
|
|
goto err_vqs;
|
2011-12-22 19:28:29 +08:00
|
|
|
|
2014-06-26 17:41:48 +08:00
|
|
|
for (i = 0; i < num_vqs; i++) {
|
|
|
|
callbacks[i] = virtblk_done;
|
|
|
|
snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
|
|
|
|
names[i] = vblk->vqs[i].name;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Discover virtqueues and write information to configuration. */
|
|
|
|
err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
|
|
|
|
if (err)
|
|
|
|
goto err_find_vqs;
|
2011-12-22 19:28:29 +08:00
|
|
|
|
2014-06-26 17:41:48 +08:00
|
|
|
for (i = 0; i < num_vqs; i++) {
|
|
|
|
spin_lock_init(&vblk->vqs[i].lock);
|
|
|
|
vblk->vqs[i].vq = vqs[i];
|
|
|
|
}
|
|
|
|
vblk->num_vqs = num_vqs;
|
|
|
|
|
|
|
|
err_find_vqs:
|
|
|
|
kfree(vqs);
|
|
|
|
err_vqs:
|
|
|
|
kfree(callbacks);
|
|
|
|
err_callbacks:
|
|
|
|
kfree(names);
|
|
|
|
err_names:
|
|
|
|
if (err)
|
|
|
|
kfree(vblk->vqs);
|
|
|
|
out:
|
2011-12-22 19:28:29 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2012-04-10 15:28:05 +08:00
|
|
|
/*
|
|
|
|
* Legacy naming scheme used for virtio devices. We are stuck with it for
|
|
|
|
* virtio blk but don't ever use it for any new driver.
|
|
|
|
*/
|
|
|
|
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
|
|
|
|
{
|
|
|
|
const int base = 'z' - 'a' + 1;
|
|
|
|
char *begin = buf + strlen(prefix);
|
|
|
|
char *end = buf + buflen;
|
|
|
|
char *p;
|
|
|
|
int unit;
|
|
|
|
|
|
|
|
p = end - 1;
|
|
|
|
*p = '\0';
|
|
|
|
unit = base;
|
|
|
|
do {
|
|
|
|
if (p == begin)
|
|
|
|
return -EINVAL;
|
|
|
|
*--p = 'a' + (index % unit);
|
|
|
|
index = (index / unit) - 1;
|
|
|
|
} while (index >= 0);
|
|
|
|
|
|
|
|
memmove(begin, p, end - p);
|
|
|
|
memcpy(buf, prefix, strlen(prefix));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-07-03 21:19:37 +08:00
|
|
|
static int virtblk_get_cache_mode(struct virtio_device *vdev)
|
|
|
|
{
|
|
|
|
u8 writeback;
|
|
|
|
int err;
|
|
|
|
|
2013-10-14 15:41:51 +08:00
|
|
|
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
|
|
|
|
struct virtio_blk_config, wce,
|
|
|
|
&writeback);
|
2016-02-24 23:07:27 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If WCE is not configurable and flush is not available,
|
|
|
|
* assume no writeback cache is in use.
|
|
|
|
*/
|
2012-07-03 21:19:37 +08:00
|
|
|
if (err)
|
2016-02-24 23:07:27 +08:00
|
|
|
writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);
|
2012-07-03 21:19:37 +08:00
|
|
|
|
|
|
|
return writeback;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void virtblk_update_cache_mode(struct virtio_device *vdev)
|
|
|
|
{
|
|
|
|
u8 writeback = virtblk_get_cache_mode(vdev);
|
|
|
|
struct virtio_blk *vblk = vdev->priv;
|
|
|
|
|
2016-03-31 00:12:58 +08:00
|
|
|
blk_queue_write_cache(vblk->disk->queue, writeback, false);
|
2012-07-03 21:19:37 +08:00
|
|
|
revalidate_disk(vblk->disk);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *const virtblk_cache_types[] = {
|
|
|
|
"write through", "write back"
|
|
|
|
};
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
|
|
|
|
const char *buf, size_t count)
|
|
|
|
{
|
|
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
|
|
struct virtio_blk *vblk = disk->private_data;
|
|
|
|
struct virtio_device *vdev = vblk->vdev;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
|
|
|
|
for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
|
|
|
|
if (sysfs_streq(buf, virtblk_cache_types[i]))
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (i < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2013-10-14 15:41:51 +08:00
|
|
|
virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
|
2012-07-03 21:19:37 +08:00
|
|
|
virtblk_update_cache_mode(vdev);
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
|
|
struct virtio_blk *vblk = disk->private_data;
|
|
|
|
u8 writeback = virtblk_get_cache_mode(vblk->vdev);
|
|
|
|
|
|
|
|
BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
|
|
|
|
return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct device_attribute dev_attr_cache_type_ro =
|
|
|
|
__ATTR(cache_type, S_IRUGO,
|
|
|
|
virtblk_cache_type_show, NULL);
|
|
|
|
static const struct device_attribute dev_attr_cache_type_rw =
|
|
|
|
__ATTR(cache_type, S_IRUGO|S_IWUSR,
|
|
|
|
virtblk_cache_type_show, virtblk_cache_type_store);
|
|
|
|
|
2014-04-16 04:14:00 +08:00
|
|
|
static int virtblk_init_request(void *data, struct request *rq,
|
|
|
|
unsigned int hctx_idx, unsigned int request_idx,
|
|
|
|
unsigned int numa_node)
|
2014-04-16 03:59:10 +08:00
|
|
|
{
|
|
|
|
struct virtio_blk *vblk = data;
|
|
|
|
struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
|
|
|
|
|
|
|
|
sg_init_table(vbr->sg, vblk->sg_elems);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-11-02 00:52:52 +08:00
|
|
|
static struct blk_mq_ops virtio_mq_ops = {
|
|
|
|
.queue_rq = virtio_queue_rq,
|
|
|
|
.map_queue = blk_mq_map_queue,
|
2014-02-10 19:24:39 +08:00
|
|
|
.complete = virtblk_request_done,
|
2014-04-16 04:14:00 +08:00
|
|
|
.init_request = virtblk_init_request,
|
2013-11-02 00:52:52 +08:00
|
|
|
};
|
|
|
|
|
2014-04-16 04:14:00 +08:00
|
|
|
static unsigned int virtblk_queue_depth;
|
|
|
|
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
|
2013-11-02 00:52:52 +08:00
|
|
|
|
2012-12-22 07:13:49 +08:00
|
|
|
static int virtblk_probe(struct virtio_device *vdev)
|
2007-10-22 09:03:38 +08:00
|
|
|
{
|
|
|
|
struct virtio_blk *vblk;
|
2010-02-25 04:22:25 +08:00
|
|
|
struct request_queue *q;
|
2011-10-31 03:29:59 +08:00
|
|
|
int err, index;
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
|
2007-10-22 09:03:38 +08:00
|
|
|
u64 cap;
|
2010-02-25 04:22:25 +08:00
|
|
|
u32 v, blk_size, sg_elems, opt_io_size;
|
|
|
|
u16 min_io_size;
|
|
|
|
u8 physical_block_exp, alignment_offset;
|
2007-10-22 09:03:38 +08:00
|
|
|
|
2015-01-12 22:23:37 +08:00
|
|
|
if (!vdev->config->get) {
|
|
|
|
dev_err(&vdev->dev, "%s failure: config access disabled\n",
|
|
|
|
__func__);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2011-10-31 03:29:59 +08:00
|
|
|
err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (err < 0)
|
|
|
|
goto out;
|
|
|
|
index = err;
|
2008-01-31 22:53:53 +08:00
|
|
|
|
2008-12-30 23:26:05 +08:00
|
|
|
/* We need to know how many segments before we allocate. */
|
2013-10-14 15:41:51 +08:00
|
|
|
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
|
|
|
|
struct virtio_blk_config, seg_max,
|
|
|
|
&sg_elems);
|
2010-05-25 20:17:54 +08:00
|
|
|
|
|
|
|
/* We need at least one SG element, whatever they say. */
|
|
|
|
if (err || !sg_elems)
|
2008-12-30 23:26:05 +08:00
|
|
|
sg_elems = 1;
|
|
|
|
|
|
|
|
/* We need an extra sg elements at head and tail. */
|
|
|
|
sg_elems += 2;
|
2013-11-02 00:52:52 +08:00
|
|
|
vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
|
2007-10-22 09:03:38 +08:00
|
|
|
if (!vblk) {
|
|
|
|
err = -ENOMEM;
|
2011-10-31 03:29:59 +08:00
|
|
|
goto out_free_index;
|
2007-10-22 09:03:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
vblk->vdev = vdev;
|
2008-12-30 23:26:05 +08:00
|
|
|
vblk->sg_elems = sg_elems;
|
virtio-blk: Add bio-based IO path for virtio-blk
This patch introduces bio-based IO path for virtio-blk.
Compared to request-based IO path, bio-based IO path uses driver
provided ->make_request_fn() method to bypasses the IO scheduler. It
handles the bio to device directly without allocating a request in block
layer. This reduces the IO path in guest kernel to achieve high IOPS
and lower latency. The downside is that guest can not use the IO
scheduler to merge and sort requests. However, this is not a big problem
if the backend disk in host side uses faster disk device.
When the bio-based IO path is not enabled, virtio-blk still uses the
original request-based IO path, no performance difference is observed.
Using a slow device e.g. normal SATA disk, the bio-based IO path for
sequential read and write are slower than req-based IO path due to lack
of merge in guest kernel. So we make the bio-based path optional.
Performance evaluation:
-----------------------------
1) Fio test is performed in a 8 vcpu guest with ramdisk based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 28%, 24%, 21%, 16%
Latency improvement: 32%, 17%, 21%, 16%
Long version:
With bio-based IO path:
seq-read : io=2048.0MB, bw=116996KB/s, iops=233991 , runt= 17925msec
seq-write : io=2048.0MB, bw=100829KB/s, iops=201658 , runt= 20799msec
rand-read : io=3095.7MB, bw=112134KB/s, iops=224268 , runt= 28269msec
rand-write: io=3095.7MB, bw=96198KB/s, iops=192396 , runt= 32952msec
clat (usec): min=0 , max=2631.6K, avg=58716.99, stdev=191377.30
clat (usec): min=0 , max=1753.2K, avg=66423.25, stdev=81774.35
clat (usec): min=0 , max=2915.5K, avg=61685.70, stdev=120598.39
clat (usec): min=0 , max=1933.4K, avg=76935.12, stdev=96603.45
cpu : usr=74.08%, sys=703.84%, ctx=29661403, majf=21354, minf=22460954
cpu : usr=70.92%, sys=702.81%, ctx=77219828, majf=13980, minf=27713137
cpu : usr=72.23%, sys=695.37%, ctx=88081059, majf=18475, minf=28177648
cpu : usr=69.69%, sys=654.13%, ctx=145476035, majf=15867, minf=26176375
With request-based IO path:
seq-read : io=2048.0MB, bw=91074KB/s, iops=182147 , runt= 23027msec
seq-write : io=2048.0MB, bw=80725KB/s, iops=161449 , runt= 25979msec
rand-read : io=3095.7MB, bw=92106KB/s, iops=184211 , runt= 34416msec
rand-write: io=3095.7MB, bw=82815KB/s, iops=165630 , runt= 38277msec
clat (usec): min=0 , max=1932.4K, avg=77824.17, stdev=170339.49
clat (usec): min=0 , max=2510.2K, avg=78023.96, stdev=146949.15
clat (usec): min=0 , max=3037.2K, avg=74746.53, stdev=128498.27
clat (usec): min=0 , max=1363.4K, avg=89830.75, stdev=114279.68
cpu : usr=53.28%, sys=724.19%, ctx=37988895, majf=17531, minf=23577622
cpu : usr=49.03%, sys=633.20%, ctx=205935380, majf=18197, minf=27288959
cpu : usr=55.78%, sys=722.40%, ctx=101525058, majf=19273, minf=28067082
cpu : usr=56.55%, sys=690.83%, ctx=228205022, majf=18039, minf=26551985
2) Fio test is performed in a 8 vcpu guest with Fusion-IO based guest using
kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : 11%, 11%, 13%, 10%
Latency improvement: 10%, 10%, 12%, 10%
Long Version:
With bio-based IO path:
read : io=2048.0MB, bw=58920KB/s, iops=117840 , runt= 35593msec
write: io=2048.0MB, bw=64308KB/s, iops=128616 , runt= 32611msec
read : io=3095.7MB, bw=59633KB/s, iops=119266 , runt= 53157msec
write: io=3095.7MB, bw=62993KB/s, iops=125985 , runt= 50322msec
clat (usec): min=0 , max=1284.3K, avg=128109.01, stdev=71513.29
clat (usec): min=94 , max=962339 , avg=116832.95, stdev=65836.80
clat (usec): min=0 , max=1846.6K, avg=128509.99, stdev=89575.07
clat (usec): min=0 , max=2256.4K, avg=121361.84, stdev=82747.25
cpu : usr=56.79%, sys=421.70%, ctx=147335118, majf=21080, minf=19852517
cpu : usr=61.81%, sys=455.53%, ctx=143269950, majf=16027, minf=24800604
cpu : usr=63.10%, sys=455.38%, ctx=178373538, majf=16958, minf=24822612
cpu : usr=62.04%, sys=453.58%, ctx=226902362, majf=16089, minf=23278105
With request-based IO path:
read : io=2048.0MB, bw=52896KB/s, iops=105791 , runt= 39647msec
write: io=2048.0MB, bw=57856KB/s, iops=115711 , runt= 36248msec
read : io=3095.7MB, bw=52387KB/s, iops=104773 , runt= 60510msec
write: io=3095.7MB, bw=57310KB/s, iops=114619 , runt= 55312msec
clat (usec): min=0 , max=1532.6K, avg=142085.62, stdev=109196.84
clat (usec): min=0 , max=1487.4K, avg=129110.71, stdev=114973.64
clat (usec): min=0 , max=1388.6K, avg=145049.22, stdev=107232.55
clat (usec): min=0 , max=1465.9K, avg=133585.67, stdev=110322.95
cpu : usr=44.08%, sys=590.71%, ctx=451812322, majf=14841, minf=17648641
cpu : usr=48.73%, sys=610.78%, ctx=418953997, majf=22164, minf=26850689
cpu : usr=45.58%, sys=581.16%, ctx=714079216, majf=21497, minf=22558223
cpu : usr=48.40%, sys=599.65%, ctx=656089423, majf=16393, minf=23824409
3) Fio test is performed in a 8 vcpu guest with normal SATA based guest
using kvm tool.
Short version:
With bio-based IO path, sequential read/write, random read/write
IOPS boost : -10%, -10%, 4.4%, 0.5%
Latency improvement: -12%, -15%, 2.5%, 0.8%
Long Version:
With bio-based IO path:
read : io=124812KB, bw=36537KB/s, iops=9060 , runt= 3416msec
write: io=169180KB, bw=24406KB/s, iops=6065 , runt= 6932msec
read : io=256200KB, bw=2089.3KB/s, iops=520 , runt=122630msec
write: io=257988KB, bw=1545.7KB/s, iops=384 , runt=166910msec
clat (msec): min=1 , max=1527 , avg=28.06, stdev=89.54
clat (msec): min=2 , max=344 , avg=41.12, stdev=38.70
clat (msec): min=8 , max=1984 , avg=490.63, stdev=207.28
clat (msec): min=33 , max=4131 , avg=659.19, stdev=304.71
cpu : usr=4.85%, sys=17.15%, ctx=31593, majf=0, minf=7
cpu : usr=3.04%, sys=11.45%, ctx=39377, majf=0, minf=0
cpu : usr=0.47%, sys=1.59%, ctx=262986, majf=0, minf=16
cpu : usr=0.47%, sys=1.46%, ctx=337410, majf=0, minf=0
With request-based IO path:
read : io=150120KB, bw=40420KB/s, iops=10037 , runt= 3714msec
write: io=194932KB, bw=27029KB/s, iops=6722 , runt= 7212msec
read : io=257136KB, bw=2001.1KB/s, iops=498 , runt=128443msec
write: io=258276KB, bw=1537.2KB/s, iops=382 , runt=168028msec
clat (msec): min=1 , max=1542 , avg=24.84, stdev=32.45
clat (msec): min=3 , max=628 , avg=35.62, stdev=39.71
clat (msec): min=8 , max=2540 , avg=503.28, stdev=236.97
clat (msec): min=41 , max=4398 , avg=653.88, stdev=302.61
cpu : usr=3.91%, sys=15.75%, ctx=26968, majf=0, minf=23
cpu : usr=2.50%, sys=10.56%, ctx=19090, majf=0, minf=0
cpu : usr=0.16%, sys=0.43%, ctx=20159, majf=0, minf=16
cpu : usr=0.18%, sys=0.53%, ctx=81364, majf=0, minf=0
How to use:
-----------------------------
Add 'virtio_blk.use_bio=1' to kernel cmdline or 'modprobe virtio_blk
use_bio=1' to enable ->make_request_fn() based I/O path.
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2012-08-08 16:07:04 +08:00
|
|
|
|
2011-02-02 04:43:48 +08:00
|
|
|
INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
|
2007-10-22 09:03:38 +08:00
|
|
|
|
2011-12-22 19:28:29 +08:00
|
|
|
err = init_vq(vblk);
|
|
|
|
if (err)
|
2007-10-22 09:03:38 +08:00
|
|
|
goto out_free_vblk;
|
|
|
|
|
|
|
|
/* FIXME: How many partitions? How long is a piece of string? */
|
2008-01-31 22:53:53 +08:00
|
|
|
vblk->disk = alloc_disk(1 << PART_BITS);
|
2007-10-22 09:03:38 +08:00
|
|
|
if (!vblk->disk) {
|
|
|
|
err = -ENOMEM;
|
2013-11-02 00:52:52 +08:00
|
|
|
goto out_free_vq;
|
2007-10-22 09:03:38 +08:00
|
|
|
}
|
|
|
|
|
2014-03-19 14:38:24 +08:00
|
|
|
/* Default queue sizing is to fill the ring. */
|
2014-04-16 04:14:00 +08:00
|
|
|
if (!virtblk_queue_depth) {
|
2014-06-26 17:41:48 +08:00
|
|
|
virtblk_queue_depth = vblk->vqs[0].vq->num_free;
|
2014-03-19 14:38:24 +08:00
|
|
|
/* ... but without indirect descs, we use 2 descs per req */
|
|
|
|
if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
|
2014-04-16 04:14:00 +08:00
|
|
|
virtblk_queue_depth /= 2;
|
2014-03-19 14:38:24 +08:00
|
|
|
}
|
2014-04-16 04:14:00 +08:00
|
|
|
|
|
|
|
memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
|
|
|
|
vblk->tag_set.ops = &virtio_mq_ops;
|
|
|
|
vblk->tag_set.queue_depth = virtblk_queue_depth;
|
|
|
|
vblk->tag_set.numa_node = NUMA_NO_NODE;
|
|
|
|
vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
|
|
|
|
vblk->tag_set.cmd_size =
|
2013-11-02 00:52:52 +08:00
|
|
|
sizeof(struct virtblk_req) +
|
|
|
|
sizeof(struct scatterlist) * sg_elems;
|
2014-04-16 04:14:00 +08:00
|
|
|
vblk->tag_set.driver_data = vblk;
|
2014-06-26 17:41:48 +08:00
|
|
|
vblk->tag_set.nr_hw_queues = vblk->num_vqs;
|
2013-11-02 00:52:52 +08:00
|
|
|
|
2014-04-16 04:14:00 +08:00
|
|
|
err = blk_mq_alloc_tag_set(&vblk->tag_set);
|
|
|
|
if (err)
|
|
|
|
goto out_put_disk;
|
|
|
|
|
|
|
|
q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
|
2015-01-02 22:25:27 +08:00
|
|
|
if (IS_ERR(q)) {
|
2007-10-22 09:03:38 +08:00
|
|
|
err = -ENOMEM;
|
2014-04-16 04:14:00 +08:00
|
|
|
goto out_free_tags;
|
2007-10-22 09:03:38 +08:00
|
|
|
}
|
|
|
|
|
2010-02-25 04:22:25 +08:00
|
|
|
q->queuedata = vblk;
|
2008-10-27 17:45:15 +08:00
|
|
|
|
2012-04-10 15:28:05 +08:00
|
|
|
virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
|
2008-02-01 16:05:00 +08:00
|
|
|
|
2007-10-22 09:03:38 +08:00
|
|
|
vblk->disk->major = major;
|
2008-02-01 16:05:00 +08:00
|
|
|
vblk->disk->first_minor = index_to_minor(index);
|
2007-10-22 09:03:38 +08:00
|
|
|
vblk->disk->private_data = vblk;
|
|
|
|
vblk->disk->fops = &virtblk_fops;
|
2008-03-03 06:00:15 +08:00
|
|
|
vblk->disk->driverfs_dev = &vdev->dev;
|
2015-09-06 17:05:42 +08:00
|
|
|
vblk->disk->flags |= GENHD_FL_EXT_DEVT;
|
2011-10-31 03:29:59 +08:00
|
|
|
vblk->index = index;
|
2008-01-31 22:53:53 +08:00
|
|
|
|
2010-09-03 17:56:18 +08:00
|
|
|
/* configure queue flush support */
|
2012-07-03 21:19:37 +08:00
|
|
|
virtblk_update_cache_mode(vdev);
|
2007-10-22 09:03:38 +08:00
|
|
|
|
2008-05-16 17:17:03 +08:00
|
|
|
/* If disk is read-only in the host, the guest should obey */
|
|
|
|
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
|
|
|
|
set_disk_ro(vblk->disk, 1);
|
|
|
|
|
2008-02-05 12:49:56 +08:00
|
|
|
/* Host must always specify the capacity. */
|
2013-10-14 15:41:51 +08:00
|
|
|
virtio_cread(vdev, struct virtio_blk_config, capacity, &cap);
|
2007-10-22 09:03:38 +08:00
|
|
|
|
|
|
|
/* If capacity is too big, truncate with warning. */
|
|
|
|
if ((sector_t)cap != cap) {
|
|
|
|
dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
|
|
|
|
(unsigned long long)cap);
|
|
|
|
cap = (sector_t)-1;
|
|
|
|
}
|
|
|
|
set_capacity(vblk->disk, cap);
|
|
|
|
|
2008-12-30 23:26:05 +08:00
|
|
|
/* We can handle whatever the host told us to handle. */
|
2010-03-10 13:48:32 +08:00
|
|
|
blk_queue_max_segments(q, vblk->sg_elems-2);
|
2008-12-30 23:26:05 +08:00
|
|
|
|
2009-07-18 11:47:45 +08:00
|
|
|
/* No need to bounce any requests */
|
2010-02-25 04:22:25 +08:00
|
|
|
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
|
2009-07-18 11:47:45 +08:00
|
|
|
|
2008-12-30 23:26:04 +08:00
|
|
|
/* No real sector limit. */
|
2010-03-10 13:48:32 +08:00
|
|
|
blk_queue_max_hw_sectors(q, -1U);
|
2008-12-30 23:26:04 +08:00
|
|
|
|
2008-02-05 12:49:56 +08:00
|
|
|
/* Host can optionally specify maximum segment size and number of
|
|
|
|
* segments. */
|
2013-10-14 15:41:51 +08:00
|
|
|
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
|
|
|
|
struct virtio_blk_config, size_max, &v);
|
2007-10-22 09:03:38 +08:00
|
|
|
if (!err)
|
2010-02-25 04:22:25 +08:00
|
|
|
blk_queue_max_segment_size(q, v);
|
2008-12-30 23:26:04 +08:00
|
|
|
else
|
2010-02-25 04:22:25 +08:00
|
|
|
blk_queue_max_segment_size(q, -1U);
|
2007-10-22 09:03:38 +08:00
|
|
|
|
2008-05-29 17:08:26 +08:00
|
|
|
/* Host can optionally specify the block size of the device */
|
2013-10-14 15:41:51 +08:00
|
|
|
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
|
|
|
|
struct virtio_blk_config, blk_size,
|
|
|
|
&blk_size);
|
2008-05-29 17:08:26 +08:00
|
|
|
if (!err)
|
2010-02-25 04:22:25 +08:00
|
|
|
blk_queue_logical_block_size(q, blk_size);
|
|
|
|
else
|
|
|
|
blk_size = queue_logical_block_size(q);
|
|
|
|
|
|
|
|
/* Use topology information if available */
|
2013-10-14 15:41:51 +08:00
|
|
|
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
|
|
|
struct virtio_blk_config, physical_block_exp,
|
|
|
|
&physical_block_exp);
|
2010-02-25 04:22:25 +08:00
|
|
|
if (!err && physical_block_exp)
|
|
|
|
blk_queue_physical_block_size(q,
|
|
|
|
blk_size * (1 << physical_block_exp));
|
|
|
|
|
2013-10-14 15:41:51 +08:00
|
|
|
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
|
|
|
struct virtio_blk_config, alignment_offset,
|
|
|
|
&alignment_offset);
|
2010-02-25 04:22:25 +08:00
|
|
|
if (!err && alignment_offset)
|
|
|
|
blk_queue_alignment_offset(q, blk_size * alignment_offset);
|
|
|
|
|
2013-10-14 15:41:51 +08:00
|
|
|
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
|
|
|
struct virtio_blk_config, min_io_size,
|
|
|
|
&min_io_size);
|
2010-02-25 04:22:25 +08:00
|
|
|
if (!err && min_io_size)
|
|
|
|
blk_queue_io_min(q, blk_size * min_io_size);
|
|
|
|
|
2013-10-14 15:41:51 +08:00
|
|
|
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
|
|
|
struct virtio_blk_config, opt_io_size,
|
|
|
|
&opt_io_size);
|
2010-02-25 04:22:25 +08:00
|
|
|
if (!err && opt_io_size)
|
|
|
|
blk_queue_io_opt(q, blk_size * opt_io_size);
|
|
|
|
|
2014-10-15 07:52:30 +08:00
|
|
|
virtio_device_ready(vdev);
|
|
|
|
|
2007-10-22 09:03:38 +08:00
|
|
|
add_disk(vblk->disk);
|
2010-06-24 11:19:57 +08:00
|
|
|
err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
|
|
|
|
if (err)
|
|
|
|
goto out_del_disk;
|
|
|
|
|
2012-07-03 21:19:37 +08:00
|
|
|
if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
|
|
|
|
err = device_create_file(disk_to_dev(vblk->disk),
|
|
|
|
&dev_attr_cache_type_rw);
|
|
|
|
else
|
|
|
|
err = device_create_file(disk_to_dev(vblk->disk),
|
|
|
|
&dev_attr_cache_type_ro);
|
|
|
|
if (err)
|
|
|
|
goto out_del_disk;
|
2007-10-22 09:03:38 +08:00
|
|
|
return 0;
|
|
|
|
|
2010-06-24 11:19:57 +08:00
|
|
|
out_del_disk:
|
|
|
|
del_gendisk(vblk->disk);
|
|
|
|
blk_cleanup_queue(vblk->disk->queue);
|
2014-04-16 04:14:00 +08:00
|
|
|
out_free_tags:
|
|
|
|
blk_mq_free_tag_set(&vblk->tag_set);
|
2007-10-22 09:03:38 +08:00
|
|
|
out_put_disk:
|
|
|
|
put_disk(vblk->disk);
|
|
|
|
out_free_vq:
|
2009-06-13 12:16:36 +08:00
|
|
|
vdev->config->del_vqs(vdev);
|
2007-10-22 09:03:38 +08:00
|
|
|
out_free_vblk:
|
|
|
|
kfree(vblk);
|
2011-10-31 03:29:59 +08:00
|
|
|
out_free_index:
|
|
|
|
ida_simple_remove(&vd_index_ida, index);
|
2007-10-22 09:03:38 +08:00
|
|
|
out:
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2012-12-22 07:13:49 +08:00
|
|
|
static void virtblk_remove(struct virtio_device *vdev)
|
2007-10-22 09:03:38 +08:00
|
|
|
{
|
|
|
|
struct virtio_blk *vblk = vdev->priv;
|
2011-10-31 03:29:59 +08:00
|
|
|
int index = vblk->index;
|
2013-01-02 13:07:17 +08:00
|
|
|
int refc;
|
2007-10-22 09:03:38 +08:00
|
|
|
|
2014-10-15 07:52:26 +08:00
|
|
|
/* Make sure no work handler is accessing the device. */
|
|
|
|
flush_work(&vblk->config_work);
|
2011-02-02 04:43:48 +08:00
|
|
|
|
2012-05-25 10:34:47 +08:00
|
|
|
del_gendisk(vblk->disk);
|
2012-05-25 10:34:48 +08:00
|
|
|
blk_cleanup_queue(vblk->disk->queue);
|
2012-05-25 10:34:47 +08:00
|
|
|
|
2014-04-16 04:14:00 +08:00
|
|
|
blk_mq_free_tag_set(&vblk->tag_set);
|
|
|
|
|
2008-02-05 12:50:03 +08:00
|
|
|
/* Stop all the virtqueues. */
|
|
|
|
vdev->config->reset(vdev);
|
|
|
|
|
2013-01-02 13:07:17 +08:00
|
|
|
refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
|
2007-10-22 09:03:38 +08:00
|
|
|
put_disk(vblk->disk);
|
2009-06-13 12:16:36 +08:00
|
|
|
vdev->config->del_vqs(vdev);
|
2014-06-26 17:41:48 +08:00
|
|
|
kfree(vblk->vqs);
|
2007-10-22 09:03:38 +08:00
|
|
|
kfree(vblk);
|
2013-01-02 13:07:17 +08:00
|
|
|
|
|
|
|
/* Only free device id if we don't have any users */
|
|
|
|
if (refc == 1)
|
|
|
|
ida_simple_remove(&vd_index_ida, index);
|
2007-10-22 09:03:38 +08:00
|
|
|
}
|
|
|
|
|
2013-09-17 07:55:23 +08:00
|
|
|
#ifdef CONFIG_PM_SLEEP
|
2011-12-22 19:28:30 +08:00
|
|
|
static int virtblk_freeze(struct virtio_device *vdev)
|
|
|
|
{
|
|
|
|
struct virtio_blk *vblk = vdev->priv;
|
|
|
|
|
|
|
|
/* Ensure we don't receive any more interrupts */
|
|
|
|
vdev->config->reset(vdev);
|
|
|
|
|
2014-10-15 07:52:26 +08:00
|
|
|
/* Make sure no work handler is accessing the device. */
|
2011-12-22 19:28:30 +08:00
|
|
|
flush_work(&vblk->config_work);
|
|
|
|
|
2013-11-02 00:52:52 +08:00
|
|
|
blk_mq_stop_hw_queues(vblk->disk->queue);
|
2011-12-22 19:28:30 +08:00
|
|
|
|
|
|
|
vdev->config->del_vqs(vdev);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int virtblk_restore(struct virtio_device *vdev)
|
|
|
|
{
|
|
|
|
struct virtio_blk *vblk = vdev->priv;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = init_vq(vdev->priv);
|
2014-10-15 07:52:32 +08:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
virtio_device_ready(vdev);
|
2013-11-02 00:52:52 +08:00
|
|
|
|
2014-10-15 07:52:32 +08:00
|
|
|
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
|
|
|
|
return 0;
|
2011-12-22 19:28:30 +08:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2010-01-10 20:40:02 +08:00
|
|
|
static const struct virtio_device_id id_table[] = {
|
2007-10-22 09:03:38 +08:00
|
|
|
{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
|
|
|
|
{ 0 },
|
|
|
|
};
|
|
|
|
|
2014-10-07 22:39:49 +08:00
|
|
|
static unsigned int features_legacy[] = {
|
2010-09-03 17:56:18 +08:00
|
|
|
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
|
|
|
|
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
|
2016-02-24 23:07:27 +08:00
|
|
|
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
|
2014-06-26 17:41:48 +08:00
|
|
|
VIRTIO_BLK_F_MQ,
|
2014-10-07 22:39:49 +08:00
|
|
|
}
|
|
|
|
;
|
|
|
|
static unsigned int features[] = {
|
|
|
|
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
|
|
|
|
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
|
2016-02-24 23:07:27 +08:00
|
|
|
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
|
2014-10-07 22:39:49 +08:00
|
|
|
VIRTIO_BLK_F_MQ,
|
2008-05-03 10:50:50 +08:00
|
|
|
};
|
|
|
|
|
2012-12-22 07:13:49 +08:00
|
|
|
static struct virtio_driver virtio_blk = {
|
2014-10-07 22:39:49 +08:00
|
|
|
.feature_table = features,
|
|
|
|
.feature_table_size = ARRAY_SIZE(features),
|
|
|
|
.feature_table_legacy = features_legacy,
|
|
|
|
.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
|
|
|
|
.driver.name = KBUILD_MODNAME,
|
|
|
|
.driver.owner = THIS_MODULE,
|
|
|
|
.id_table = id_table,
|
|
|
|
.probe = virtblk_probe,
|
|
|
|
.remove = virtblk_remove,
|
|
|
|
.config_changed = virtblk_config_changed,
|
2013-09-17 07:55:23 +08:00
|
|
|
#ifdef CONFIG_PM_SLEEP
|
2014-10-07 22:39:49 +08:00
|
|
|
.freeze = virtblk_freeze,
|
|
|
|
.restore = virtblk_restore,
|
2011-12-22 19:28:30 +08:00
|
|
|
#endif
|
2007-10-22 09:03:38 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static int __init init(void)
|
|
|
|
{
|
2011-02-02 04:43:48 +08:00
|
|
|
int error;
|
|
|
|
|
|
|
|
virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
|
|
|
|
if (!virtblk_wq)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2008-01-31 22:53:53 +08:00
|
|
|
major = register_blkdev(0, "virtblk");
|
2011-02-02 04:43:48 +08:00
|
|
|
if (major < 0) {
|
|
|
|
error = major;
|
|
|
|
goto out_destroy_workqueue;
|
|
|
|
}
|
|
|
|
|
|
|
|
error = register_virtio_driver(&virtio_blk);
|
|
|
|
if (error)
|
|
|
|
goto out_unregister_blkdev;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_unregister_blkdev:
|
|
|
|
unregister_blkdev(major, "virtblk");
|
|
|
|
out_destroy_workqueue:
|
|
|
|
destroy_workqueue(virtblk_wq);
|
|
|
|
return error;
|
2007-10-22 09:03:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void __exit fini(void)
|
|
|
|
{
|
|
|
|
unregister_virtio_driver(&virtio_blk);
|
2014-10-23 23:57:19 +08:00
|
|
|
unregister_blkdev(major, "virtblk");
|
2011-02-02 04:43:48 +08:00
|
|
|
destroy_workqueue(virtblk_wq);
|
2007-10-22 09:03:38 +08:00
|
|
|
}
|
|
|
|
module_init(init);
|
|
|
|
module_exit(fini);
|
|
|
|
|
|
|
|
MODULE_DEVICE_TABLE(virtio, id_table);
|
|
|
|
MODULE_DESCRIPTION("Virtio block driver");
|
|
|
|
MODULE_LICENSE("GPL");
|