io_uring: avoid whole io_wq_work copy for requests completed inline

If requests can be submitted and completed inline, we don't need to
initialize whole io_wq_work in io_init_req(), which is an expensive
operation, add a new 'REQ_F_WORK_INITIALIZED' to determine whether
io_wq_work is initialized and add a helper io_req_init_async(), users
must call io_req_init_async() for the first time touching any members
of io_wq_work.

I use /dev/nullb0 to evaluate performance improvement in my physical
machine:
  modprobe null_blk nr_devices=1 completion_nsec=0
  sudo taskset -c 60 fio  -name=fiotest -filename=/dev/nullb0 -iodepth=128
  -thread -rw=read -ioengine=io_uring -direct=1 -bs=4k -size=100G -numjobs=1
  -time_based -runtime=120

before this patch:
Run status group 0 (all jobs):
   READ: bw=724MiB/s (759MB/s), 724MiB/s-724MiB/s (759MB/s-759MB/s),
   io=84.8GiB (91.1GB), run=120001-120001msec

With this patch:
Run status group 0 (all jobs):
   READ: bw=761MiB/s (798MB/s), 761MiB/s-761MiB/s (798MB/s-798MB/s),
   io=89.2GiB (95.8GB), run=120001-120001msec

About 5% improvement.

Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Xiaoguang Wang 2020-06-10 19:41:19 +08:00 committed by Jens Axboe
parent c5b856255c
commit 7cdaf587de
2 changed files with 36 additions and 9 deletions

View File

@ -93,11 +93,6 @@ struct io_wq_work {
pid_t task_pid;
};
#define INIT_IO_WORK(work) \
do { \
*(work) = (struct io_wq_work){}; \
} while (0) \
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
{
if (!work->list.next)

View File

@ -542,6 +542,7 @@ enum {
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT,
REQ_F_QUEUE_TIMEOUT_BIT,
REQ_F_WORK_INITIALIZED_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@ -599,6 +600,8 @@ enum {
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
/* needs to queue linked timeout */
REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
/* io_wq_work is initialized */
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
};
struct async_poll {
@ -911,6 +914,19 @@ EXPORT_SYMBOL(io_uring_get_socket);
static void io_file_put_work(struct work_struct *work);
/*
* Note: must call io_req_init_async() for the first time you
* touch any members of io_wq_work.
*/
static inline void io_req_init_async(struct io_kiocb *req)
{
if (req->flags & REQ_F_WORK_INITIALIZED)
return;
memset(&req->work, 0, sizeof(req->work));
req->flags |= REQ_F_WORK_INITIALIZED;
}
static inline bool io_async_submit(struct io_ring_ctx *ctx)
{
return ctx->flags & IORING_SETUP_SQPOLL;
@ -1037,6 +1053,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
static inline void io_req_work_drop_env(struct io_kiocb *req)
{
if (!(req->flags & REQ_F_WORK_INITIALIZED))
return;
if (req->work.mm) {
mmdrop(req->work.mm);
req->work.mm = NULL;
@ -2785,8 +2804,14 @@ static int __io_splice_prep(struct io_kiocb *req,
return ret;
req->flags |= REQ_F_NEED_CLEANUP;
if (!S_ISREG(file_inode(sp->file_in)->i_mode))
if (!S_ISREG(file_inode(sp->file_in)->i_mode)) {
/*
* Splice operation will be punted aync, and here need to
* modify io_wq_work.flags, so initialize io_wq_work firstly.
*/
io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
return 0;
}
@ -3372,8 +3397,10 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
/*
* If we queue this for async, it must not be cancellable. That would
* leave the 'file' in an undeterminate state.
* leave the 'file' in an undeterminate state, and here need to modify
* io_wq_work.flags, so initialize io_wq_work firstly.
*/
io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_NO_CANCEL;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
@ -4851,6 +4878,8 @@ static int io_req_defer_prep(struct io_kiocb *req,
if (!sqe)
return 0;
io_req_init_async(req);
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (unlikely(ret))
@ -5505,7 +5534,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
again:
linked_timeout = io_prep_linked_timeout(req);
if (req->work.creds && req->work.creds != current_cred()) {
if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
req->work.creds != current_cred()) {
if (old_creds)
revert_creds(old_creds);
if (old_creds == req->work.creds)
@ -5528,6 +5558,8 @@ again:
goto exit;
}
punt:
io_req_init_async(req);
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (ret)
@ -5780,7 +5812,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
refcount_set(&req->refs, 2);
req->task = NULL;
req->result = 0;
INIT_IO_WORK(&req->work);
if (unlikely(req->opcode >= IORING_OP_LAST))
return -EINVAL;
@ -5802,6 +5833,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
id = READ_ONCE(sqe->personality);
if (id) {
io_req_init_async(req);
req->work.creds = idr_find(&ctx->personality_idr, id);
if (unlikely(!req->work.creds))
return -EINVAL;