io_uring-5.13-2021-05-07
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmCVVmMQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpv7yEAC/WV1alcH9XdEqLrc2aDwlaScMmSlrMQhY
ihtDCR9BsX11E3QcUB7D+VYjBo68uKR+ksa1/GN2Xp+vvqmdjQvZindgto/5b6u1
ko0Dradl2zulCAc7QIdjb2tbmL+Q+JOX5wxv14/+2XabEcce3OegWIvIgX+56NFW
ZHg80SQzXUhEtQcAUVCoPeBN+H+xzadgz38VlOI08gOG7/M6tS965GH3tZqTjh2K
P7dLjUn0WcxZ3euAYAsQzNN2O2ObJfpCsQtsG2eSf8DGpanPe4gQjAud1BstDtN0
CJ0+b6DHgzQYOAgPFjm7l0jjs+VnIYIMnoBBxm5EkIoktsj0hHdqTnEugoz4wTnS
T8WgojaU6jYNx+Jj6vciCLk0lb5c3O3nxmw3w84/rtTwtaEChCAbWdAkl4cleNaw
3/Z2bksCVrQWDVskmu4FP7+kGYpjpV+ZiA2+6OGwILTCN+W7vi079NByQAzdLaRb
K/4lEGM7VYEXtq/I7C6VzjtY7gq46TJmpFW+OdQnPIguavp+7vlUl2pLV3oTeGBc
E6c+xltgIN+sbbDc/57EJEvhHQod4A6HYOGwBMyjHrhr/sdQ4xvUaJPNmG9HfqRK
SM3TOlwpHRWFTgbO+6qoJQSMvACQyE/SDqiPi08q75zFVTNCcYM7uYV3fJMsQ9sj
vA+5HAaRKQ==
=YwTw
-----END PGP SIGNATURE-----

Merge tag 'io_uring-5.13-2021-05-07' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "Mostly fixes for merge window merged code. In detail:

  - Error case memory leak fixes (Colin, Zqiang)
  - Add the tools/io_uring/ to the list of maintained files (Lukas)
  - Set of fixes for the modified buffer registration API (Pavel)
  - Sanitize io thread setup on x86 (Stefan)
  - Ensure we truncate transfer count for registered buffers (Thadeu)"

* tag 'io_uring-5.13-2021-05-07' of git://git.kernel.dk/linux-block:
  x86/process: setup io_threads more like normal user space threads
  MAINTAINERS: add io_uring tool to IO_URING
  io_uring: truncate lengths larger than MAX_RW_COUNT on provide buffers
  io_uring: Fix memory leak in io_sqe_buffers_register()
  io_uring: Fix premature return from loop and memory leak
  io_uring: fix unchecked error in switch_start()
  io_uring: allow empty slots for reg buffers
  io_uring: add more build check for uapi
  io_uring: dont overlap internal and user req flags
  io_uring: fix drain with rsrc CQEs
commit 28b4afeb59
diff --git a/MAINTAINERS b/MAINTAINERS
@@ -9552,6 +9552,7 @@ F:	fs/io-wq.h
 F:	fs/io_uring.c
 F:	include/linux/io_uring.h
 F:	include/uapi/linux/io_uring.h
+F:	tools/io_uring/
 
 IPMI SUBSYSTEM
 M:	Corey Minyard <minyard@acm.org>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
@@ -156,7 +156,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 #endif
 
 	/* Kernel thread ? */
-	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		kthread_frame_init(frame, sp, arg);
 		return 0;
@@ -172,6 +172,23 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 		task_user_gs(p) = get_user_gs(current_pt_regs());
 #endif
 
+	if (unlikely(p->flags & PF_IO_WORKER)) {
+		/*
+		 * An IO thread is a user space thread, but it doesn't
+		 * return to ret_after_fork().
+		 *
+		 * In order to indicate that to tools like gdb,
+		 * we reset the stack and instruction pointers.
+		 *
+		 * It does the same kernel frame setup to return to a kernel
+		 * function that a kernel thread does.
+		 */
+		childregs->sp = 0;
+		childregs->ip = 0;
+		kthread_frame_init(frame, sp, arg);
+		return 0;
+	}
+
 	/* Set a new TLS for the child thread? */
 	if (clone_flags & CLONE_SETTLS)
 		ret = set_new_tls(p, tls);
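The copy_thread() change above concerns io_uring's kernel-managed "io threads" (PF_IO_WORKER tasks such as the SQPOLL submission thread): they are user-space threads that never return to user mode, so their saved stack and instruction pointers are zeroed to make that obvious to tools like gdb. Below is a minimal userspace sketch, assuming liburing is installed, that spawns such a thread by setting up a ring with IORING_SETUP_SQPOLL; the thread shows up as iou-sqp-<pid> and can be inspected with ps or gdb.

#include <liburing.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_params p;
	int ret;

	memset(&p, 0, sizeof(p));
	p.flags = IORING_SETUP_SQPOLL;
	p.sq_thread_idle = 2000;	/* ms before the SQPOLL thread idles */

	/* kernels older than 5.11 require extra privileges for SQPOLL */
	ret = io_uring_queue_init_params(8, &ring, &p);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}

	printf("ring ready, look for iou-sqp-%d in ps or gdb\n", getpid());
	sleep(10);			/* time to attach and inspect the io thread */
	io_uring_queue_exit(&ring);
	return 0;
}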
diff --git a/fs/io_uring.c b/fs/io_uring.c
@@ -251,7 +251,7 @@ struct io_rsrc_data {
 struct io_buffer {
 	struct list_head list;
 	__u64 addr;
-	__s32 len;
+	__u32 len;
 	__u16 bid;
 };
 
@@ -456,6 +456,7 @@ struct io_ring_ctx {
 	spinlock_t		rsrc_ref_lock;
 	struct io_rsrc_node	*rsrc_node;
 	struct io_rsrc_node	*rsrc_backup_node;
+	struct io_mapped_ubuf	*dummy_ubuf;
 
 	struct io_restriction		restrictions;
 
@@ -702,7 +703,8 @@ enum {
 	REQ_F_FORCE_ASYNC_BIT	= IOSQE_ASYNC_BIT,
 	REQ_F_BUFFER_SELECT_BIT	= IOSQE_BUFFER_SELECT_BIT,
 
-	REQ_F_FAIL_LINK_BIT,
+	/* first byte is taken by user flags, shift it to not overlap */
+	REQ_F_FAIL_LINK_BIT	= 8,
 	REQ_F_INFLIGHT_BIT,
 	REQ_F_CUR_POS_BIT,
 	REQ_F_NOWAIT_BIT,
@@ -1157,6 +1159,12 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 	__hash_init(ctx->cancel_hash, 1U << hash_bits);
 
+	ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL);
+	if (!ctx->dummy_ubuf)
+		goto err;
+	/* set invalid range, so io_import_fixed() fails meeting it */
+	ctx->dummy_ubuf->ubuf = -1UL;
+
 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
 		goto err;
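The dummy_ubuf allocated above is a shared sentinel: empty fixed-buffer slots point at it instead of NULL, and because its range is set to -1UL any io_import_fixed() against an empty slot fails naturally. The following standalone sketch illustrates that sentinel-slot pattern in miniature; it is not kernel code, just the idea under illustrative names.

#include <stdio.h>
#include <stdlib.h>

struct buf {
	unsigned long addr;	/* -1UL: a range no request can fall inside */
	size_t len;
};

/* one shared "empty" object marks unoccupied slots */
static struct buf dummy_buf = { .addr = -1UL, .len = 0 };
static struct buf *table[4];

static void table_init(void)
{
	for (int i = 0; i < 4; i++)
		table[i] = &dummy_buf;	/* every slot starts "empty" */
}

static int import(int slot, unsigned long addr, size_t len)
{
	struct buf *b = table[slot];

	/* same shape of range check as a fixed-buffer import:
	 * the sentinel's range can never contain a valid request */
	if (addr < b->addr || addr + len > b->addr + b->len)
		return -1;
	return 0;
}

int main(void)
{
	table_init();
	printf("import from empty slot: %d\n", import(0, 0x1000, 64));
	return 0;
}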
@@ -1184,6 +1192,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->submit_state.comp.locked_free_list);
 	return ctx;
 err:
+	kfree(ctx->dummy_ubuf);
 	kfree(ctx->cancel_hash);
 	kfree(ctx);
 	return NULL;
@@ -3977,7 +3986,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
 			break;
 
 		buf->addr = addr;
-		buf->len = pbuf->len;
+		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
 		buf->bid = bid;
 		addr += pbuf->len;
 		bid++;
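The io_add_buffers() hunk above clamps each provided buffer's recorded length to MAX_RW_COUNT (INT_MAX rounded down to a page boundary), so a single transfer can never be accounted beyond what the kernel allows. Below is a hedged userspace sketch, assuming liburing, of the IORING_OP_PROVIDE_BUFFERS path the fix applies to; on a fixed kernel an oversized length would simply be clamped per buffer rather than stored as-is.

#include <liburing.h>
#include <stdio.h>
#include <stdlib.h>

#define GROUP_ID  1
#define NR_BUFS   4
#define BUF_LEN   4096

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	void *mem;
	int ret;

	if (posix_memalign(&mem, 4096, (size_t)NR_BUFS * BUF_LEN))
		return 1;
	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* hand NR_BUFS buffers of BUF_LEN bytes each to buffer group GROUP_ID;
	 * the kernel records addr/len per buffer and clamps len to MAX_RW_COUNT */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_provide_buffers(sqe, mem, BUF_LEN, NR_BUFS, GROUP_ID, 0);
	io_uring_submit(&ring);

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret == 0) {
		printf("provide buffers: res=%d\n", cqe->res);	/* >= 0 on success */
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}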
@@ -6503,14 +6512,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	req->work.creds = NULL;
 
 	/* enforce forwards compatibility on users */
-	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) {
-		req->flags = 0;
+	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
 		return -EINVAL;
-	}
-
 	if (unlikely(req->opcode >= IORING_OP_LAST))
 		return -EINVAL;
-
 	if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
 		return -EACCES;
 
@@ -7539,6 +7544,7 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
 			io_ring_submit_lock(ctx, lock_ring);
 			spin_lock_irqsave(&ctx->completion_lock, flags);
 			io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
+			ctx->cq_extra++;
 			io_commit_cqring(ctx);
 			spin_unlock_irqrestore(&ctx->completion_lock, flags);
 			io_cqring_ev_posted(ctx);
@@ -8111,11 +8117,13 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot)
 	struct io_mapped_ubuf *imu = *slot;
 	unsigned int i;
 
-	for (i = 0; i < imu->nr_bvecs; i++)
-		unpin_user_page(imu->bvec[i].bv_page);
-	if (imu->acct_pages)
-		io_unaccount_mem(ctx, imu->acct_pages);
-	kvfree(imu);
+	if (imu != ctx->dummy_ubuf) {
+		for (i = 0; i < imu->nr_bvecs; i++)
+			unpin_user_page(imu->bvec[i].bv_page);
+		if (imu->acct_pages)
+			io_unaccount_mem(ctx, imu->acct_pages);
+		kvfree(imu);
+	}
 	*slot = NULL;
 }
 
@@ -8132,7 +8140,7 @@ static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
 	for (i = 0; i < ctx->nr_user_bufs; i++)
 		io_buffer_unmap(ctx, &ctx->user_bufs[i]);
 	kfree(ctx->user_bufs);
-	kfree(ctx->buf_data);
+	io_rsrc_data_free(ctx->buf_data);
 	ctx->user_bufs = NULL;
 	ctx->buf_data = NULL;
 	ctx->nr_user_bufs = 0;
@@ -8255,6 +8263,11 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	size_t size;
 	int ret, pret, nr_pages, i;
 
+	if (!iov->iov_base) {
+		*pimu = ctx->dummy_ubuf;
+		return 0;
+	}
+
 	ubuf = (unsigned long) iov->iov_base;
 	end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	start = ubuf >> PAGE_SHIFT;
@@ -8352,7 +8365,9 @@ static int io_buffer_validate(struct iovec *iov)
 	 * constraints here, we'll -EINVAL later when IO is
 	 * submitted if they are wrong.
 	 */
-	if (!iov->iov_base || !iov->iov_len)
+	if (!iov->iov_base)
+		return iov->iov_len ? -EFAULT : 0;
+	if (!iov->iov_len)
 		return -EFAULT;
 
 	/* arbitrary limit, but we need something */
@@ -8385,7 +8400,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 		return -ENOMEM;
 	ret = io_buffers_map_alloc(ctx, nr_args);
 	if (ret) {
-		kfree(data);
+		io_rsrc_data_free(data);
 		return ret;
 	}
 
@@ -8402,6 +8417,10 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 		ret = io_buffer_validate(&iov);
 		if (ret)
 			break;
+		if (!iov.iov_base && tag) {
+			ret = -EINVAL;
+			break;
+		}
 
 		ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
 					     &last_hpage);
@@ -8451,12 +8470,16 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 		err = io_buffer_validate(&iov);
 		if (err)
 			break;
+		if (!iov.iov_base && tag) {
+			err = -EINVAL;
+			break;
+		}
 		err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage);
 		if (err)
 			break;
 
 		i = array_index_nospec(offset, ctx->nr_user_bufs);
-		if (ctx->user_bufs[i]) {
+		if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
 			err = io_queue_rsrc_removal(ctx->buf_data, offset,
 						    ctx->rsrc_node, ctx->user_bufs[i]);
 			if (unlikely(err)) {
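The registration and update hunks above let a fixed-buffer table contain empty slots: an iovec of {NULL, 0} is accepted and internally backed by ctx->dummy_ubuf, while a NULL base combined with a non-zero tag is rejected. The sketch below is a hedged example, assuming liburing on a kernel with these changes, that registers a sparse table; the empty slots can later be filled through the rsrc update interface (IORING_REGISTER_BUFFERS_UPDATE) without re-registering everything.

#include <liburing.h>
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

int main(void)
{
	struct io_uring ring;
	struct iovec iovs[4];
	static char buf0[4096], buf2[4096];
	int ret;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	memset(iovs, 0, sizeof(iovs));
	iovs[0] = (struct iovec){ .iov_base = buf0, .iov_len = sizeof(buf0) };
	/* iovs[1] stays {NULL, 0}: an empty slot backed by dummy_ubuf */
	iovs[2] = (struct iovec){ .iov_base = buf2, .iov_len = sizeof(buf2) };
	/* iovs[3] stays {NULL, 0}: another empty slot */

	ret = io_uring_register_buffers(&ring, iovs, 4);
	printf("register_buffers with sparse slots: %s\n",
	       ret ? strerror(-ret) : "ok");

	io_uring_queue_exit(&ring);
	return 0;
}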
@@ -8604,6 +8627,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	if (ctx->hash_map)
 		io_wq_put_hash(ctx->hash_map);
 	kfree(ctx->cancel_hash);
+	kfree(ctx->dummy_ubuf);
 	kfree(ctx);
 }
 
@@ -9607,7 +9631,9 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	if (ret)
 		goto err;
 	/* always set a rsrc node */
-	io_rsrc_node_switch_start(ctx);
+	ret = io_rsrc_node_switch_start(ctx);
+	if (ret)
+		goto err;
 	io_rsrc_node_switch(ctx, NULL);
 
 	memset(&p->sq_off, 0, sizeof(p->sq_off));
@@ -10136,6 +10162,13 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(42, __u16,  personality);
 	BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
 
+	BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
+		     sizeof(struct io_uring_rsrc_update));
+	BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) >
+		     sizeof(struct io_uring_rsrc_update2));
+	/* should fit into one byte */
+	BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
+
 	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
 	BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
 	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
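The new BUILD_BUG_ON()s pin down uapi invariants: io_uring_files_update must stay layout-compatible with io_uring_rsrc_update, io_uring_rsrc_update must not outgrow io_uring_rsrc_update2, and the user-visible SQE flags must fit in one byte (which is why REQ_F_FAIL_LINK_BIT moved to 8 earlier in this diff). A hedged userspace counterpart, assuming uapi headers from 5.13 or newer where the rsrc_update structures exist, can catch the same structure drift at compile time:

/* compile against your installed kernel headers; fails to build on drift */
#include <linux/io_uring.h>

_Static_assert(sizeof(struct io_uring_files_update) ==
	       sizeof(struct io_uring_rsrc_update),
	       "files_update must alias rsrc_update");
_Static_assert(sizeof(struct io_uring_rsrc_update) <=
	       sizeof(struct io_uring_rsrc_update2),
	       "rsrc_update2 must be a superset of rsrc_update");

int main(void) { return 0; }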
|
Loading…
Reference in New Issue
Block a user