linux/fs/io-wq.h
Jens Axboe 6bf9c47a39 io_uring: defer file assignment
If an application uses direct open or accept, it knows in advance what
direct descriptor value it will get as it picks it itself. This allows
combined requests such as:

sqe = io_uring_get_sqe(ring);
io_uring_prep_openat_direct(sqe, ..., file_slot);
sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS;

sqe = io_uring_get_sqe(ring);
io_uring_prep_read(sqe,file_slot, buf, buf_size, 0);
sqe->flags |= IOSQE_FIXED_FILE;

io_uring_submit(ring);

where we prepare both a file open and read, and only get a completion
event for the read when both have completed successfully.

Currently links are fully prepared before the head is issued, but that
fails if the dependent link needs a file assigned that isn't valid until
the head has completed.

Conversely, if the same chain is performed but the fixed file slot is
already valid, then we would be unexpectedly returning data from the
old file slot rather than the newly opened one. Make sure we're
consistent here.

Allow deferral of file setup, which makes this documented case work.

Cc: stable@vger.kernel.org # v5.15+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2022-04-07 11:17:28 -06:00

229 lines
5.1 KiB
C

#ifndef INTERNAL_IO_WQ_H
#define INTERNAL_IO_WQ_H
#include <linux/refcount.h>
struct io_wq;
enum {
IO_WQ_WORK_CANCEL = 1,
IO_WQ_WORK_HASHED = 2,
IO_WQ_WORK_UNBOUND = 4,
IO_WQ_WORK_CONCURRENT = 16,
IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
};
enum io_wq_cancel {
IO_WQ_CANCEL_OK, /* cancelled before started */
IO_WQ_CANCEL_RUNNING, /* found, running, and attempted cancelled */
IO_WQ_CANCEL_NOTFOUND, /* work not found */
};
struct io_wq_work_node {
struct io_wq_work_node *next;
};
struct io_wq_work_list {
struct io_wq_work_node *first;
struct io_wq_work_node *last;
};
#define wq_list_for_each(pos, prv, head) \
for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
#define wq_list_for_each_resume(pos, prv) \
for (; pos; prv = pos, pos = (pos)->next)
#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL)
#define INIT_WQ_LIST(list) do { \
(list)->first = NULL; \
} while (0)
static inline void wq_list_add_after(struct io_wq_work_node *node,
struct io_wq_work_node *pos,
struct io_wq_work_list *list)
{
struct io_wq_work_node *next = pos->next;
pos->next = node;
node->next = next;
if (!next)
list->last = node;
}
/**
* wq_list_merge - merge the second list to the first one.
* @list0: the first list
* @list1: the second list
* Return the first node after mergence.
*/
static inline struct io_wq_work_node *wq_list_merge(struct io_wq_work_list *list0,
struct io_wq_work_list *list1)
{
struct io_wq_work_node *ret;
if (!list0->first) {
ret = list1->first;
} else {
ret = list0->first;
list0->last->next = list1->first;
}
INIT_WQ_LIST(list0);
INIT_WQ_LIST(list1);
return ret;
}
static inline void wq_list_add_tail(struct io_wq_work_node *node,
struct io_wq_work_list *list)
{
node->next = NULL;
if (!list->first) {
list->last = node;
WRITE_ONCE(list->first, node);
} else {
list->last->next = node;
list->last = node;
}
}
static inline void wq_list_add_head(struct io_wq_work_node *node,
struct io_wq_work_list *list)
{
node->next = list->first;
if (!node->next)
list->last = node;
WRITE_ONCE(list->first, node);
}
static inline void wq_list_cut(struct io_wq_work_list *list,
struct io_wq_work_node *last,
struct io_wq_work_node *prev)
{
/* first in the list, if prev==NULL */
if (!prev)
WRITE_ONCE(list->first, last->next);
else
prev->next = last->next;
if (last == list->last)
list->last = prev;
last->next = NULL;
}
static inline void __wq_list_splice(struct io_wq_work_list *list,
struct io_wq_work_node *to)
{
list->last->next = to->next;
to->next = list->first;
INIT_WQ_LIST(list);
}
static inline bool wq_list_splice(struct io_wq_work_list *list,
struct io_wq_work_node *to)
{
if (!wq_list_empty(list)) {
__wq_list_splice(list, to);
return true;
}
return false;
}
static inline void wq_stack_add_head(struct io_wq_work_node *node,
struct io_wq_work_node *stack)
{
node->next = stack->next;
stack->next = node;
}
static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work_node *node,
struct io_wq_work_node *prev)
{
wq_list_cut(list, node, prev);
}
static inline
struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack)
{
struct io_wq_work_node *node = stack->next;
stack->next = node->next;
return node;
}
struct io_wq_work {
struct io_wq_work_node list;
unsigned flags;
int fd;
};
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
{
if (!work->list.next)
return NULL;
return container_of(work->list.next, struct io_wq_work, list);
}
typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
typedef void (io_wq_work_fn)(struct io_wq_work *);
struct io_wq_hash {
refcount_t refs;
unsigned long map;
struct wait_queue_head wait;
};
static inline void io_wq_put_hash(struct io_wq_hash *hash)
{
if (refcount_dec_and_test(&hash->refs))
kfree(hash);
}
struct io_wq_data {
struct io_wq_hash *hash;
struct task_struct *task;
io_wq_work_fn *do_work;
free_work_fn *free_work;
};
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
void io_wq_exit_start(struct io_wq *wq);
void io_wq_put_and_exit(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
int io_wq_max_workers(struct io_wq *wq, int *new_count);
static inline bool io_wq_is_hashed(struct io_wq_work *work)
{
return work->flags & IO_WQ_WORK_HASHED;
}
typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
void *data, bool cancel_all);
#if defined(CONFIG_IO_WQ)
extern void io_wq_worker_sleeping(struct task_struct *);
extern void io_wq_worker_running(struct task_struct *);
#else
static inline void io_wq_worker_sleeping(struct task_struct *tsk)
{
}
static inline void io_wq_worker_running(struct task_struct *tsk)
{
}
#endif
static inline bool io_wq_current_is_worker(void)
{
return in_task() && (current->flags & PF_IO_WORKER) &&
current->worker_private;
}
#endif