io_uring: lockless task list
With networking use cases we see contention on the spinlock used to protect the task_list when multiple threads try to add completions at once. Instead we can use a lockless list, and assume that the first caller to add to the list is responsible for kicking off task work.

Signed-off-by: Dylan Yudaken <dylany@fb.com>
Link: https://lore.kernel.org/r/20220622134028.2013417-4-dylany@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent c34398a8c0
commit f88262e60b

4 changed files with 14 additions and 35 deletions
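The change builds on two primitives from <linux/llist.h>: llist_add(), which pushes a node with a single cmpxchg and returns true if the list was empty beforehand, and llist_del_all(), which detaches the whole list with a single xchg. Before the diff, here is the pattern in isolation: a minimal, self-contained userspace sketch using C11 atomics rather than the kernel API. The names (struct node, task_list_add, task_list_del_all, run_task_work) are illustrative stand-ins, not kernel identifiers.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
	int req_id;
};

static _Atomic(struct node *) task_list = NULL;

/* Lock-free push; returns true if the list was empty before (like llist_add). */
static bool task_list_add(struct node *n)
{
	struct node *first = atomic_load(&task_list);

	do {
		n->next = first;	/* CAS retries refresh 'first' for us */
	} while (!atomic_compare_exchange_weak(&task_list, &first, n));

	return first == NULL;
}

/* Detach every pending entry in one atomic op (like llist_del_all). */
static struct node *task_list_del_all(void)
{
	return atomic_exchange(&task_list, (struct node *)NULL);
}

static void run_task_work(void)
{
	struct node *n = task_list_del_all();

	/* Entries come back newest-first: this kind of list is LIFO. */
	while (n) {
		struct node *next = n->next;	/* handler may free n */

		printf("handling req %d\n", n->req_id);
		n = next;
	}
}

int main(void)
{
	struct node a = { .req_id = 1 }, b = { .req_id = 2 };

	/* First push finds the list empty: this caller must kick off the work. */
	printf("first adder: %s\n", task_list_add(&a) ? "yes" : "no");
	/* Second push sees a non-empty list: work is already pending. */
	printf("first adder: %s\n", task_list_add(&b) ? "yes" : "no");

	run_task_work();	/* prints req 2 then req 1 */
	return 0;
}

Pushing `a` onto the empty list returns true, so that caller alone schedules the consumer; the later push of `b` piggybacks on the already-pending work. That is exactly the "first caller kicks off task work" contract from the commit message.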
@@ -428,7 +428,7 @@ typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);

 struct io_task_work {
 	union {
-		struct io_wq_work_node	node;
+		struct llist_node	node;
 		struct llist_node	fallback_node;
 	};
 	io_req_tw_func_t		func;
@@ -986,11 +986,12 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
 	percpu_ref_put(&ctx->refs);
 }

-static void handle_tw_list(struct io_wq_work_node *node,
+static void handle_tw_list(struct llist_node *node,
 			   struct io_ring_ctx **ctx, bool *locked)
 {
 	do {
-		struct io_wq_work_node *next = node->next;
+		struct llist_node *next = node->next;
 		struct io_kiocb *req = container_of(node, struct io_kiocb,
 						    io_task_work.node);

@@ -1014,23 +1015,11 @@ void tctx_task_work(struct callback_head *cb)
 	struct io_ring_ctx *ctx = NULL;
 	struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
 						  task_work);
+	struct llist_node *node = llist_del_all(&tctx->task_list);

-	while (1) {
-		struct io_wq_work_node *node;
-
-		spin_lock_irq(&tctx->task_lock);
-		node = tctx->task_list.first;
-		INIT_WQ_LIST(&tctx->task_list);
-		if (!node)
-			tctx->task_running = false;
-		spin_unlock_irq(&tctx->task_lock);
-		if (!node)
-			break;
+	if (node) {
 		handle_tw_list(node, &ctx, &uring_locked);
 		cond_resched();
-
-		if (data_race(!tctx->task_list.first) && uring_locked)
-			io_submit_flush_completions(ctx);
 	}

 	ctx_flush_and_put(ctx, &uring_locked);
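Worth noting on the consumer side: llist_del_all() detaches every pending entry in one atomic xchg, which is why the spin_lock_irq()/task_running dance disappears, along with the data_race() re-check of task_list.first. One behavioral difference, a property of llist itself rather than anything spelled out in the commit message: llist hands entries back newest-first, so task work is now walked in LIFO order instead of the FIFO order the old wq_list gave.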
@@ -1044,16 +1033,10 @@ void io_req_task_work_add(struct io_kiocb *req)
 {
 	struct io_uring_task *tctx = req->task->io_uring;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_wq_work_node *node;
-	unsigned long flags;
+	struct llist_node *node;
 	bool running;

-	spin_lock_irqsave(&tctx->task_lock, flags);
-	wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
-	running = tctx->task_running;
-	if (!running)
-		tctx->task_running = true;
-	spin_unlock_irqrestore(&tctx->task_lock, flags);
+	running = !llist_add(&req->io_task_work.node, &tctx->task_list);

 	/* task_work already pending, we're done */
 	if (running)
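The load-bearing line is running = !llist_add(...): llist_add() returns true when the list was empty before the push, so exactly one concurrent producer, the one that finds the list empty, sees running == false and proceeds to task_work_add(); every other producer observes work already pending and returns early. This is the same guarantee the spinlock plus the task_running flag used to provide, now obtained from a single atomic operation.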
@@ -1065,11 +1048,8 @@ void io_req_task_work_add(struct io_kiocb *req)
 	if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
 		return;

-	spin_lock_irqsave(&tctx->task_lock, flags);
-	tctx->task_running = false;
-	node = tctx->task_list.first;
-	INIT_WQ_LIST(&tctx->task_list);
-	spin_unlock_irqrestore(&tctx->task_lock, flags);
+	node = llist_del_all(&tctx->task_list);

 	while (node) {
 		req = container_of(node, struct io_kiocb, io_task_work.node);
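This hunk is the failure path: if task_work_add() fails because the task is exiting, the producer reclaims whatever it can see with llist_del_all() and, in the loop that follows (not shown in this hunk), routes each request to the fallback work queue via fallback_node. That fallback list was already an llist before this commit, which is why fallback_node can now share the union with node so cleanly.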
@@ -86,8 +86,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
 	atomic_set(&tctx->in_idle, 0);
 	atomic_set(&tctx->inflight_tracked, 0);
 	task->io_uring = tctx;
-	spin_lock_init(&tctx->task_lock);
-	INIT_WQ_LIST(&tctx->task_list);
+	init_llist_head(&tctx->task_list);
 	init_task_work(&tctx->task_work, tctx_task_work);
 	return 0;
 }
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0

+#include <linux/llist.h>
+
 /*
  * Arbitrary limit, can be raised if need be
  */
@@ -19,9 +21,7 @@ struct io_uring_task {
 	struct percpu_counter	inflight;

 	struct { /* task_work */
-		spinlock_t		task_lock;
-		bool			task_running;
-		struct io_wq_work_list	task_list;
+		struct llist_head	task_list;
 		struct callback_head	task_work;
 	} ____cacheline_aligned_in_smp;
 };