diff options
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 93 |
1 files changed, 70 insertions, 23 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index 0cac361bf6b8..bf548af0426c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1279,8 +1279,17 @@ static void io_prep_async_link(struct io_kiocb *req) { struct io_kiocb *cur; - io_for_each_link(cur, req) - io_prep_async_work(cur); + if (req->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->completion_lock); + io_for_each_link(cur, req) + io_prep_async_work(cur); + spin_unlock_irq(&ctx->completion_lock); + } else { + io_for_each_link(cur, req) + io_prep_async_work(cur); + } } static void io_queue_async_work(struct io_kiocb *req) @@ -1294,6 +1303,17 @@ static void io_queue_async_work(struct io_kiocb *req) /* init ->work of the whole link before punting */ io_prep_async_link(req); + + /* + * Not expected to happen, but if we do have a bug where this _can_ + * happen, catch it here and ensure the request is marked as + * canceled. That will make io-wq go through the usual work cancel + * procedure rather than attempt to run this request (or create a new + * worker for it). + */ + if (WARN_ON_ONCE(!same_thread_group(req->task, current))) + req->work.flags |= IO_WQ_WORK_CANCEL; + trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, &req->work, req->flags); io_wq_enqueue(tctx->io_wq, &req->work); @@ -1939,9 +1959,13 @@ static void tctx_task_work(struct callback_head *cb) node = next; } if (wq_list_empty(&tctx->task_list)) { + spin_lock_irq(&tctx->task_lock); clear_bit(0, &tctx->task_state); - if (wq_list_empty(&tctx->task_list)) + if (wq_list_empty(&tctx->task_list)) { + spin_unlock_irq(&tctx->task_lock); break; + } + spin_unlock_irq(&tctx->task_lock); /* another tctx_task_work() is enqueued, yield */ if (test_and_set_bit(0, &tctx->task_state)) break; @@ -2036,6 +2060,12 @@ static void io_req_task_queue(struct io_kiocb *req) io_req_task_work_add(req); } +static void io_req_task_queue_reissue(struct io_kiocb *req) +{ + req->io_task_work.func = io_queue_async_work; + io_req_task_work_add(req); +} + static inline void io_queue_next(struct io_kiocb *req) { struct io_kiocb *nxt = io_req_find_next(req); @@ -2205,7 +2235,7 @@ static inline bool io_run_task_work(void) * Find and free completed poll iocbs */ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, - struct list_head *done) + struct list_head *done, bool resubmit) { struct req_batch rb; struct io_kiocb *req; @@ -2220,11 +2250,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, inflight_entry); list_del(&req->inflight_entry); - if (READ_ONCE(req->result) == -EAGAIN && + if (READ_ONCE(req->result) == -EAGAIN && resubmit && !(req->flags & REQ_F_DONT_REISSUE)) { req->iopoll_completed = 0; req_ref_get(req); - io_queue_async_work(req); + io_req_task_queue_reissue(req); continue; } @@ -2244,7 +2274,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, } static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, - long min) + long min, bool resubmit) { struct io_kiocb *req, *tmp; LIST_HEAD(done); @@ -2287,7 +2317,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, } if (!list_empty(&done)) - io_iopoll_complete(ctx, nr_events, &done); + io_iopoll_complete(ctx, nr_events, &done, resubmit); return ret; } @@ -2305,7 +2335,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) while (!list_empty(&ctx->iopoll_list)) { unsigned int nr_events = 0; - io_do_iopoll(ctx, &nr_events, 0); + io_do_iopoll(ctx, &nr_events, 0, false); /* let it sleep and repeat later if can't complete a request */ if (nr_events == 0) @@ -2367,7 +2397,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) list_empty(&ctx->iopoll_list)) break; } - ret = io_do_iopoll(ctx, &nr_events, min); + ret = io_do_iopoll(ctx, &nr_events, min, true); } while (!ret && nr_events < min && !need_resched()); out: mutex_unlock(&ctx->uring_lock); @@ -2417,6 +2447,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req) */ if (percpu_ref_is_dying(&ctx->refs)) return false; + /* + * Play it safe and assume not safe to re-import and reissue if we're + * not in the original thread group (or in task context). + */ + if (!same_thread_group(req->task, current) || !in_task()) + return false; return true; } #else @@ -2747,7 +2783,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, req->flags &= ~REQ_F_REISSUE; if (io_resubmit_prep(req)) { req_ref_get(req); - io_queue_async_work(req); + io_req_task_queue_reissue(req); } else { int cflags = 0; @@ -4802,6 +4838,7 @@ IO_NETOP_FN(recv); struct io_poll_table { struct poll_table_struct pt; struct io_kiocb *req; + int nr_entries; int error; }; @@ -4902,7 +4939,6 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask) if (req->poll.events & EPOLLONESHOT) flags = 0; if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) { - io_poll_remove_waitqs(req); req->poll.done = true; flags = 0; } @@ -4925,6 +4961,7 @@ static void io_poll_task_func(struct io_kiocb *req) done = io_poll_complete(req, req->result); if (done) { + io_poll_remove_double(req); hash_del(&req->hash_node); } else { req->result = 0; @@ -4995,11 +5032,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, struct io_kiocb *req = pt->req; /* - * If poll->head is already set, it's because the file being polled - * uses multiple waitqueues for poll handling (eg one for read, one - * for write). Setup a separate io_poll_iocb if this happens. + * The file being polled uses multiple waitqueues for poll handling + * (e.g. one for read, one for write). Setup a separate io_poll_iocb + * if this happens. */ - if (unlikely(poll->head)) { + if (unlikely(pt->nr_entries)) { struct io_poll_iocb *poll_one = poll; /* already have a 2nd entry, fail a third attempt */ @@ -5027,7 +5064,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, *poll_ptr = poll; } - pt->error = 0; + pt->nr_entries++; poll->head = head; if (poll->events & EPOLLEXCLUSIVE) @@ -5104,11 +5141,16 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, ipt->pt._key = mask; ipt->req = req; - ipt->error = -EINVAL; + ipt->error = 0; + ipt->nr_entries = 0; mask = vfs_poll(req->file, &ipt->pt) & poll->events; + if (unlikely(!ipt->nr_entries) && !ipt->error) + ipt->error = -EINVAL; spin_lock_irq(&ctx->completion_lock); + if (ipt->error || (mask && (poll->events & EPOLLONESHOT))) + io_poll_remove_double(req); if (likely(poll->head)) { spin_lock(&poll->head->lock); if (unlikely(list_empty(&poll->wait.entry))) { @@ -5179,7 +5221,6 @@ static int io_arm_poll_handler(struct io_kiocb *req) ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, io_async_wake); if (ret || ipt.error) { - io_poll_remove_double(req); spin_unlock_irq(&ctx->completion_lock); if (ret) return IO_APOLL_READY; @@ -6792,7 +6833,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) mutex_lock(&ctx->uring_lock); if (!list_empty(&ctx->iopoll_list)) - io_do_iopoll(ctx, &nr_events, 0); + io_do_iopoll(ctx, &nr_events, 0, true); /* * Don't submit if refs are dying, good for io_uring_register(), @@ -7899,15 +7940,19 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, struct io_wq_data data; unsigned int concurrency; + mutex_lock(&ctx->uring_lock); hash = ctx->hash_map; if (!hash) { hash = kzalloc(sizeof(*hash), GFP_KERNEL); - if (!hash) + if (!hash) { + mutex_unlock(&ctx->uring_lock); return ERR_PTR(-ENOMEM); + } refcount_set(&hash->refs, 1); init_waitqueue_head(&hash->wait); ctx->hash_map = hash; } + mutex_unlock(&ctx->uring_lock); data.hash = hash; data.task = task; @@ -7981,9 +8026,11 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, f = fdget(p->wq_fd); if (!f.file) return -ENXIO; - fdput(f); - if (f.file->f_op != &io_uring_fops) + if (f.file->f_op != &io_uring_fops) { + fdput(f); return -EINVAL; + } + fdput(f); } if (ctx->flags & IORING_SETUP_SQPOLL) { struct task_struct *tsk; |