diff options
author | Joel Stanley <joel@jms.id.au> | 2020-08-31 03:46:52 +0300 |
---|---|---|
committer | Joel Stanley <joel@jms.id.au> | 2020-08-31 03:46:57 +0300 |
commit | 0dd0c8c492fa70707ca4f0d36dcb2e3c64105b16 (patch) | |
tree | a420abd8f26264544246602c60d161a7cc4de390 /fs/io_uring.c | |
parent | 31d8605658d37d9197a989838508481d5dc1d8bc (diff) | |
parent | 9ece50d8a470ca7235ffd6ac0f9c5f0f201fe2c8 (diff) | |
download | linux-dev-5.8.tar.xz |
Merge tag 'v5.8.5' into dev-5.8dev-5.8
This is the 5.8.5 stable release
Signed-off-by: Joel Stanley <joel@jms.id.au>
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 265 |
1 files changed, 208 insertions, 57 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index 493e5047e67c..26978630378e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -669,12 +669,12 @@ struct io_kiocb { * restore the work, if needed. */ struct { - struct callback_head task_work; struct hlist_node hash_node; struct async_poll *apoll; }; struct io_wq_work work; }; + struct callback_head task_work; }; #define IO_PLUG_THRESHOLD 2 @@ -1549,12 +1549,9 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr) /* * Called if REQ_F_LINK_HEAD is set, and we fail the head request */ -static void io_fail_links(struct io_kiocb *req) +static void __io_fail_links(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - unsigned long flags; - - spin_lock_irqsave(&ctx->completion_lock, flags); while (!list_empty(&req->link_list)) { struct io_kiocb *link = list_first_entry(&req->link_list, @@ -1568,13 +1565,29 @@ static void io_fail_links(struct io_kiocb *req) io_link_cancel_timeout(link); } else { io_cqring_fill_event(link, -ECANCELED); + link->flags |= REQ_F_COMP_LOCKED; __io_double_put_req(link); } req->flags &= ~REQ_F_LINK_TIMEOUT; } io_commit_cqring(ctx); - spin_unlock_irqrestore(&ctx->completion_lock, flags); +} + +static void io_fail_links(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + + if (!(req->flags & REQ_F_COMP_LOCKED)) { + unsigned long flags; + + spin_lock_irqsave(&ctx->completion_lock, flags); + __io_fail_links(req); + spin_unlock_irqrestore(&ctx->completion_lock, flags); + } else { + __io_fail_links(req); + } + io_cqring_ev_posted(ctx); } @@ -1747,6 +1760,17 @@ static int io_put_kbuf(struct io_kiocb *req) return cflags; } +static inline bool io_run_task_work(void) +{ + if (current->task_works) { + __set_current_state(TASK_RUNNING); + task_work_run(); + return true; + } + + return false; +} + static void io_iopoll_queue(struct list_head *again) { struct io_kiocb *req; @@ -1936,6 +1960,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, */ if (!(++iters & 7)) { mutex_unlock(&ctx->uring_lock); + io_run_task_work(); mutex_lock(&ctx->uring_lock); } @@ -2661,8 +2686,10 @@ static int io_read(struct io_kiocb *req, bool force_nonblock) if (req->file->f_op->read_iter) ret2 = call_read_iter(req->file, kiocb, &iter); - else + else if (req->file->f_op->read) ret2 = loop_rw_iter(READ, req->file, kiocb, &iter); + else + ret2 = -EINVAL; /* Catch -EAGAIN return for forced non-blocking submission */ if (!force_nonblock || ret2 != -EAGAIN) { @@ -2776,8 +2803,10 @@ static int io_write(struct io_kiocb *req, bool force_nonblock) if (req->file->f_op->write_iter) ret2 = call_write_iter(req->file, kiocb, &iter); - else + else if (req->file->f_op->write) ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter); + else + ret2 = -EINVAL; if (!force_nonblock) current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; @@ -4088,22 +4117,22 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb) { struct task_struct *tsk = req->task; struct io_ring_ctx *ctx = req->ctx; - int ret, notify = TWA_RESUME; + int ret, notify; /* - * SQPOLL kernel thread doesn't need notification, just a wakeup. - * If we're not using an eventfd, then TWA_RESUME is always fine, - * as we won't have dependencies between request completions for - * other kernel wait conditions. + * SQPOLL kernel thread doesn't need notification, just a wakeup. For + * all other cases, use TWA_SIGNAL unconditionally to ensure we're + * processing task_work. There's no reliable way to tell if TWA_RESUME + * will do the job. */ - if (ctx->flags & IORING_SETUP_SQPOLL) - notify = 0; - else if (ctx->cq_ev_fd) + notify = 0; + if (!(ctx->flags & IORING_SETUP_SQPOLL)) notify = TWA_SIGNAL; ret = task_work_add(tsk, cb, notify); if (!ret) wake_up_process(tsk); + return ret; } @@ -4124,6 +4153,8 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, tsk = req->task; req->result = mask; init_task_work(&req->task_work, func); + percpu_ref_get(&req->ctx->refs); + /* * If this fails, then the task is exiting. When a task exits, the * work gets canceled, so just cancel this request as well instead @@ -4160,9 +4191,24 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll) return false; } -static void io_poll_remove_double(struct io_kiocb *req, void *data) +static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req) +{ + /* pure poll stashes this in ->io, poll driven retry elsewhere */ + if (req->opcode == IORING_OP_POLL_ADD) + return (struct io_poll_iocb *) req->io; + return req->apoll->double_poll; +} + +static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req) +{ + if (req->opcode == IORING_OP_POLL_ADD) + return &req->poll; + return &req->apoll->poll; +} + +static void io_poll_remove_double(struct io_kiocb *req) { - struct io_poll_iocb *poll = data; + struct io_poll_iocb *poll = io_poll_get_double(req); lockdep_assert_held(&req->ctx->completion_lock); @@ -4182,7 +4228,7 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error) { struct io_ring_ctx *ctx = req->ctx; - io_poll_remove_double(req, req->io); + io_poll_remove_double(req); req->poll.done = true; io_cqring_fill_event(req, error ? error : mangle_poll(mask)); io_commit_cqring(ctx); @@ -4208,6 +4254,7 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt) static void io_poll_task_func(struct callback_head *cb) { struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); + struct io_ring_ctx *ctx = req->ctx; struct io_kiocb *nxt = NULL; io_poll_task_handler(req, &nxt); @@ -4218,13 +4265,14 @@ static void io_poll_task_func(struct callback_head *cb) __io_queue_sqe(nxt, NULL); mutex_unlock(&ctx->uring_lock); } + percpu_ref_put(&ctx->refs); } static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, int sync, void *key) { struct io_kiocb *req = wait->private; - struct io_poll_iocb *poll = req->apoll->double_poll; + struct io_poll_iocb *poll = io_poll_get_single(req); __poll_t mask = key_to_poll(key); /* for instances that support it check for an event match first: */ @@ -4238,6 +4286,8 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, done = list_empty(&poll->wait.entry); if (!done) list_del_init(&poll->wait.entry); + /* make sure double remove sees this as being gone */ + wait->private = NULL; spin_unlock(&poll->head->lock); if (!done) __io_async_wake(req, poll, mask, io_poll_task_func); @@ -4313,7 +4363,8 @@ static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx, struct io_kiocb *req) { if (io_op_defs[req->opcode].needs_mm && !current->mm) { - if (unlikely(!mmget_not_zero(ctx->sqo_mm))) + if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) || + !mmget_not_zero(ctx->sqo_mm))) return -EFAULT; kthread_use_mm(ctx->sqo_mm); } @@ -4332,6 +4383,7 @@ static void io_async_task_func(struct callback_head *cb) if (io_poll_rewait(req, &apoll->poll)) { spin_unlock_irq(&ctx->completion_lock); + percpu_ref_put(&ctx->refs); return; } @@ -4346,7 +4398,7 @@ static void io_async_task_func(struct callback_head *cb) } } - io_poll_remove_double(req, apoll->double_poll); + io_poll_remove_double(req); spin_unlock_irq(&ctx->completion_lock); /* restore ->work in case we need to retry again */ @@ -4356,7 +4408,6 @@ static void io_async_task_func(struct callback_head *cb) kfree(apoll); if (!canceled) { - __set_current_state(TASK_RUNNING); if (io_sq_thread_acquire_mm(ctx, req)) { io_cqring_add_event(req, -EFAULT); goto end_req; @@ -4370,6 +4421,7 @@ end_req: req_set_fail_links(req); io_double_put_req(req); } + percpu_ref_put(&ctx->refs); } static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync, @@ -4472,8 +4524,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req) ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, io_async_wake); - if (ret) { - io_poll_remove_double(req, apoll->double_poll); + if (ret || ipt.error) { + io_poll_remove_double(req); spin_unlock_irq(&ctx->completion_lock); if (req->flags & REQ_F_WORK_INITIALIZED) memcpy(&req->work, &apoll->work, sizeof(req->work)); @@ -4507,14 +4559,13 @@ static bool io_poll_remove_one(struct io_kiocb *req) { bool do_complete; + io_poll_remove_double(req); + if (req->opcode == IORING_OP_POLL_ADD) { - io_poll_remove_double(req, req->io); do_complete = __io_poll_remove_one(req, &req->poll); } else { struct async_poll *apoll = req->apoll; - io_poll_remove_double(req, apoll->double_poll); - /* non-poll requests have submit ref still */ do_complete = __io_poll_remove_one(req, &apoll->poll); if (do_complete) { @@ -4536,6 +4587,7 @@ static bool io_poll_remove_one(struct io_kiocb *req) io_cqring_fill_event(req, -ECANCELED); io_commit_cqring(req->ctx); req->flags |= REQ_F_COMP_LOCKED; + req_set_fail_links(req); io_put_req(req); } @@ -4709,6 +4761,23 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +static int __io_timeout_cancel(struct io_kiocb *req) +{ + int ret; + + list_del_init(&req->list); + + ret = hrtimer_try_to_cancel(&req->io->timeout.timer); + if (ret == -1) + return -EALREADY; + + req_set_fail_links(req); + req->flags |= REQ_F_COMP_LOCKED; + io_cqring_fill_event(req, -ECANCELED); + io_put_req(req); + return 0; +} + static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) { struct io_kiocb *req; @@ -4716,7 +4785,6 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) list_for_each_entry(req, &ctx->timeout_list, list) { if (user_data == req->user_data) { - list_del_init(&req->list); ret = 0; break; } @@ -4725,14 +4793,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) if (ret == -ENOENT) return ret; - ret = hrtimer_try_to_cancel(&req->io->timeout.timer); - if (ret == -1) - return -EALREADY; - - req_set_fail_links(req); - io_cqring_fill_event(req, -ECANCELED); - io_put_req(req); - return 0; + return __io_timeout_cancel(req); } static int io_timeout_remove_prep(struct io_kiocb *req, @@ -6082,8 +6143,7 @@ static int io_sq_thread(void *data) if (!list_empty(&ctx->poll_list) || need_resched() || (!time_after(jiffies, timeout) && ret != -EBUSY && !percpu_ref_is_dying(&ctx->refs))) { - if (current->task_works) - task_work_run(); + io_run_task_work(); cond_resched(); continue; } @@ -6115,8 +6175,7 @@ static int io_sq_thread(void *data) finish_wait(&ctx->sqo_wait, &wait); break; } - if (current->task_works) { - task_work_run(); + if (io_run_task_work()) { finish_wait(&ctx->sqo_wait, &wait); continue; } @@ -6145,8 +6204,7 @@ static int io_sq_thread(void *data) timeout = jiffies + ctx->sq_thread_idle; } - if (current->task_works) - task_work_run(); + io_run_task_work(); io_sq_thread_drop_mm(ctx); revert_creds(old_cred); @@ -6211,9 +6269,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, do { if (io_cqring_events(ctx, false) >= min_events) return 0; - if (!current->task_works) + if (!io_run_task_work()) break; - task_work_run(); } while (1); if (sig) { @@ -6235,8 +6292,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, TASK_INTERRUPTIBLE); /* make sure we run task_work before checking for signals */ - if (current->task_works) - task_work_run(); + if (io_run_task_work()) + continue; if (signal_pending(current)) { if (current->jobctl & JOBCTL_TASK_WORK) { spin_lock_irq(¤t->sighand->siglock); @@ -7086,6 +7143,9 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries, return SIZE_MAX; #endif + if (sq_offset) + *sq_offset = off; + sq_array_size = array_size(sizeof(u32), sq_entries); if (sq_array_size == SIZE_MAX) return SIZE_MAX; @@ -7093,9 +7153,6 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries, if (check_add_overflow(off, sq_array_size, &off)) return SIZE_MAX; - if (sq_offset) - *sq_offset = off; - return off; } @@ -7488,6 +7545,98 @@ static bool io_wq_files_match(struct io_wq_work *work, void *data) return work->files == files; } +/* + * Returns true if 'preq' is the link parent of 'req' + */ +static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req) +{ + struct io_kiocb *link; + + if (!(preq->flags & REQ_F_LINK_HEAD)) + return false; + + list_for_each_entry(link, &preq->link_list, link_list) { + if (link == req) + return true; + } + + return false; +} + +/* + * We're looking to cancel 'req' because it's holding on to our files, but + * 'req' could be a link to another request. See if it is, and cancel that + * parent request if so. + */ +static bool io_poll_remove_link(struct io_ring_ctx *ctx, struct io_kiocb *req) +{ + struct hlist_node *tmp; + struct io_kiocb *preq; + bool found = false; + int i; + + spin_lock_irq(&ctx->completion_lock); + for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { + struct hlist_head *list; + + list = &ctx->cancel_hash[i]; + hlist_for_each_entry_safe(preq, tmp, list, hash_node) { + found = io_match_link(preq, req); + if (found) { + io_poll_remove_one(preq); + break; + } + } + } + spin_unlock_irq(&ctx->completion_lock); + return found; +} + +static bool io_timeout_remove_link(struct io_ring_ctx *ctx, + struct io_kiocb *req) +{ + struct io_kiocb *preq; + bool found = false; + + spin_lock_irq(&ctx->completion_lock); + list_for_each_entry(preq, &ctx->timeout_list, list) { + found = io_match_link(preq, req); + if (found) { + __io_timeout_cancel(preq); + break; + } + } + spin_unlock_irq(&ctx->completion_lock); + return found; +} + +static bool io_cancel_link_cb(struct io_wq_work *work, void *data) +{ + return io_match_link(container_of(work, struct io_kiocb, work), data); +} + +static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) +{ + enum io_wq_cancel cret; + + /* cancel this particular work, if it's running */ + cret = io_wq_cancel_work(ctx->io_wq, &req->work); + if (cret != IO_WQ_CANCEL_NOTFOUND) + return; + + /* find links that hold this pending, cancel those */ + cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_link_cb, req, true); + if (cret != IO_WQ_CANCEL_NOTFOUND) + return; + + /* if we have a poll link holding this pending, cancel that */ + if (io_poll_remove_link(ctx, req)) + return; + + /* final option, timeout link is holding this req pending */ + io_timeout_remove_link(ctx, req); +} + static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { @@ -7529,10 +7678,10 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, clear_bit(0, &ctx->cq_check_overflow); ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } - spin_unlock_irq(&ctx->completion_lock); - WRITE_ONCE(ctx->rings->cq_overflow, atomic_inc_return(&ctx->cached_cq_overflow)); + io_commit_cqring(ctx); + spin_unlock_irq(&ctx->completion_lock); /* * Put inflight ref and overflow ref. If that's @@ -7544,7 +7693,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, continue; } } else { - io_wq_cancel_work(ctx->io_wq, &cancel_req->work); + /* cancel this request, or head link requests */ + io_attempt_cancel(ctx, cancel_req); io_put_req(cancel_req); } @@ -7655,8 +7805,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, int submitted = 0; struct fd f; - if (current->task_works) - task_work_run(); + io_run_task_work(); if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP)) return -EINVAL; @@ -7828,6 +7977,10 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, struct io_rings *rings; size_t size, sq_array_offset; + /* make sure these are sane, as we already accounted them */ + ctx->sq_entries = p->sq_entries; + ctx->cq_entries = p->cq_entries; + size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset); if (size == SIZE_MAX) return -EOVERFLOW; @@ -7844,8 +7997,6 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, rings->cq_ring_entries = p->cq_entries; ctx->sq_mask = rings->sq_ring_mask; ctx->cq_mask = rings->cq_ring_mask; - ctx->sq_entries = rings->sq_ring_entries; - ctx->cq_entries = rings->cq_ring_entries; size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); if (size == SIZE_MAX) { |