diff options
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 202 |
1 files changed, 96 insertions, 106 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index 381d50becd04..bb25e3997d41 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -524,6 +524,7 @@ enum { REQ_F_OVERFLOW_BIT, REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, + REQ_F_NO_FILE_TABLE_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -577,6 +578,8 @@ enum { REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), /* buffer already selected */ REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + /* doesn't need file table for this request */ + REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT), }; struct async_poll { @@ -616,6 +619,8 @@ struct io_kiocb { bool needs_fixed_file; u8 opcode; + u16 buf_index; + struct io_ring_ctx *ctx; struct list_head list; unsigned int flags; @@ -677,8 +682,6 @@ struct io_op_def { unsigned needs_mm : 1; /* needs req->file assigned */ unsigned needs_file : 1; - /* needs req->file assigned IFF fd is >= 0 */ - unsigned fd_non_neg : 1; /* hash wq insertion if file is a regular file */ unsigned hash_reg_file : 1; /* unbound wq insertion if file is a non-regular file */ @@ -781,8 +784,6 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, }, [IORING_OP_OPENAT] = { - .needs_file = 1, - .fd_non_neg = 1, .file_table = 1, .needs_fs = 1, }, @@ -796,9 +797,8 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_STATX] = { .needs_mm = 1, - .needs_file = 1, - .fd_non_neg = 1, .needs_fs = 1, + .file_table = 1, }, [IORING_OP_READ] = { .needs_mm = 1, @@ -833,8 +833,6 @@ static const struct io_op_def io_op_defs[] = { .buffer_select = 1, }, [IORING_OP_OPENAT2] = { - .needs_file = 1, - .fd_non_neg = 1, .file_table = 1, .needs_fs = 1, }, @@ -928,6 +926,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) goto err; ctx->flags = p->flags; + init_waitqueue_head(&ctx->sqo_wait); init_waitqueue_head(&ctx->cq_wait); INIT_LIST_HEAD(&ctx->cq_overflow_list); init_completion(&ctx->completions[0]); @@ -1291,7 +1290,7 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx) struct io_kiocb *req; req = ctx->fallback_req; - if (!test_and_set_bit_lock(0, (unsigned long *) ctx->fallback_req)) + if (!test_and_set_bit_lock(0, (unsigned long *) &ctx->fallback_req)) return req; return NULL; @@ -1378,7 +1377,7 @@ static void __io_free_req(struct io_kiocb *req) if (likely(!io_is_fallback_req(req))) kmem_cache_free(req_cachep, req); else - clear_bit_unlock(0, (unsigned long *) req->ctx->fallback_req); + clear_bit_unlock(0, (unsigned long *) &req->ctx->fallback_req); } struct req_batch { @@ -1398,10 +1397,6 @@ static void io_free_req_many(struct io_ring_ctx *ctx, struct req_batch *rb) for (i = 0; i < rb->to_free; i++) { struct io_kiocb *req = rb->reqs[i]; - if (req->flags & REQ_F_FIXED_FILE) { - req->file = NULL; - percpu_ref_put(req->fixed_file_refs); - } if (req->flags & REQ_F_INFLIGHT) inflight++; __io_req_aux_free(req); @@ -1674,7 +1669,7 @@ static inline bool io_req_multi_free(struct req_batch *rb, struct io_kiocb *req) if ((req->flags & REQ_F_LINK_HEAD) || io_is_fallback_req(req)) return false; - if (!(req->flags & REQ_F_FIXED_FILE) || req->io) + if (req->file || req->io) rb->need_iter++; rb->reqs[rb->to_free++] = req; @@ -2034,7 +2029,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd) * any file. For now, just ensure that anything potentially problematic is done * inline. */ -static bool io_file_supports_async(struct file *file) +static bool io_file_supports_async(struct file *file, int rw) { umode_t mode = file_inode(file)->i_mode; @@ -2043,7 +2038,13 @@ static bool io_file_supports_async(struct file *file) if (S_ISREG(mode) && file->f_op != &io_uring_fops) return true; - return false; + if (!(file->f_mode & FMODE_NOWAIT)) + return false; + + if (rw == READ) + return file->f_op->read_iter != NULL; + + return file->f_op->write_iter != NULL; } static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, @@ -2102,9 +2103,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, req->rw.addr = READ_ONCE(sqe->addr); req->rw.len = READ_ONCE(sqe->len); - /* we own ->private, reuse it for the buffer index / buffer ID */ - req->rw.kiocb.private = (void *) (unsigned long) - READ_ONCE(sqe->buf_index); + req->buf_index = READ_ONCE(sqe->buf_index); return 0; } @@ -2147,7 +2146,7 @@ static ssize_t io_import_fixed(struct io_kiocb *req, int rw, struct io_ring_ctx *ctx = req->ctx; size_t len = req->rw.len; struct io_mapped_ubuf *imu; - unsigned index, buf_index; + u16 index, buf_index; size_t offset; u64 buf_addr; @@ -2155,7 +2154,7 @@ static ssize_t io_import_fixed(struct io_kiocb *req, int rw, if (unlikely(!ctx->user_bufs)) return -EFAULT; - buf_index = (unsigned long) req->rw.kiocb.private; + buf_index = req->buf_index; if (unlikely(buf_index >= ctx->nr_user_bufs)) return -EFAULT; @@ -2271,10 +2270,10 @@ static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len, bool needs_lock) { struct io_buffer *kbuf; - int bgid; + u16 bgid; kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; - bgid = (int) (unsigned long) req->rw.kiocb.private; + bgid = req->buf_index; kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock); if (IS_ERR(kbuf)) return kbuf; @@ -2365,7 +2364,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, } /* buffer index only valid with fixed read/write, or buffer select */ - if (req->rw.kiocb.private && !(req->flags & REQ_F_BUFFER_SELECT)) + if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT)) return -EINVAL; if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) { @@ -2571,7 +2570,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock) * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so * we know to async punt it even if it was opened O_NONBLOCK */ - if (force_nonblock && !io_file_supports_async(req->file)) + if (force_nonblock && !io_file_supports_async(req->file, READ)) goto copy_iov; iov_count = iov_iter_count(&iter); @@ -2594,7 +2593,8 @@ copy_iov: if (ret) goto out_free; /* any defer here is final, must blocking retry */ - if (!(req->flags & REQ_F_NOWAIT)) + if (!(req->flags & REQ_F_NOWAIT) && + !file_can_poll(req->file)) req->flags |= REQ_F_MUST_PUNT; return -EAGAIN; } @@ -2662,7 +2662,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock) * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so * we know to async punt it even if it was opened O_NONBLOCK */ - if (force_nonblock && !io_file_supports_async(req->file)) + if (force_nonblock && !io_file_supports_async(req->file, WRITE)) goto copy_iov; /* file path doesn't support NOWAIT for non-direct_IO */ @@ -2716,7 +2716,8 @@ copy_iov: if (ret) goto out_free; /* any defer here is final, must blocking retry */ - req->flags |= REQ_F_MUST_PUNT; + if (!file_can_poll(req->file)) + req->flags |= REQ_F_MUST_PUNT; return -EAGAIN; } } @@ -2756,15 +2757,6 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static bool io_splice_punt(struct file *file) -{ - if (get_pipe_info(file)) - return false; - if (!io_file_supports_async(file)) - return true; - return !(file->f_flags & O_NONBLOCK); -} - static int io_splice(struct io_kiocb *req, bool force_nonblock) { struct io_splice *sp = &req->splice; @@ -2772,19 +2764,16 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock) struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; loff_t *poff_in, *poff_out; - long ret; + long ret = 0; - if (force_nonblock) { - if (io_splice_punt(in) || io_splice_punt(out)) - return -EAGAIN; - flags |= SPLICE_F_NONBLOCK; - } + if (force_nonblock) + return -EAGAIN; poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; - ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); - if (force_nonblock && ret == -EAGAIN) - return -EAGAIN; + + if (sp->len) + ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED)); req->flags &= ~REQ_F_NEED_CLEANUP; @@ -3355,8 +3344,12 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock) struct kstat stat; int ret; - if (force_nonblock) + if (force_nonblock) { + /* only need file table for an actual valid fd */ + if (ctx->dfd == -1 || ctx->dfd == AT_FDCWD) + req->flags |= REQ_F_NO_FILE_TABLE; return -EAGAIN; + } if (vfs_stat_set_lookup_flags(&lookup_flags, ctx->how.flags)) return -EINVAL; @@ -3502,7 +3495,7 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr) if (io_req_cancelled(req)) return; __io_sync_file_range(req); - io_put_req(req); /* put submission ref */ + io_steal_work(req, workptr); } static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock) @@ -4142,12 +4135,14 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, req->result = mask; init_task_work(&req->task_work, func); /* - * If this fails, then the task is exiting. Punt to one of the io-wq - * threads to ensure the work gets run, we can't always rely on exit - * cancelation taking care of this. + * If this fails, then the task is exiting. When a task exits, the + * work gets canceled, so just cancel this request as well instead + * of executing it. We can't safely execute it anyway, as we may not + * have the needed state needed for it anyway. */ ret = task_work_add(tsk, &req->task_work, true); if (unlikely(ret)) { + WRITE_ONCE(poll->canceled, true); tsk = io_wq_get_task(req->ctx->io_wq); task_work_add(tsk, &req->task_work, true); } @@ -4200,17 +4195,17 @@ static void io_async_task_func(struct callback_head *cb) spin_unlock_irq(&ctx->completion_lock); + /* restore ->work in case we need to retry again */ + memcpy(&req->work, &apoll->work, sizeof(req->work)); + if (canceled) { kfree(apoll); io_cqring_ev_posted(ctx); req_set_fail_links(req); - io_put_req(req); + io_double_put_req(req); return; } - /* restore ->work in case we need to retry again */ - memcpy(&req->work, &apoll->work, sizeof(req->work)); - __set_current_state(TASK_RUNNING); mutex_lock(&ctx->uring_lock); __io_queue_sqe(req, NULL); @@ -4369,7 +4364,7 @@ static bool io_poll_remove_one(struct io_kiocb *req) hash_del(&req->hash_node); - if (apoll) { + if (do_complete && apoll) { /* * restore ->work because we need to call io_req_work_drop_env. */ @@ -5015,15 +5010,16 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe) int ret; /* Still need defer if there is pending req in defer list. */ - if (!req_need_defer(req) && list_empty(&ctx->defer_list)) + if (!req_need_defer(req) && list_empty_careful(&ctx->defer_list)) return 0; - if (!req->io && io_alloc_async_ctx(req)) - return -EAGAIN; - - ret = io_req_defer_prep(req, sqe); - if (ret < 0) - return ret; + if (!req->io) { + if (io_alloc_async_ctx(req)) + return -EAGAIN; + ret = io_req_defer_prep(req, sqe); + if (ret < 0) + return ret; + } spin_lock_irq(&ctx->completion_lock); if (!req_need_defer(req) && list_empty(&ctx->defer_list)) { @@ -5310,7 +5306,8 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (ret) return ret; - if (ctx->flags & IORING_SETUP_IOPOLL) { + /* If the op doesn't have a file, we're not polling for it */ + if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) { const bool in_async = io_wq_current_is_worker(); if (req->result == -EAGAIN) @@ -5364,15 +5361,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr) io_steal_work(req, workptr); } -static int io_req_needs_file(struct io_kiocb *req, int fd) -{ - if (!io_op_defs[req->opcode].needs_file) - return 0; - if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg) - return 0; - return 1; -} - static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, int index) { @@ -5410,14 +5398,11 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, } static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, - int fd, unsigned int flags) + int fd) { bool fixed; - if (!io_req_needs_file(req, fd)) - return 0; - - fixed = (flags & IOSQE_FIXED_FILE); + fixed = (req->flags & REQ_F_FIXED_FILE) != 0; if (unlikely(!fixed && req->needs_fixed_file)) return -EBADF; @@ -5429,7 +5414,7 @@ static int io_grab_files(struct io_kiocb *req) int ret = -EBADF; struct io_ring_ctx *ctx = req->ctx; - if (req->work.files) + if (req->work.files || (req->flags & REQ_F_NO_FILE_TABLE)) return 0; if (!ctx->ring_file) return -EBADF; @@ -5623,9 +5608,15 @@ fail_req: io_double_put_req(req); } } else if (req->flags & REQ_F_FORCE_ASYNC) { - ret = io_req_defer_prep(req, sqe); - if (unlikely(ret < 0)) - goto fail_req; + if (!req->io) { + ret = -EAGAIN; + if (io_alloc_async_ctx(req)) + goto fail_req; + ret = io_req_defer_prep(req, sqe); + if (unlikely(ret < 0)) + goto fail_req; + } + /* * Never try inline submit of IOSQE_ASYNC is set, go straight * to async execution. @@ -5794,7 +5785,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, struct io_submit_state *state, bool async) { unsigned int sqe_flags; - int id, fd; + int id; /* * All io need record the previous position, if LINK vs DARIN, @@ -5846,8 +5837,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, IOSQE_ASYNC | IOSQE_FIXED_FILE | IOSQE_BUFFER_SELECT | IOSQE_IO_LINK); - fd = READ_ONCE(sqe->fd); - return io_req_set_file(state, req, fd, sqe_flags); + if (!io_op_defs[req->opcode].needs_file) + return 0; + + return io_req_set_file(state, req, READ_ONCE(sqe->fd)); } static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, @@ -6039,6 +6032,7 @@ static int io_sq_thread(void *data) finish_wait(&ctx->sqo_wait, &wait); ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + ret = 0; continue; } finish_wait(&ctx->sqo_wait, &wait); @@ -6852,7 +6846,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, { int ret; - init_waitqueue_head(&ctx->sqo_wait); mmgrab(current->mm); ctx->sqo_mm = current->mm; @@ -7327,7 +7320,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) * it could cause shutdown to hang. */ while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait)) - cpu_relax(); + cond_resched(); io_kill_timeouts(ctx); io_poll_remove_all(ctx); @@ -7356,11 +7349,9 @@ static int io_uring_release(struct inode *inode, struct file *file) static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { - struct io_kiocb *req; - DEFINE_WAIT(wait); - while (!list_empty_careful(&ctx->inflight_list)) { - struct io_kiocb *cancel_req = NULL; + struct io_kiocb *cancel_req = NULL, *req; + DEFINE_WAIT(wait); spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { @@ -7400,6 +7391,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, */ if (refcount_sub_and_test(2, &cancel_req->refs)) { io_put_req(cancel_req); + finish_wait(&ctx->inflight_wait, &wait); continue; } } @@ -7407,8 +7399,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, io_wq_cancel_work(ctx->io_wq, &cancel_req->work); io_put_req(cancel_req); schedule(); + finish_wait(&ctx->inflight_wait, &wait); } - finish_wait(&ctx->inflight_wait, &wait); } static int io_uring_flush(struct file *file, void *data) @@ -7757,7 +7749,8 @@ err: return ret; } -static int io_uring_create(unsigned entries, struct io_uring_params *p) +static int io_uring_create(unsigned entries, struct io_uring_params *p, + struct io_uring_params __user *params) { struct user_struct *user = NULL; struct io_ring_ctx *ctx; @@ -7849,6 +7842,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); p->cq_off.cqes = offsetof(struct io_rings, cqes); + p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | + IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | + IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL; + + if (copy_to_user(params, p, sizeof(*p))) { + ret = -EFAULT; + goto err; + } /* * Install ring fd as the very last thing, so we don't risk someone * having closed it before we finish setup @@ -7857,9 +7858,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) if (ret < 0) goto err; - p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | - IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | - IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL; trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: @@ -7875,7 +7873,6 @@ err: static long io_uring_setup(u32 entries, struct io_uring_params __user *params) { struct io_uring_params p; - long ret; int i; if (copy_from_user(&p, params, sizeof(p))) @@ -7890,14 +7887,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ)) return -EINVAL; - ret = io_uring_create(entries, &p); - if (ret < 0) - return ret; - - if (copy_to_user(params, &p, sizeof(p))) - return -EFAULT; - - return ret; + return io_uring_create(entries, &p, params); } SYSCALL_DEFINE2(io_uring_setup, u32, entries, |