summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2021-09-26 01:51:08 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2021-09-26 01:51:08 +0300
commit: f6f360aef0e70a45cbf43db1dd9df5a5e96d9836 (patch)
tree: bfaf38f7fdd13b3030ba767388691887f8c892b1
parent: 2d70de4ee5931455811cd0ce692230785ae1c3ce (diff)
parent: 7df778be2f61e1a23002d1f2f5d6aaf702771eb8 (diff)
download: linux-f6f360aef0e70a45cbf43db1dd9df5a5e96d9836.tar.xz
Merge tag 'io_uring-5.15-2021-09-25' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "This one looks a bit bigger than it is, but that's mainly because 2/3
  of it is enabling IORING_OP_CLOSE to close direct file descriptors.

  We've had a few folks using them and finding it confusing that the way
  to close them is through using -1 for file update, this just brings
  API symmetry for direct descriptors. Hence I think we should just do
  this now and have a better API for 5.15 release. There's some room for
  de-duplicating the close code, but we're leaving that for the next
  merge window.

  Outside of that, just small fixes:

   - Poll race fixes (Hao)

   - io-wq core dump exit fix (me)

   - Reschedule around potentially intensive tctx and buffer iterators
     on teardown (me)

   - Fix for always ending up punting files update to io-wq (me)

   - Put the provided buffer meta data under memcg accounting (me)

   - Tweak for io_write(), removing dead code that was added with the
     iterator changes in this release (Pavel)"

* tag 'io_uring-5.15-2021-09-25' of git://git.kernel.dk/linux-block:
  io_uring: make OP_CLOSE consistent with direct open
  io_uring: kill extra checks in io_write()
  io_uring: don't punt files update to io-wq unconditionally
  io_uring: put provided buffer meta data under memcg accounting
  io_uring: allow conditional reschedule for intensive iterators
  io_uring: fix potential req refcount underflow
  io_uring: fix missing set of EPOLLONESHOT for CQ ring overflow
  io_uring: fix race between poll completion and cancel_hash insertion
  io-wq: ensure we exit if thread group is exiting

-rw-r--r--  fs/io-wq.c     |  3
-rw-r--r--  fs/io_uring.c  | 85
2 files changed, 72 insertions, 16 deletions
diff --git a/fs/io-wq.c b/fs/io-wq.c
index c2e0e8e80949..c2360cdc403d 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -584,7 +584,8 @@ loop:
if (!get_signal(&ksig))
continue;
- if (fatal_signal_pending(current))
+ if (fatal_signal_pending(current) ||
+ signal_group_exit(current->signal))
break;
continue;
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e372d5b9f6dc..82f867983bb3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -502,6 +502,7 @@ struct io_poll_update {
struct io_close {
struct file *file;
int fd;
+ u32 file_slot;
};
struct io_timeout_data {
@@ -1098,6 +1099,8 @@ static int io_req_prep_async(struct io_kiocb *req);
static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index);
+static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
+
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static struct kmem_cache *req_cachep;
@@ -3605,7 +3608,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
iov_iter_save_state(iter, state);
}
req->result = iov_iter_count(iter);
- ret2 = 0;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
@@ -3670,8 +3672,6 @@ done:
} else {
copy_iov:
iov_iter_restore(iter, state);
- if (ret2 > 0)
- iov_iter_advance(iter, ret2);
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
return ret ?: -EAGAIN;
}
@@ -4387,7 +4387,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
int i, bid = pbuf->bid;
for (i = 0; i < pbuf->nbufs; i++) {
- buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+ buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
if (!buf)
break;
@@ -4594,12 +4594,16 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
- sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+ sqe->rw_flags || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
req->close.fd = READ_ONCE(sqe->fd);
+ req->close.file_slot = READ_ONCE(sqe->file_index);
+ if (req->close.file_slot && req->close.fd)
+ return -EINVAL;
+
return 0;
}
@@ -4611,6 +4615,11 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
struct file *file = NULL;
int ret = -EBADF;
+ if (req->close.file_slot) {
+ ret = io_close_fixed(req, issue_flags);
+ goto err;
+ }
+
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
if (close->fd >= fdt->max_fds) {
@@ -5338,7 +5347,7 @@ static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
if (req->poll.events & EPOLLONESHOT)
flags = 0;
if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
- req->poll.done = true;
+ req->poll.events |= EPOLLONESHOT;
flags = 0;
}
if (flags & IORING_CQE_F_MORE)
@@ -5367,10 +5376,15 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
} else {
bool done;
+ if (req->poll.done) {
+ spin_unlock(&ctx->completion_lock);
+ return;
+ }
done = __io_poll_complete(req, req->result);
if (done) {
io_poll_remove_double(req);
hash_del(&req->hash_node);
+ req->poll.done = true;
} else {
req->result = 0;
add_wait_queue(req->poll.head, &req->poll.wait);
@@ -5508,6 +5522,7 @@ static void io_async_task_func(struct io_kiocb *req, bool *locked)
hash_del(&req->hash_node);
io_poll_remove_double(req);
+ apoll->poll.done = true;
spin_unlock(&ctx->completion_lock);
if (!READ_ONCE(apoll->poll.canceled))
@@ -5828,6 +5843,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
struct io_ring_ctx *ctx = req->ctx;
struct io_poll_table ipt;
__poll_t mask;
+ bool done;
ipt.pt._qproc = io_poll_queue_proc;
@@ -5836,13 +5852,13 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
if (mask) { /* no async, we'd stolen it */
ipt.error = 0;
- io_poll_complete(req, mask);
+ done = io_poll_complete(req, mask);
}
spin_unlock(&ctx->completion_lock);
if (mask) {
io_cqring_ev_posted(ctx);
- if (poll->events & EPOLLONESHOT)
+ if (done)
io_put_req(req);
}
return ipt.error;
@@ -6333,19 +6349,16 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
struct io_uring_rsrc_update2 up;
int ret;
- if (issue_flags & IO_URING_F_NONBLOCK)
- return -EAGAIN;
-
up.offset = req->rsrc_update.offset;
up.data = req->rsrc_update.arg;
up.nr = 0;
up.tags = 0;
up.resv = 0;
- mutex_lock(&ctx->uring_lock);
+ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
&up, req->rsrc_update.nr_args);
- mutex_unlock(&ctx->uring_lock);
+ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
if (ret < 0)
req_set_fail(req);
@@ -8400,6 +8413,44 @@ err:
return ret;
}
+static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
+{
+ unsigned int offset = req->close.file_slot - 1;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_fixed_file *file_slot;
+ struct file *file;
+ int ret, i;
+
+ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+ ret = -ENXIO;
+ if (unlikely(!ctx->file_data))
+ goto out;
+ ret = -EINVAL;
+ if (offset >= ctx->nr_user_files)
+ goto out;
+ ret = io_rsrc_node_switch_start(ctx);
+ if (ret)
+ goto out;
+
+ i = array_index_nospec(offset, ctx->nr_user_files);
+ file_slot = io_fixed_file_slot(&ctx->file_table, i);
+ ret = -EBADF;
+ if (!file_slot->file_ptr)
+ goto out;
+
+ file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+ ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+ if (ret)
+ goto out;
+
+ file_slot->file_ptr = 0;
+ io_rsrc_node_switch(ctx, ctx->file_data);
+ ret = 0;
+out:
+ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+ return ret;
+}
+
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update2 *up,
unsigned nr_args)
@@ -9166,8 +9217,10 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
struct io_buffer *buf;
unsigned long index;
- xa_for_each(&ctx->io_buffers, index, buf)
+ xa_for_each(&ctx->io_buffers, index, buf) {
__io_remove_buffers(ctx, buf, index, -1U);
+ cond_resched();
+ }
}
static void io_req_cache_free(struct list_head *list)
@@ -9665,8 +9718,10 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
struct io_tctx_node *node;
unsigned long index;
- xa_for_each(&tctx->xa, index, node)
+ xa_for_each(&tctx->xa, index, node) {
io_uring_del_tctx_node(index);
+ cond_resched();
+ }
if (wq) {
/*
* Must be after io_uring_del_task_file() (removes nodes under