summary refs log tree commit diff
path: root/io_uring
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring')
-rw-r--r-- io_uring/io-wq.c 16
-rw-r--r-- io_uring/io_uring.c 4
-rw-r--r-- io_uring/msg_ring.c 4
-rw-r--r-- io_uring/rsrc.c 10
-rw-r--r-- io_uring/slist.h 5
-rw-r--r-- io_uring/sqpoll.c 1
-rw-r--r-- io_uring/uring_cmd.c 4
7 files changed, 31 insertions, 13 deletions
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 411bb2d1acd4..f81c0a7136a5 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -616,7 +616,7 @@ static int io_wqe_worker(void *data)
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
- bool last_timeout = false;
+ bool exit_mask = false, last_timeout = false;
char buf[TASK_COMM_LEN];
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
@@ -632,8 +632,11 @@ static int io_wqe_worker(void *data)
io_worker_handle_work(worker);
raw_spin_lock(&wqe->lock);
- /* timed out, exit unless we're the last worker */
- if (last_timeout && acct->nr_workers > 1) {
+ /*
+ * Last sleep timed out. Exit if we're not the last worker,
+ * or if someone modified our affinity.
+ */
+ if (last_timeout && (exit_mask || acct->nr_workers > 1)) {
acct->nr_workers--;
raw_spin_unlock(&wqe->lock);
__set_current_state(TASK_RUNNING);
@@ -652,7 +655,11 @@ static int io_wqe_worker(void *data)
continue;
break;
}
- last_timeout = !ret;
+ if (!ret) {
+ last_timeout = true;
+ exit_mask = !cpumask_test_cpu(raw_smp_processor_id(),
+ wqe->cpu_mask);
+ }
}
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
@@ -704,7 +711,6 @@ static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
tsk->worker_private = worker;
worker->task = tsk;
set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
- tsk->flags |= PF_NO_SETAFFINITY;
raw_spin_lock(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index fd1cc35a1c00..722624b6d0dc 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1499,14 +1499,14 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
- struct io_wq_work_node *node, *prev;
struct io_submit_state *state = &ctx->submit_state;
+ struct io_wq_work_node *node;
__io_cq_lock(ctx);
/* must come first to preserve CQE ordering in failure cases */
if (state->cqes_count)
__io_flush_post_cqes(ctx);
- wq_list_for_each(node, prev, &state->compl_reqs) {
+ __wq_list_for_each(node, &state->compl_reqs) {
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 8803c0979e2a..85fd7ce5f05b 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -202,7 +202,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
* completes with -EOVERFLOW, then the sender must ensure that a
* later IORING_OP_MSG_RING delivers the message.
*/
- if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
ret = -EOVERFLOW;
out_unlock:
io_double_unlock_ctx(target_ctx);
@@ -229,6 +229,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
struct io_ring_ctx *ctx = req->ctx;
struct file *src_file = msg->src_file;
+ if (msg->len)
+ return -EINVAL;
if (target_ctx == ctx)
return -EINVAL;
if (target_ctx->flags & IORING_SETUP_R_DISABLED)
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 056f40946ff6..e2bac9f89902 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -410,7 +410,7 @@ __cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx,
unsigned nr, struct io_rsrc_data **pdata)
{
struct io_rsrc_data *data;
- int ret = -ENOMEM;
+ int ret = 0;
unsigned i;
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1235,7 +1235,13 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
}
}
if (folio) {
- folio_put_refs(folio, nr_pages - 1);
+ /*
+ * The pages are bound to the folio; this call doesn't
+ * actually unpin them but drops all but one reference,
+ * which is usually put down by io_buffer_unmap().
+ * Note: this needs a better helper.
+ */
+ unpin_user_pages(&pages[1], nr_pages - 1);
nr_pages = 1;
}
}
diff --git a/io_uring/slist.h b/io_uring/slist.h
index 7c198a40d5f1..0eb194817242 100644
--- a/io_uring/slist.h
+++ b/io_uring/slist.h
@@ -3,6 +3,9 @@
#include <linux/io_uring_types.h>
+#define __wq_list_for_each(pos, head) \
+ for (pos = (head)->first; pos; pos = (pos)->next)
+
#define wq_list_for_each(pos, prv, head) \
for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
@@ -113,4 +116,4 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
return container_of(work->list.next, struct io_wq_work, list);
}
-#endif // INTERNAL_IO_SLIST_H
\ No newline at end of file
+#endif // INTERNAL_IO_SLIST_H
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 0119d3f1a556..9db4bc1f521a 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -233,7 +233,6 @@ static int io_sq_thread(void *data)
set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
else
set_cpus_allowed_ptr(current, cpu_online_mask);
- current->flags |= PF_NO_SETAFFINITY;
mutex_lock(&sqd->lock);
while (1) {
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 446a189b78b0..2e4c483075d3 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -108,7 +108,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
struct file *file = req->file;
int ret;
- if (!req->file->f_op->uring_cmd)
+ if (!file->f_op->uring_cmd)
return -EOPNOTSUPP;
ret = security_uring_cmd(ioucmd);
@@ -120,6 +120,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
if (ctx->flags & IORING_SETUP_CQE32)
issue_flags |= IO_URING_F_CQE32;
if (ctx->flags & IORING_SETUP_IOPOLL) {
+ if (!file->f_op->uring_cmd_iopoll)
+ return -EOPNOTSUPP;
issue_flags |= IO_URING_F_IOPOLL;
req->iopoll_completed = 0;
WRITE_ONCE(ioucmd->cookie, NULL);