Diffstat (limited to 'io_uring')
-rw-r--r--  io_uring/fdinfo.c      8
-rw-r--r--  io_uring/filetable.c   2
-rw-r--r--  io_uring/io_uring.c    2
-rw-r--r--  io_uring/kbuf.c       33
-rw-r--r--  io_uring/memmap.c      2
-rw-r--r--  io_uring/net.c         2
-rw-r--r--  io_uring/register.c   11
-rw-r--r--  io_uring/rsrc.c       11
-rw-r--r--  io_uring/rw.c          8
-rw-r--r--  io_uring/sqpoll.c     65
-rw-r--r--  io_uring/sqpoll.h      1
-rw-r--r--  io_uring/waitid.c      2
-rw-r--r--  io_uring/zcrx.c       68
-rw-r--r--  io_uring/zcrx.h        7
14 files changed, 93 insertions(+), 129 deletions(-)
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index ff3364531c77..294c75a8a3bd 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -59,7 +59,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
{
struct io_overflow_cqe *ocqe;
struct io_rings *r = ctx->rings;
- struct rusage sq_usage;
unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
unsigned int sq_head = READ_ONCE(r->sq.head);
unsigned int sq_tail = READ_ONCE(r->sq.tail);
@@ -152,14 +151,15 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
* thread termination.
*/
if (tsk) {
+ u64 usec;
+
get_task_struct(tsk);
rcu_read_unlock();
- getrusage(tsk, RUSAGE_SELF, &sq_usage);
+ usec = io_sq_cpu_usec(tsk);
put_task_struct(tsk);
sq_pid = sq->task_pid;
sq_cpu = sq->sq_cpu;
- sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
- + sq_usage.ru_stime.tv_usec);
+ sq_total_time = usec;
sq_work_time = sq->work_time;
} else {
rcu_read_unlock();
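
The fdinfo hunk above swaps the getrusage()-based accounting for
io_sq_cpu_usec() (added in the sqpoll.c hunk below), which returns the
SQPOLL thread's system CPU time in microseconds directly. A sketch of the
arithmetic the old path open-coded (illustrative helper, not kernel API):

/* Illustrative only: the microsecond conversion previously done by
 * hand from struct rusage; io_sq_cpu_usec() now supplies this value. */
static u64 rusage_stime_usec(const struct rusage *ru)
{
	return ru->ru_stime.tv_sec * 1000000ULL + ru->ru_stime.tv_usec;
}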
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index a21660e3145a..794ef95df293 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -57,7 +57,7 @@ void io_free_file_tables(struct io_ring_ctx *ctx, struct io_file_table *table)
static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
u32 slot_index)
- __must_hold(&req->ctx->uring_lock)
+ __must_hold(&ctx->uring_lock)
{
struct io_rsrc_node *node;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 820ef0527666..296667ba712c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -879,7 +879,7 @@ static inline struct io_cqe io_init_cqe(u64 user_data, s32 res, u32 cflags)
}
static __cold void io_cqe_overflow(struct io_ring_ctx *ctx, struct io_cqe *cqe,
- struct io_big_cqe *big_cqe)
+ struct io_big_cqe *big_cqe)
{
struct io_overflow_cqe *ocqe;
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index aad655e38672..a727e020fe03 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -155,6 +155,27 @@ static int io_provided_buffers_select(struct io_kiocb *req, size_t *len,
return 1;
}
+static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags)
+{
+ /*
+ * If we came in unlocked, we have no choice but to consume the
+ * buffer here, otherwise nothing ensures that the buffer won't
+ * get used by others. This does mean it'll be pinned until the
+ * IO completes; coming in unlocked means we're being called from
+ * io-wq context and there may be further retries in async hybrid
+ * mode. For the locked case, the caller must call commit when
+ * the transfer completes (or if we get -EAGAIN and must poll or
+ * retry).
+ */
+ if (issue_flags & IO_URING_F_UNLOCKED)
+ return true;
+
+ /* uring_cmd commits kbuf upfront, no need to auto-commit */
+ if (!io_file_can_poll(req) && req->opcode != IORING_OP_URING_CMD)
+ return true;
+ return false;
+}
+
static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
struct io_buffer_list *bl,
unsigned int issue_flags)
@@ -181,17 +202,7 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
sel.buf_list = bl;
sel.addr = u64_to_user_ptr(buf->addr);
- if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
- /*
- * If we came in unlocked, we have no choice but to consume the
- * buffer here, otherwise nothing ensures that the buffer won't
- * get used by others. This does mean it'll be pinned until the
- * IO completes, coming in unlocked means we're being called from
- * io-wq context and there may be further retries in async hybrid
- * mode. For the locked case, the caller must call commit when
- * the transfer completes (or if we get -EAGAIN and must poll of
- * retry).
- */
+ if (io_should_commit(req, issue_flags)) {
io_kbuf_commit(req, sel.buf_list, *len, 1);
sel.buf_list = NULL;
}
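
The kbuf.c hunks hoist the open-coded commit decision into
io_should_commit() and add the IORING_OP_URING_CMD carve-out. A standalone
model of the resulting decision table, with illustrative stand-ins for the
kernel predicates:

/* Model of io_should_commit(); the bool parameters stand in for
 * IO_URING_F_UNLOCKED, io_file_can_poll() and the opcode check. */
static bool should_commit(bool unlocked, bool pollable, bool uring_cmd)
{
	if (unlocked)			/* io-wq context: consume the buffer now */
		return true;
	if (!pollable && !uring_cmd)	/* no poll retry; uring_cmd commits upfront */
		return true;
	return false;			/* caller commits when the transfer completes */
}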
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 2e99dffddfc5..add03ca75cb9 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -135,7 +135,7 @@ static int io_region_pin_pages(struct io_ring_ctx *ctx,
struct io_mapped_region *mr,
struct io_uring_region_desc *reg)
{
- unsigned long size = mr->nr_pages << PAGE_SHIFT;
+ unsigned long size = (size_t) mr->nr_pages << PAGE_SHIFT;
struct page **pages;
int nr_pages;
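
The memmap.c cast widens nr_pages before the shift; otherwise the shift is
performed in the narrower type and wraps once the region reaches 4 GiB with
4 KiB pages. A minimal demonstration of the overflow class, assuming
PAGE_SHIFT == 12 and an LP64 target:

static void shift_overflow_demo(void)
{
	unsigned int nr_pages = 1u << 20;		/* 1M pages = 4 GiB */
	unsigned long bad  = nr_pages << 12;		/* 32-bit shift: wraps to 0 */
	unsigned long good = (size_t)nr_pages << 12;	/* widened first: 1UL << 32 */

	(void)bad;
	(void)good;
}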
diff --git a/io_uring/net.c b/io_uring/net.c
index f99b90c762fc..a95cc9ca2a4d 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -383,7 +383,7 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
if (sr->flags & IORING_SEND_VECTORIZED)
- return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
+ return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
}
diff --git a/io_uring/register.c b/io_uring/register.c
index 43f04c47522c..d189b266b8cc 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -421,13 +421,6 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
if (unlikely(ret))
return ret;
- /* nothing to do, but copy params back */
- if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) {
- if (copy_to_user(arg, &p, sizeof(p)))
- return -EFAULT;
- return 0;
- }
-
size = rings_size(p.flags, p.sq_entries, p.cq_entries,
&sq_array_offset);
if (size == SIZE_MAX)
@@ -613,6 +606,7 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
if (ret)
return ret;
if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
+ guard(mutex)(&ctx->mmap_lock);
io_free_region(ctx, &ctx->param_region);
return -EFAULT;
}
@@ -833,9 +827,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
case IORING_REGISTER_QUERY:
ret = io_query(ctx, arg, nr_args);
break;
- case IORING_REGISTER_ZCRX_REFILL:
- ret = io_zcrx_return_bufs(ctx, arg, nr_args);
- break;
default:
ret = -EINVAL;
break;
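
The register.c error path now takes ctx->mmap_lock around io_free_region()
via guard(mutex) from <linux/cleanup.h>, which releases the lock
automatically at scope exit. In effect:

	{
		guard(mutex)(&ctx->mmap_lock);		/* mutex_lock() here */
		io_free_region(ctx, &ctx->param_region);
		return -EFAULT;
	}	/* mutex_unlock() runs here, covering every exit path */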
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d787c16dc1c3..2602d76d5ff0 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1403,8 +1403,11 @@ static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs,
size_t max_segs = 0;
unsigned i;
- for (i = 0; i < nr_iovs; i++)
+ for (i = 0; i < nr_iovs; i++) {
max_segs += (iov[i].iov_len >> shift) + 2;
+ if (max_segs > INT_MAX)
+ return -EOVERFLOW;
+ }
return max_segs;
}
@@ -1510,7 +1513,11 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
if (unlikely(ret))
return ret;
} else {
- nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu);
+ int ret = io_estimate_bvec_size(iov, nr_iovs, imu);
+
+ if (ret < 0)
+ return ret;
+ nr_segs = ret;
}
if (sizeof(struct bio_vec) > sizeof(struct iovec)) {
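
The rsrc.c estimate accumulates (iov_len >> shift) + 2 per iovec into a
size_t, but the function returns int; without the in-loop check a sum just
past INT_MAX would truncate to a negative count at the return statement.
An illustration of the truncation the -EOVERFLOW check prevents:

static int truncation_demo(void)
{
	size_t max_segs = (size_t)INT_MAX + 2;	/* reachable with a crafted iovec set */

	return max_segs;	/* wraps to -2147483647 on two's-complement targets */
}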
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 08882648d569..5b2241a5813c 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -542,7 +542,7 @@ static void __io_complete_rw_common(struct io_kiocb *req, long res)
{
if (res == req->cqe.res)
return;
- if (res == -EAGAIN && io_rw_should_reissue(req)) {
+ if ((res == -EOPNOTSUPP || res == -EAGAIN) && io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE;
} else {
req_set_fail(req);
@@ -655,13 +655,17 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret,
if (ret >= 0 && req->flags & REQ_F_CUR_POS)
req->file->f_pos = rw->kiocb.ki_pos;
if (ret >= 0 && !(req->ctx->flags & IORING_SETUP_IOPOLL)) {
+ u32 cflags = 0;
+
__io_complete_rw_common(req, ret);
/*
* Safe to call io_end from here as we're inline
* from the submission path.
*/
io_req_io_end(req);
- io_req_set_res(req, final_ret, io_put_kbuf(req, ret, sel->buf_list));
+ if (sel)
+ cflags = io_put_kbuf(req, ret, sel->buf_list);
+ io_req_set_res(req, final_ret, cflags);
io_req_rw_cleanup(req, issue_flags);
return IOU_COMPLETE;
} else {
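
Two independent fixes in rw.c: -EOPNOTSUPP now triggers reissue alongside
-EAGAIN (presumably for stacks that fail a nonblocking attempt with
-EOPNOTSUPP), and kiocb_done() only derives completion flags when a buffer
selection actually exists. A model of the reissue classification
(illustrative helper, not the kernel's):

/* Both results now mean "retry from a context that may block",
 * provided io_rw_should_reissue() also agrees. */
static bool rw_result_retryable(long res)
{
	return res == -EAGAIN || res == -EOPNOTSUPP;
}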
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index a3f11349ce06..e22f072c7d5f 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -11,6 +11,7 @@
#include <linux/audit.h>
#include <linux/security.h>
#include <linux/cpuset.h>
+#include <linux/sched/cputime.h>
#include <linux/io_uring.h>
#include <uapi/linux/io_uring.h>
@@ -169,7 +170,38 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
return READ_ONCE(sqd->state);
}
-static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
+struct io_sq_time {
+ bool started;
+ u64 usec;
+};
+
+u64 io_sq_cpu_usec(struct task_struct *tsk)
+{
+ u64 utime, stime;
+
+ task_cputime_adjusted(tsk, &utime, &stime);
+ do_div(stime, 1000);
+ return stime;
+}
+
+static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist)
+{
+ if (!ist->started)
+ return;
+ ist->started = false;
+ sqd->work_time += io_sq_cpu_usec(current) - ist->usec;
+}
+
+static void io_sq_start_worktime(struct io_sq_time *ist)
+{
+ if (ist->started)
+ return;
+ ist->started = true;
+ ist->usec = io_sq_cpu_usec(current);
+}
+
+static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd,
+ bool cap_entries, struct io_sq_time *ist)
{
unsigned int to_submit;
int ret = 0;
@@ -182,6 +214,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
if (to_submit || !wq_list_empty(&ctx->iopoll_list)) {
const struct cred *creds = NULL;
+ io_sq_start_worktime(ist);
+
if (ctx->sq_creds != current_cred())
creds = override_creds(ctx->sq_creds);
@@ -255,23 +289,11 @@ static bool io_sq_tw_pending(struct llist_node *retry_list)
return retry_list || !llist_empty(&tctx->task_list);
}
-static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
-{
- struct rusage end;
-
- getrusage(current, RUSAGE_SELF, &end);
- end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
- end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
-
- sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
-}
-
static int io_sq_thread(void *data)
{
struct llist_node *retry_list = NULL;
struct io_sq_data *sqd = data;
struct io_ring_ctx *ctx;
- struct rusage start;
unsigned long timeout = 0;
char buf[TASK_COMM_LEN] = {};
DEFINE_WAIT(wait);
@@ -309,6 +331,7 @@ static int io_sq_thread(void *data)
mutex_lock(&sqd->lock);
while (1) {
bool cap_entries, sqt_spin = false;
+ struct io_sq_time ist = { };
if (io_sqd_events_pending(sqd) || signal_pending(current)) {
if (io_sqd_handle_event(sqd))
@@ -317,9 +340,8 @@ static int io_sq_thread(void *data)
}
cap_entries = !list_is_singular(&sqd->ctx_list);
- getrusage(current, RUSAGE_SELF, &start);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
- int ret = __io_sq_thread(ctx, cap_entries);
+ int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist);
if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
sqt_spin = true;
@@ -327,15 +349,18 @@ static int io_sq_thread(void *data)
if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
sqt_spin = true;
- list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
- if (io_napi(ctx))
+ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+ if (io_napi(ctx)) {
+ io_sq_start_worktime(&ist);
io_napi_sqpoll_busy_poll(ctx);
+ }
+ }
+
+ io_sq_update_worktime(sqd, &ist);
if (sqt_spin || !time_after(jiffies, timeout)) {
- if (sqt_spin) {
- io_sq_update_worktime(sqd, &start);
+ if (sqt_spin)
timeout = jiffies + sqd->sq_thread_idle;
- }
if (unlikely(need_resched())) {
mutex_unlock(&sqd->lock);
cond_resched();
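
The sqpoll.c rework replaces the unconditional per-iteration getrusage()
pair with a lazy clock: io_sq_start_worktime() arms on the first real work
in an iteration (submission, iopoll, or napi busy-poll) and
io_sq_update_worktime() folds the elapsed CPU time in once per loop, so
idle iterations skip the accounting entirely. A standalone model of the
pattern (io_sq_cpu_usec(current) supplies "now" in the real code):

struct work_clock {
	bool started;
	u64 start_usec;
};

static void work_clock_start(struct work_clock *wc, u64 now_usec)
{
	if (!wc->started) {		/* arm only on the first work item */
		wc->started = true;
		wc->start_usec = now_usec;
	}
}

static void work_clock_flush(struct work_clock *wc, u64 now_usec, u64 *total_usec)
{
	if (wc->started) {		/* idle iterations never reach here */
		wc->started = false;
		*total_usec += now_usec - wc->start_usec;
	}
}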
diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
index b83dcdec9765..fd2f6f29b516 100644
--- a/io_uring/sqpoll.h
+++ b/io_uring/sqpoll.h
@@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd);
void io_put_sq_data(struct io_sq_data *sqd);
void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
+u64 io_sq_cpu_usec(struct task_struct *tsk);
static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd)
{
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
index f25110fb1b12..53532ae6256c 100644
--- a/io_uring/waitid.c
+++ b/io_uring/waitid.c
@@ -250,7 +250,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EINVAL;
iwa = io_uring_alloc_async_data(NULL, req);
- if (!unlikely(iwa))
+ if (unlikely(!iwa))
return -ENOMEM;
iwa->req = req;
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index a816f5902091..b1b723222cdb 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -928,74 +928,6 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = {
.uninstall = io_pp_uninstall,
};
-#define IO_ZCRX_MAX_SYS_REFILL_BUFS (1 << 16)
-#define IO_ZCRX_SYS_REFILL_BATCH 32
-
-static void io_return_buffers(struct io_zcrx_ifq *ifq,
- struct io_uring_zcrx_rqe *rqes, unsigned nr)
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- struct net_iov *niov;
- netmem_ref netmem;
-
- if (!io_parse_rqe(&rqes[i], ifq, &niov))
- continue;
-
- scoped_guard(spinlock_bh, &ifq->rq_lock) {
- if (!io_zcrx_put_niov_uref(niov))
- continue;
- }
-
- netmem = net_iov_to_netmem(niov);
- if (!page_pool_unref_and_test(netmem))
- continue;
- io_zcrx_return_niov(niov);
- }
-}
-
-int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
- void __user *arg, unsigned nr_arg)
-{
- struct io_uring_zcrx_rqe rqes[IO_ZCRX_SYS_REFILL_BATCH];
- struct io_uring_zcrx_rqe __user *user_rqes;
- struct io_uring_zcrx_sync_refill zr;
- struct io_zcrx_ifq *ifq;
- unsigned nr, i;
-
- if (nr_arg)
- return -EINVAL;
- if (copy_from_user(&zr, arg, sizeof(zr)))
- return -EFAULT;
- if (!zr.nr_entries || zr.nr_entries > IO_ZCRX_MAX_SYS_REFILL_BUFS)
- return -EINVAL;
- if (!mem_is_zero(&zr.__resv, sizeof(zr.__resv)))
- return -EINVAL;
-
- ifq = xa_load(&ctx->zcrx_ctxs, zr.zcrx_id);
- if (!ifq)
- return -EINVAL;
- nr = zr.nr_entries;
- user_rqes = u64_to_user_ptr(zr.rqes);
-
- for (i = 0; i < nr;) {
- unsigned batch = min(nr - i, IO_ZCRX_SYS_REFILL_BATCH);
- size_t size = batch * sizeof(rqes[0]);
-
- if (copy_from_user(rqes, user_rqes + i, size))
- return i ? i : -EFAULT;
- io_return_buffers(ifq, rqes, batch);
-
- i += batch;
-
- if (fatal_signal_pending(current))
- return i;
- cond_resched();
- }
- return nr;
-}
-
static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
struct io_zcrx_ifq *ifq, int off, int len)
{
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 33ef61503092..a48871b5adad 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -63,8 +63,6 @@ struct io_zcrx_ifq {
};
#if defined(CONFIG_IO_URING_ZCRX)
-int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
- void __user *arg, unsigned nr_arg);
int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
struct io_uring_zcrx_ifq_reg __user *arg);
void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx);
@@ -97,11 +95,6 @@ static inline struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ct
{
return NULL;
}
-static inline int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
- void __user *arg, unsigned nr_arg)
-{
- return -EOPNOTSUPP;
-}
#endif
int io_recvzc(struct io_kiocb *req, unsigned int issue_flags);