Diffstat (limited to 'io_uring')
-rw-r--r--  io_uring/alloc_cache.h    2
-rw-r--r--  io_uring/eventfd.c        1
-rw-r--r--  io_uring/futex.c          4
-rw-r--r--  io_uring/io_uring.c       3
-rw-r--r--  io_uring/kbuf.c           9
-rw-r--r--  io_uring/kbuf.h           8
-rw-r--r--  io_uring/memmap.c        46
-rw-r--r--  io_uring/napi.c          29
-rw-r--r--  io_uring/napi.h           8
-rw-r--r--  io_uring/poll.c           6
-rw-r--r--  io_uring/register.c      36
-rw-r--r--  io_uring/rsrc.c           5
-rw-r--r--  io_uring/rsrc.h           9
-rw-r--r--  io_uring/rw.c             4
-rw-r--r--  io_uring/tctx.c          15
-rw-r--r--  io_uring/timeout.c       35
-rw-r--r--  io_uring/tw.c            12
-rw-r--r--  io_uring/wait.c           6
-rw-r--r--  io_uring/zcrx.c           8
19 files changed, 192 insertions, 54 deletions
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index 45fcd8b3b824..962b6e2d04cc 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -64,7 +64,7 @@ static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp)
 static inline void io_cache_free(struct io_alloc_cache *cache, void *obj)
 {
 	if (!io_alloc_cache_put(cache, obj))
-		kfree(obj);
+		kvfree(obj);
 }
 
 #endif
diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 3da028500f76..d656cc2a0b9b 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -43,6 +43,7 @@ static void io_eventfd_do_signal(struct rcu_head *rcu)
 {
 	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
 
+	atomic_andnot(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops);
 	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
 	io_eventfd_put(ev_fd);
 }
diff --git a/io_uring/futex.c b/io_uring/futex.c
index fd503c24b428..9cc1788ef4c6 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -159,8 +159,10 @@ static void io_futex_wakev_fn(struct wake_q_head *wake_q, struct futex_q *q)
 	struct io_kiocb *req = q->wake_data;
 	struct io_futexv_data *ifd = req->async_data;
 
-	if (!io_futexv_claim(ifd))
+	if (!io_futexv_claim(ifd)) {
+		__futex_wake_mark(q);
 		return;
+	}
 	if (unlikely(!__futex_wake_mark(q)))
 		return;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index dd6326dc5f88..4ed998d60c09 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2575,7 +2575,8 @@ struct file *io_uring_ctx_get_file(unsigned int fd, bool registered)
 		return ERR_PTR(-EBADF);
 	if (io_is_uring_fops(file))
 		return file;
-	fput(file);
+	if (!registered)
+		fput(file);
 	return ERR_PTR(-EOPNOTSUPP);
 }
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 8da2ff798170..63061aa1cab9 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -47,7 +47,7 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
 		this_len = min_t(u32, len, buf_len);
 		buf_len -= this_len;
 		/* Stop looping for invalid buffer length of 0 */
-		if (buf_len || !this_len) {
+		if (buf_len > bl->min_left_sub_one || !this_len) {
 			WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len);
 			WRITE_ONCE(buf->len, buf_len);
 			return false;
@@ -637,6 +637,10 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	if (reg.ring_entries >= 65536)
 		return -EINVAL;
 
+	/* minimum left byte count is a property of incremental buffers */
+	if (!(reg.flags & IOU_PBUF_RING_INC) && reg.min_left)
+		return -EINVAL;
+
 	bl = io_buffer_get_list(ctx, reg.bgid);
 	if (bl) {
 		/* if mapped buffer ring OR classic exists, don't allow */
@@ -680,10 +684,11 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	}
 #endif
 
-	bl->nr_entries = reg.ring_entries;
 	bl->mask = reg.ring_entries - 1;
 	bl->flags |= IOBL_BUF_RING;
 	bl->buf_ring = br;
+	if (reg.min_left)
+		bl->min_left_sub_one = reg.min_left - 1;
 	if (reg.flags & IOU_PBUF_RING_INC)
 		bl->flags |= IOBL_INC;
 	ret = io_buffer_add_list(ctx, bl, reg.bgid);
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index bf15e26520d3..401773e1ef80 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -27,12 +27,18 @@ struct io_buffer_list {
 	__u16 bgid;
 
 	/* below is for ring provided buffers */
-	__u16 nr_entries;
 	__u16 head;
 	__u16 mask;
 	__u16 flags;
 
+	/*
+	 * minimum required amount to be left to reuse an incrementally
+	 * consumed buffer. If less than this is left at consumption time,
+	 * buffer is done and head is incremented to the next buffer.
+	 */
+	__u32 min_left_sub_one;
+
 	struct io_mapped_region region;
 };
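The kbuf change is easiest to see in isolation: an incrementally consumed
ring buffer used to stay at the ring head whenever any bytes were left,
while the new min_left registration field retires it early once the
remainder drops below the requested floor. Below is a minimal standalone
model of that decision in plain userspace C -- not the kernel function
itself, just the arithmetic from the io_kbuf_inc_commit() hunk above.

#include <assert.h>
#include <stdbool.h>

/*
 * Mirrors "if (buf_len > bl->min_left_sub_one)": the kernel stores
 * min_left - 1 so that an unset value of 0 keeps the old behaviour
 * of reusing the buffer whenever anything at all is left.
 */
static bool buffer_reusable(unsigned int left, unsigned int min_left)
{
	unsigned int min_left_sub_one = min_left ? min_left - 1 : 0;

	return left > min_left_sub_one;
}

int main(void)
{
	assert(buffer_reusable(1, 0));		/* old behaviour: 1 byte left */
	assert(buffer_reusable(512, 256));	/* enough room, keep it */
	assert(!buffer_reusable(96, 256));	/* below floor, retire buffer */
	return 0;
}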
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index e6958968975a..4f9b439319c4 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -366,9 +366,53 @@ unsigned long io_uring_get_unmapped_area(struct file *filp, unsigned long addr,
 
 #else /* !CONFIG_MMU */
 
+/*
+ * Drop the pages that were initially referenced and added in
+ * io_uring_mmap(). We cannot have had a mremap() as that isn't supported,
+ * hence the vma should be identical to the one we initially referenced and
+ * mapped, and partial unmaps and splitting isn't possible on a file backed
+ * mapping.
+ */
+static void io_uring_nommu_vm_close(struct vm_area_struct *vma)
+{
+	unsigned long index;
+
+	for (index = vma->vm_start; index < vma->vm_end; index += PAGE_SIZE)
+		put_page(virt_to_page((void *) index));
+}
+
+static const struct vm_operations_struct io_uring_nommu_vm_ops = {
+	.close = io_uring_nommu_vm_close,
+};
+
 int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -EINVAL;
+	struct io_ring_ctx *ctx = file->private_data;
+	struct io_mapped_region *region;
+	unsigned long i;
+
+	if (!is_nommu_shared_mapping(vma->vm_flags))
+		return -EINVAL;
+
+	guard(mutex)(&ctx->mmap_lock);
+	region = io_mmap_get_region(ctx, vma->vm_pgoff);
+	if (!region || !io_region_is_set(region))
+		return -EINVAL;
+
+	if ((vma->vm_end - vma->vm_start) !=
+	    (unsigned long) region->nr_pages << PAGE_SHIFT)
+		return -EINVAL;
+
+	/*
+	 * Pin the pages so io_free_region()'s release_pages() does not
+	 * drop the last reference while this VMA exists. delete_vma()
+	 * in mm/nommu.c calls vma_close() which runs ->close above.
+	 */
+	for (i = 0; i < region->nr_pages; i++)
+		get_page(region->pages[i]);
+
+	vma->vm_ops = &io_uring_nommu_vm_ops;
+	return 0;
 }
 
 unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
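For context on the nommu hunk: io_uring_mmap() now takes one extra page
reference per region page, and the new ->close handler drops exactly one
per page, so io_free_region()'s release_pages() can never free memory a
live mapping still uses. The pairing can be modelled with a bare
refcount; this is an illustrative userspace analogue, not kernel code.

#include <assert.h>
#include <stdatomic.h>

struct page { atomic_int refcount; };

static void get_page(struct page *p)
{
	atomic_fetch_add(&p->refcount, 1);
}

/* returns 1 when the last reference was just dropped */
static int put_page(struct page *p)
{
	return atomic_fetch_sub(&p->refcount, 1) == 1;
}

int main(void)
{
	struct page pg = { .refcount = 1 };	/* the region's reference */

	get_page(&pg);				/* io_uring_mmap() pins */
	assert(!put_page(&pg));			/* region freed: page survives */
	assert(put_page(&pg));			/* ->close drops the last ref */
	return 0;
}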
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 4a10de03e426..bfc771445912 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -38,7 +38,8 @@ static inline ktime_t net_to_ktime(unsigned long t)
 	return ns_to_ktime(t << 10);
 }
 
-int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
+int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id,
+		     unsigned int mode)
 {
 	struct hlist_head *hash_list;
 	struct io_napi_entry *e;
@@ -69,6 +70,11 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
 	 * kfree()
 	 */
 	spin_lock(&ctx->napi_lock);
+	if (unlikely(READ_ONCE(ctx->napi_track_mode) != mode)) {
+		spin_unlock(&ctx->napi_lock);
+		kfree(e);
+		return -EINVAL;
+	}
 	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
 		spin_unlock(&ctx->napi_lock);
 		kfree(e);
@@ -196,9 +202,14 @@ __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
 		       bool (*loop_end)(void *, unsigned long),
 		       void *loop_end_arg)
 {
-	if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
+	switch (READ_ONCE(ctx->napi_track_mode)) {
+	case IO_URING_NAPI_TRACKING_STATIC:
 		return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
-	return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
+	case IO_URING_NAPI_TRACKING_DYNAMIC:
+		return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
+	default:
+		return false;
+	}
 }
 
 static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
@@ -273,11 +284,13 @@ static int io_napi_register_napi(struct io_ring_ctx *ctx,
 	default:
 		return -EINVAL;
 	}
-	/* clean the napi list for new settings */
+	WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
 	io_napi_free(ctx);
-	WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
+	/* cap NAPI at 10 msec of spin time */
+	napi->busy_poll_to = min(10000, napi->busy_poll_to);
 	WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
 	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
+	WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
 	return 0;
 }
@@ -313,7 +326,8 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
 	case IO_URING_NAPI_STATIC_ADD_ID:
 		if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
 			return -EINVAL;
-		return __io_napi_add_id(ctx, napi.op_param);
+		return __io_napi_add_id(ctx, napi.op_param,
+					IO_URING_NAPI_TRACKING_STATIC);
 	case IO_URING_NAPI_STATIC_DEL_ID:
 		if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
 			return -EINVAL;
@@ -341,9 +355,10 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
 	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
 		return -EFAULT;
 
+	WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
 	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
 	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
-	WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
+	io_napi_free(ctx);
 	return 0;
 }
diff --git a/io_uring/napi.h b/io_uring/napi.h
index fa742f42e09b..e0aecccc5065 100644
--- a/io_uring/napi.h
+++ b/io_uring/napi.h
@@ -15,7 +15,8 @@ void io_napi_free(struct io_ring_ctx *ctx);
 int io_register_napi(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
 
-int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id);
+int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id,
+		     unsigned int mode);
 
 void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
 int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx);
@@ -43,13 +44,14 @@ static inline void io_napi_add(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct socket *sock;
+	unsigned int mode = IO_URING_NAPI_TRACKING_DYNAMIC;
 
-	if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC)
+	if (READ_ONCE(ctx->napi_track_mode) != mode)
 		return;
 
 	sock = sock_from_file(req->file);
 	if (sock && sock->sk)
-		__io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id));
+		__io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id), mode);
 }
 
 #else
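The register/unregister reordering above is a publish/unpublish pattern:
napi_track_mode acts as the published flag, cleared first on teardown and
written last on setup, so a reader that observes an active mode also sees
fully initialised settings. A simplified C11-atomics sketch of the same
ordering follows -- the kernel uses READ_ONCE/WRITE_ONCE plus napi_lock
rather than seq_cst atomics, so treat this as an assumption-laden model.

#include <stdatomic.h>

enum { TRACKING_INACTIVE, TRACKING_DYNAMIC };

static _Atomic int napi_track_mode;
static _Atomic unsigned long napi_busy_poll_dt;

static void napi_register(unsigned long dt)
{
	atomic_store(&napi_track_mode, TRACKING_INACTIVE);	/* unpublish */
	/* ... safe to free stale id entries here ... */
	atomic_store(&napi_busy_poll_dt, dt);			/* settings */
	atomic_store(&napi_track_mode, TRACKING_DYNAMIC);	/* publish last */
}

static void napi_unregister(void)
{
	atomic_store(&napi_track_mode, TRACKING_INACTIVE);	/* unpublish first */
	atomic_store(&napi_busy_poll_dt, 0);
	/* ... then tear down the id list ... */
}

int main(void)
{
	napi_register(10000);
	napi_unregister();
	return 0;
}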
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 74eef7884159..0204affdc308 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -93,7 +93,7 @@ static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
  */
 static inline bool io_poll_get_ownership(struct io_kiocb *req)
 {
-	if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
+	if (unlikely((unsigned int)atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
 		return io_poll_get_ownership_slowpath(req);
 	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
 }
@@ -417,8 +417,10 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		 * disable multishot as there is a circular dependency between
 		 * CQ posting and triggering the event.
 		 */
-		if (mask & EPOLL_URING_WAKE)
+		if (mask & EPOLL_URING_WAKE) {
 			poll->events |= EPOLLONESHOT;
+			req->apoll_events |= EPOLLONESHOT;
+		}
 
 		/* optional, saves extra locking for removal in tw handler */
 		if (mask && poll->events & EPOLLONESHOT) {
diff --git a/io_uring/register.c b/io_uring/register.c
index 24e593332d1a..dce5e2f9cf77 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -599,10 +599,20 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 	if (tail - old_head > p->sq_entries)
 		goto overflow;
 	for (i = old_head; i < tail; i++) {
-		unsigned src_head = i & (ctx->sq_entries - 1);
-		unsigned dst_head = i & (p->sq_entries - 1);
-
-		n.sq_sqes[dst_head] = o.sq_sqes[src_head];
+		unsigned index, dst_mask, src_mask;
+		size_t sq_size;
+
+		index = i;
+		sq_size = sizeof(struct io_uring_sqe);
+		src_mask = ctx->sq_entries - 1;
+		dst_mask = p->sq_entries - 1;
+		if (ctx->flags & IORING_SETUP_SQE128) {
+			index <<= 1;
+			sq_size <<= 1;
+			src_mask = (ctx->sq_entries << 1) - 1;
+			dst_mask = (p->sq_entries << 1) - 1;
+		}
+		memcpy(&n.sq_sqes[index & dst_mask], &o.sq_sqes[index & src_mask], sq_size);
 	}
 	WRITE_ONCE(n.rings->sq.head, old_head);
 	WRITE_ONCE(n.rings->sq.tail, tail);
@@ -619,10 +629,20 @@ overflow:
 		goto out;
 	}
 	for (i = old_head; i < tail; i++) {
-		unsigned src_head = i & (ctx->cq_entries - 1);
-		unsigned dst_head = i & (p->cq_entries - 1);
-
-		n.rings->cqes[dst_head] = o.rings->cqes[src_head];
+		unsigned index, dst_mask, src_mask;
+		size_t cq_size;
+
+		index = i;
+		cq_size = sizeof(struct io_uring_cqe);
+		src_mask = ctx->cq_entries - 1;
+		dst_mask = p->cq_entries - 1;
+		if (ctx->flags & IORING_SETUP_CQE32) {
+			index <<= 1;
+			cq_size <<= 1;
+			src_mask = (ctx->cq_entries << 1) - 1;
+			dst_mask = (p->cq_entries << 1) - 1;
+		}
+		memcpy(&n.rings->cqes[index & dst_mask], &o.rings->cqes[index & src_mask], cq_size);
 	}
 	WRITE_ONCE(n.rings->cq.head, old_head);
 	WRITE_ONCE(n.rings->cq.tail, tail);
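The resize copy arithmetic deserves a worked check: with
IORING_SETUP_SQE128 (or CQE32) every logical entry occupies two slots,
so the hunks above double the index and both wrap masks and copy two
slots' worth per entry. A standalone verification of the index math,
using example ring sizes:

#include <stdio.h>

int main(void)
{
	unsigned sq_entries = 8, new_entries = 16;	/* powers of two */
	unsigned i = 9;					/* logical SQE number */

	/* plain 64-byte SQEs: one slot per entry */
	printf("plain:  src slot %u, dst slot %u\n",
	       i & (sq_entries - 1), i & (new_entries - 1));

	/* SQE128: two slots per entry, index and masks doubled */
	unsigned index = i << 1;
	printf("sqe128: src slot %u, dst slot %u (plus the slot after each)\n",
	       index & ((sq_entries << 1) - 1),
	       index & ((new_entries << 1) - 1));
	return 0;
}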
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index fd36e0e319a2..650303626be6 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -168,7 +168,7 @@ bool io_rsrc_cache_init(struct io_ring_ctx *ctx)
 void io_rsrc_cache_free(struct io_ring_ctx *ctx)
 {
 	io_alloc_cache_free(&ctx->node_cache, kfree);
-	io_alloc_cache_free(&ctx->imu_cache, kfree);
+	io_alloc_cache_free(&ctx->imu_cache, kvfree);
 }
 
 static void io_clear_table_tags(struct io_rsrc_data *data)
@@ -238,6 +238,9 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 			continue;
 
 		i = up->offset + done;
+		if (i >= ctx->file_table.data.nr)
+			break;
+		i = array_index_nospec(i, ctx->file_table.data.nr);
 		if (io_reset_rsrc_node(ctx, &ctx->file_table.data, i))
 			io_file_bitmap_clear(&ctx->file_table, i);
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index cff0f8834c35..44e3386f7c1c 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -109,10 +109,15 @@ static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node
 }
 
 static inline bool io_reset_rsrc_node(struct io_ring_ctx *ctx,
-				      struct io_rsrc_data *data, int index)
+				      struct io_rsrc_data *data,
+				      unsigned int index)
 {
-	struct io_rsrc_node *node = data->nodes[index];
+	struct io_rsrc_node *node;
 
+	if (index >= data->nr)
+		return false;
+	index = array_index_nospec(index, data->nr);
+	node = data->nodes[index];
 	if (!node)
 		return false;
 	io_put_rsrc_node(ctx, node);
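Both rsrc hunks follow the standard Spectre-v1 hardening recipe:
bounds-check first, then clamp with array_index_nospec() so the index
cannot be used out of range even under branch misprediction. A userspace
rendering of the pattern -- the mask below is written to match the
kernel's generic fallback in include/linux/nospec.h, assumed here for
illustration:

#include <assert.h>

#define BITS_PER_LONG (sizeof(long) * 8)

/* 0 if index >= size, ~0UL if index < size, computed without a branch */
static unsigned long array_index_mask_nospec(unsigned long index,
					     unsigned long size)
{
	return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
}

static unsigned long array_index_nospec(unsigned long index,
					unsigned long size)
{
	return index & array_index_mask_nospec(index, size);
}

int main(void)
{
	assert(array_index_nospec(3, 8) == 3);	/* in range: unchanged */
	assert(array_index_nospec(9, 8) == 0);	/* out of range: clamped */
	return 0;
}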
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 20654deff84d..e729e0e7657e 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -580,7 +580,7 @@ void io_req_rw_complete(struct io_tw_req tw_req, io_tw_token_t tw)
 	io_req_io_end(req);
 
 	if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
-		req->cqe.flags |= io_put_kbuf(req, req->cqe.res, NULL);
+		req->cqe.flags |= io_put_kbuf(req, max(req->cqe.res, 0), NULL);
 
 	io_req_rw_cleanup(req, 0);
 	io_req_task_complete(tw_req, tw);
@@ -1379,7 +1379,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		list_del(&req->iopoll_node);
 		wq_list_add_tail(&req->comp_list, &ctx->submit_state.compl_reqs);
 		nr_events++;
-		req->cqe.flags = io_put_kbuf(req, req->cqe.res, NULL);
+		req->cqe.flags = io_put_kbuf(req, max(req->cqe.res, 0), NULL);
 		if (!io_is_uring_cmd(req))
 			io_req_rw_cleanup(req, 0);
 	}
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 61533f30494f..6af62ca9baba 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -146,9 +146,13 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
 		if (IS_ERR(tctx))
 			return PTR_ERR(tctx);
 
-		if (ctx->int_flags & IO_RING_F_IOWQ_LIMITS_SET) {
-			unsigned int limits[2] = { ctx->iowq_limits[0],
-						   ctx->iowq_limits[1], };
+		if (data_race(ctx->int_flags) & IO_RING_F_IOWQ_LIMITS_SET) {
+			unsigned int limits[2];
+
+			mutex_lock(&ctx->uring_lock);
+			limits[0] = ctx->iowq_limits[0];
+			limits[1] = ctx->iowq_limits[1];
+			mutex_unlock(&ctx->uring_lock);
 
 			ret = io_wq_max_workers(tctx->io_wq, limits);
 			if (ret)
@@ -171,7 +175,10 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
 	}
 	if (!current->io_uring) {
 err_free:
-		io_wq_put_and_exit(tctx->io_wq);
+		if (tctx->io_wq) {
+			io_wq_exit_start(tctx->io_wq);
+			io_wq_put_and_exit(tctx->io_wq);
+		}
 		percpu_counter_destroy(&tctx->inflight);
 		kfree(tctx);
 	}
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 4cfdfc519770..e2595cae2b07 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -3,6 +3,7 @@
 #include <linux/errno.h>
 #include <linux/file.h>
 #include <linux/io_uring.h>
+#include <linux/time_namespace.h>
 
 #include <trace/events/io_uring.h>
 
@@ -35,6 +36,22 @@ struct io_timeout_rem {
 	bool ltimeout;
 };
 
+static clockid_t io_flags_to_clock(unsigned flags)
+{
+	switch (flags & IORING_TIMEOUT_CLOCK_MASK) {
+	case IORING_TIMEOUT_BOOTTIME:
+		return CLOCK_BOOTTIME;
+	case IORING_TIMEOUT_REALTIME:
+		return CLOCK_REALTIME;
+	default:
+		/* can't happen, vetted at prep time */
+		WARN_ON_ONCE(1);
+		fallthrough;
+	case 0:
+		return CLOCK_MONOTONIC;
+	}
+}
+
 static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
 {
 	struct timespec64 ts;
@@ -43,7 +60,7 @@ static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
 		*time = ns_to_ktime(arg);
 		if (*time < 0)
 			return -EINVAL;
-		return 0;
+		goto out;
 	}
 
 	if (get_timespec64(&ts, u64_to_user_ptr(arg)))
@@ -51,6 +68,9 @@ static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
 	if (ts.tv_sec < 0 || ts.tv_nsec < 0)
 		return -EINVAL;
 	*time = timespec64_to_ktime(ts);
+out:
+	if (flags & IORING_TIMEOUT_ABS)
+		*time = timens_ktime_to_host(io_flags_to_clock(flags), *time);
 	return 0;
 }
 
@@ -399,18 +419,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 
 static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
 {
-	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
-	case IORING_TIMEOUT_BOOTTIME:
-		return CLOCK_BOOTTIME;
-	case IORING_TIMEOUT_REALTIME:
-		return CLOCK_REALTIME;
-	default:
-		/* can't happen, vetted at prep time */
-		WARN_ON_ONCE(1);
-		fallthrough;
-	case 0:
-		return CLOCK_MONOTONIC;
-	}
+	return io_flags_to_clock(data->flags);
 }
 
 static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
diff --git a/io_uring/tw.c b/io_uring/tw.c
index fdff81eebc95..023d5e6bc491 100644
--- a/io_uring/tw.c
+++ b/io_uring/tw.c
@@ -273,8 +273,18 @@ void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags)
 
 void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
 {
-	struct llist_node *node = llist_del_all(&ctx->work_llist);
+	struct llist_node *node;
 
+	/*
+	 * Running the work items may utilize ->retry_llist as a means
+	 * for capping the number of task_work entries run at the same
+	 * time. But that list can potentially race with moving the work
+	 * from here, if the task is exiting. As any normal task_work
+	 * running holds ->uring_lock already, just guard this slow path
+	 * with ->uring_lock to avoid racing on ->retry_llist.
+	 */
+	guard(mutex)(&ctx->uring_lock);
+	node = llist_del_all(&ctx->work_llist);
 	__io_fallback_tw(node, false);
 	node = llist_del_all(&ctx->retry_llist);
 	__io_fallback_tw(node, false);
diff --git a/io_uring/wait.c b/io_uring/wait.c
index 91df86ce0d18..ec01e78a216d 100644
--- a/io_uring/wait.c
+++ b/io_uring/wait.c
@@ -5,6 +5,7 @@
 #include <linux/kernel.h>
 #include <linux/sched/signal.h>
 #include <linux/io_uring.h>
+#include <linux/time_namespace.h>
 
 #include <trace/events/io_uring.h>
 
@@ -229,7 +230,10 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 
 	if (ext_arg->ts_set) {
 		iowq.timeout = timespec64_to_ktime(ext_arg->ts);
-		if (!(flags & IORING_ENTER_ABS_TIMER))
+		if (flags & IORING_ENTER_ABS_TIMER)
+			iowq.timeout = timens_ktime_to_host(ctx->clockid,
+							    iowq.timeout);
+		else
 			iowq.timeout = ktime_add(iowq.timeout, start_time);
 	}
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 9a83d7eb4210..19837e0b5e91 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -396,6 +396,7 @@ static int io_allocate_rbuf_ring(struct io_ring_ctx *ctx,
 
 	ifq->rq.ring = (struct io_uring *)ptr;
 	ifq->rq.rqes = (struct io_uring_zcrx_rqe *)(ptr + off);
+	memset(ifq->rq.ring, 0, sizeof(*ifq->rq.ring));
 	return 0;
 }
 
@@ -494,10 +495,9 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	for (i = 0; i < nr_iovs; i++) {
 		struct net_iov *niov = &area->nia.niovs[i];
 
-		niov->owner = &area->nia;
+		net_iov_init(niov, &area->nia, NET_IOV_IOURING);
 		area->freelist[i] = i;
 		atomic_set(&area->user_refs[i], 0);
-		niov->type = NET_IOV_IOURING;
 	}
 
 	if (ifq->dev) {
@@ -579,13 +579,13 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 	if (ifq->area)
 		io_zcrx_free_area(ifq, ifq->area);
 
-	free_uid(ifq->user);
 	if (ifq->mm_account)
 		mmdrop(ifq->mm_account);
 	if (ifq->dev)
 		put_device(ifq->dev);
 
 	io_free_rbuf_ring(ifq);
+	free_uid(ifq->user);
 	mutex_destroy(&ifq->pp_lock);
 	kfree(ifq);
 }
@@ -601,6 +601,8 @@ static void io_zcrx_return_niov_freelist(struct net_iov *niov)
 	struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
 
 	guard(spinlock_bh)(&area->freelist_lock);
+	if (WARN_ON_ONCE(area->free_count >= area->nia.num_niovs))
+		return;
 	area->freelist[area->free_count++] = net_iov_idx(niov);
 }
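Several hunks above (tw.c, memmap.c, zcrx.c) lean on scope-based locking
via guard() from the kernel's <linux/cleanup.h>: the lock is released
automatically on every path out of the scope. A rough userspace analogue
built on the GCC/Clang cleanup attribute -- the macro names here are
invented for illustration:

#include <pthread.h>

static pthread_mutex_t uring_lock = PTHREAD_MUTEX_INITIALIZER;

static void unlock_cleanup(pthread_mutex_t **m)
{
	pthread_mutex_unlock(*m);
}

#define CONCAT_(a, b)	a##b
#define CONCAT(a, b)	CONCAT_(a, b)
/* lock now, unlock automatically when the enclosing scope exits */
#define guard_mutex(m) \
	pthread_mutex_t *CONCAT(guard_, __LINE__) \
		__attribute__((cleanup(unlock_cleanup))) = \
		(pthread_mutex_lock(m), (m))

static int move_work(void)
{
	guard_mutex(&uring_lock);	/* cf. guard(mutex)(&ctx->uring_lock) */
	/* ... drain work lists that race with other lock holders ... */
	return 0;			/* unlock runs here, on any return */
}

int main(void)
{
	return move_work();
}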
