diff options
-rw-r--r-- | include/linux/io_uring_types.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/io_uring.h | 32 | ||||
-rw-r--r-- | io_uring/fdinfo.c | 54 | ||||
-rw-r--r-- | io_uring/napi.c | 97 | ||||
-rw-r--r-- | io_uring/napi.h | 2 |
5 files changed, 160 insertions, 27 deletions
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index fba2988accc3..072e65e93105 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -408,7 +408,7 @@ struct io_ring_ctx { /* napi busy poll default timeout */ ktime_t napi_busy_poll_dt; bool napi_prefer_busy_poll; - bool napi_enabled; + u8 napi_track_mode; DECLARE_HASHTABLE(napi_ht, 4); #endif diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 47977a5c65f5..5d08435b95a8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -790,12 +790,40 @@ struct io_uring_buf_status { __u32 resv[8]; }; +enum io_uring_napi_op { + /* register/ungister backward compatible opcode */ + IO_URING_NAPI_REGISTER_OP = 0, + + /* opcodes to update napi_list when static tracking is used */ + IO_URING_NAPI_STATIC_ADD_ID = 1, + IO_URING_NAPI_STATIC_DEL_ID = 2 +}; + +enum io_uring_napi_tracking_strategy { + /* value must be 0 for backward compatibility */ + IO_URING_NAPI_TRACKING_DYNAMIC = 0, + IO_URING_NAPI_TRACKING_STATIC = 1, + IO_URING_NAPI_TRACKING_INACTIVE = 255 +}; + /* argument for IORING_(UN)REGISTER_NAPI */ struct io_uring_napi { __u32 busy_poll_to; __u8 prefer_busy_poll; - __u8 pad[3]; - __u64 resv; + + /* a io_uring_napi_op value */ + __u8 opcode; + __u8 pad[2]; + + /* + * for IO_URING_NAPI_REGISTER_OP, it is a + * io_uring_napi_tracking_strategy value. + * + * for IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID + * it is the napi id to add/del from napi_list. + */ + __u32 op_param; + __u32 resv; }; /* diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index efbec34ccb18..b214e5a407b5 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -46,6 +46,46 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, return 0; } +#ifdef CONFIG_NET_RX_BUSY_POLL +static __cold void common_tracking_show_fdinfo(struct io_ring_ctx *ctx, + struct seq_file *m, + const char *tracking_strategy) +{ + seq_puts(m, "NAPI:\tenabled\n"); + seq_printf(m, "napi tracking:\t%s\n", tracking_strategy); + seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt); + if (ctx->napi_prefer_busy_poll) + seq_puts(m, "napi_prefer_busy_poll:\ttrue\n"); + else + seq_puts(m, "napi_prefer_busy_poll:\tfalse\n"); +} + +static __cold void napi_show_fdinfo(struct io_ring_ctx *ctx, + struct seq_file *m) +{ + unsigned int mode = READ_ONCE(ctx->napi_track_mode); + + switch (mode) { + case IO_URING_NAPI_TRACKING_INACTIVE: + seq_puts(m, "NAPI:\tdisabled\n"); + break; + case IO_URING_NAPI_TRACKING_DYNAMIC: + common_tracking_show_fdinfo(ctx, m, "dynamic"); + break; + case IO_URING_NAPI_TRACKING_STATIC: + common_tracking_show_fdinfo(ctx, m, "static"); + break; + default: + seq_printf(m, "NAPI:\tunknown mode (%u)\n", mode); + } +} +#else +static inline void napi_show_fdinfo(struct io_ring_ctx *ctx, + struct seq_file *m) +{ +} +#endif + /* * Caller holds a reference to the file already, we don't need to do * anything else to get an extra reference. @@ -219,18 +259,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) } spin_unlock(&ctx->completion_lock); - -#ifdef CONFIG_NET_RX_BUSY_POLL - if (ctx->napi_enabled) { - seq_puts(m, "NAPI:\tenabled\n"); - seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt); - if (ctx->napi_prefer_busy_poll) - seq_puts(m, "napi_prefer_busy_poll:\ttrue\n"); - else - seq_puts(m, "napi_prefer_busy_poll:\tfalse\n"); - } else { - seq_puts(m, "NAPI:\tdisabled\n"); - } -#endif + napi_show_fdinfo(ctx, m); } #endif diff --git a/io_uring/napi.c b/io_uring/napi.c index 1de1543d8034..b1ade3fda30f 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -81,6 +81,27 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) return 0; } +static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id) +{ + struct hlist_head *hash_list; + struct io_napi_entry *e; + + /* Non-NAPI IDs can be rejected. */ + if (napi_id < MIN_NAPI_ID) + return -EINVAL; + + hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; + guard(spinlock)(&ctx->napi_lock); + e = io_napi_hash_find(hash_list, napi_id); + if (!e) + return -ENOENT; + + list_del_rcu(&e->list); + hash_del_rcu(&e->node); + kfree_rcu(e, rcu); + return 0; +} + static void __io_napi_remove_stale(struct io_ring_ctx *ctx) { struct io_napi_entry *e; @@ -136,9 +157,25 @@ static bool io_napi_busy_loop_should_end(void *data, return false; } -static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, - bool (*loop_end)(void *, unsigned long), - void *loop_end_arg) +/* + * never report stale entries + */ +static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) +{ + struct io_napi_entry *e; + + list_for_each_entry_rcu(e, &ctx->napi_list, list) + napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg, + ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET); + return false; +} + +static bool +dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) { struct io_napi_entry *e; bool is_stale = false; @@ -154,6 +191,16 @@ static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, return is_stale; } +static inline bool +__io_napi_do_busy_loop(struct io_ring_ctx *ctx, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) +{ + if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC) + return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); + return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); +} + static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { @@ -195,6 +242,7 @@ void io_napi_init(struct io_ring_ctx *ctx) spin_lock_init(&ctx->napi_lock); ctx->napi_prefer_busy_poll = false; ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt); + ctx->napi_track_mode = IO_URING_NAPI_TRACKING_INACTIVE; } /* @@ -215,6 +263,24 @@ void io_napi_free(struct io_ring_ctx *ctx) INIT_LIST_HEAD_RCU(&ctx->napi_list); } +static int io_napi_register_napi(struct io_ring_ctx *ctx, + struct io_uring_napi *napi) +{ + switch (napi->op_param) { + case IO_URING_NAPI_TRACKING_DYNAMIC: + case IO_URING_NAPI_TRACKING_STATIC: + break; + default: + return -EINVAL; + } + /* clean the napi list for new settings */ + io_napi_free(ctx); + WRITE_ONCE(ctx->napi_track_mode, napi->op_param); + WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC); + WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll); + return 0; +} + /* * io_napi_register() - Register napi with io-uring * @ctx: pointer to io-uring context structure @@ -226,7 +292,8 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) { const struct io_uring_napi curr = { .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), - .prefer_busy_poll = ctx->napi_prefer_busy_poll + .prefer_busy_poll = ctx->napi_prefer_busy_poll, + .op_param = ctx->napi_track_mode }; struct io_uring_napi napi; @@ -234,16 +301,26 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) return -EINVAL; if (copy_from_user(&napi, arg, sizeof(napi))) return -EFAULT; - if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv) + if (napi.pad[0] || napi.pad[1] || napi.resv) return -EINVAL; if (copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; - WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC); - WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); - WRITE_ONCE(ctx->napi_enabled, true); - return 0; + switch (napi.opcode) { + case IO_URING_NAPI_REGISTER_OP: + return io_napi_register_napi(ctx, &napi); + case IO_URING_NAPI_STATIC_ADD_ID: + if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) + return -EINVAL; + return __io_napi_add_id(ctx, napi.op_param); + case IO_URING_NAPI_STATIC_DEL_ID: + if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) + return -EINVAL; + return __io_napi_del_id(ctx, napi.op_param); + default: + return -EINVAL; + } } /* @@ -266,7 +343,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) WRITE_ONCE(ctx->napi_busy_poll_dt, 0); WRITE_ONCE(ctx->napi_prefer_busy_poll, false); - WRITE_ONCE(ctx->napi_enabled, false); + WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); return 0; } diff --git a/io_uring/napi.h b/io_uring/napi.h index 4ae622f37b30..fa742f42e09b 100644 --- a/io_uring/napi.h +++ b/io_uring/napi.h @@ -44,7 +44,7 @@ static inline void io_napi_add(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; struct socket *sock; - if (!READ_ONCE(ctx->napi_enabled)) + if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC) return; sock = sock_from_file(req->file); |