author    Linus Torvalds <torvalds@linux-foundation.org>  2021-07-01 22:16:24 +0300
committer Linus Torvalds <torvalds@linux-foundation.org>  2021-07-01 22:16:24 +0300
commit    c288d9cd710433e5991d58a0764c4d08a933b871 (patch)
tree      32f37ec7d0083524d72b5ac9b9f67b3e86f9f748 /include
parent    911a2997a5b7c16b27dfe83d8e2f614e44d90f74 (diff)
parent    e149bd742b2db6a63fc078b1ea6843dc9b22678d (diff)
download  linux-c288d9cd710433e5991d58a0764c4d08a933b871.tar.xz
Merge tag 'for-5.14/io_uring-2021-06-30' of git://git.kernel.dk/linux-block
Pull io_uring updates from Jens Axboe:

 - Multi-queue iopoll improvement (Fam)
 - Allow configurable io-wq CPU masks (me)
 - renameat/linkat tightening (me)
 - poll re-arm improvement (Olivier)
 - SQPOLL race fix (Olivier)
 - Cancelation unification (Pavel)
 - SQPOLL cleanups (Pavel)
 - Enable file backed buffers for shmem/memfd (Pavel)
 - A ton of cleanups and performance improvements (Pavel)
 - Followup and misc fixes (Colin, Fam, Hao, Olivier)

* tag 'for-5.14/io_uring-2021-06-30' of git://git.kernel.dk/linux-block: (83 commits)
  io_uring: code clean for kiocb_done()
  io_uring: spin in iopoll() only when reqs are in a single queue
  io_uring: pre-initialise some of req fields
  io_uring: refactor io_submit_flush_completions
  io_uring: optimise hot path restricted checks
  io_uring: remove not needed PF_EXITING check
  io_uring: mainstream sqpoll task_work running
  io_uring: refactor io_arm_poll_handler()
  io_uring: reduce latency by reissueing the operation
  io_uring: add IOPOLL and reserved field checks to IORING_OP_UNLINKAT
  io_uring: add IOPOLL and reserved field checks to IORING_OP_RENAMEAT
  io_uring: refactor io_openat2()
  io_uring: simplify struct io_uring_sqe layout
  io_uring: update sqe layout build checks
  io_uring: fix code style problems
  io_uring: refactor io_sq_thread()
  io_uring: don't change sqpoll creds if not needed
  io_uring: Create define to modify a SQPOLL parameter
  io_uring: Fix race condition when sqp thread goes to sleep
  io_uring: improve in tctx_task_work() resubmission
  ...
Diffstat (limited to 'include')
-rw-r--r--  include/trace/events/io_uring.h   106
-rw-r--r--  include/uapi/linux/io_uring.h      28
2 files changed, 84 insertions(+), 50 deletions(-)
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index abb8b24744fd..e4e44a2b4aa9 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -12,11 +12,11 @@ struct io_wq_work;
/**
* io_uring_create - called after a new io_uring context was prepared
*
- * @fd: corresponding file descriptor
- * @ctx: pointer to a ring context structure
+ * @fd: corresponding file descriptor
+ * @ctx: pointer to a ring context structure
* @sq_entries: actual SQ size
* @cq_entries: actual CQ size
- * @flags: SQ ring flags, provided to io_uring_setup(2)
+ * @flags: SQ ring flags, provided to io_uring_setup(2)
*
* Allows to trace io_uring creation and provide pointer to a context, that can
* be used later to find correlated events.
@@ -52,12 +52,12 @@ TRACE_EVENT(io_uring_create,
* io_uring_register - called after a buffer/file/eventfd was successfully
* registered for a ring
*
- * @ctx: pointer to a ring context structure
- * @opcode: describes which operation to perform
+ * @ctx: pointer to a ring context structure
+ * @opcode: describes which operation to perform
* @nr_user_files: number of registered files
* @nr_user_bufs: number of registered buffers
* @cq_ev_fd: whether eventfs registered or not
- * @ret: return code
+ * @ret: return code
*
* Allows to trace fixed files/buffers/eventfds, that could be registered to
* avoid an overhead of getting references to them for every operation. This
@@ -142,16 +142,16 @@ TRACE_EVENT(io_uring_queue_async_work,
TP_ARGS(ctx, rw, req, work, flags),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( int, rw )
- __field( void *, req )
+ __field( void *, ctx )
+ __field( int, rw )
+ __field( void *, req )
__field( struct io_wq_work *, work )
__field( unsigned int, flags )
),
TP_fast_assign(
__entry->ctx = ctx;
- __entry->rw = rw;
+ __entry->rw = rw;
__entry->req = req;
__entry->work = work;
__entry->flags = flags;
@@ -196,10 +196,10 @@ TRACE_EVENT(io_uring_defer,
/**
* io_uring_link - called before the io_uring request added into link_list of
- * another request
+ * another request
*
- * @ctx: pointer to a ring context structure
- * @req: pointer to a linked request
+ * @ctx: pointer to a ring context structure
+ * @req: pointer to a linked request
* @target_req: pointer to a previous request, that would contain @req
*
* Allows to track linked requests, to understand dependencies between requests
@@ -212,8 +212,8 @@ TRACE_EVENT(io_uring_link,
TP_ARGS(ctx, req, target_req),
TP_STRUCT__entry (
- __field( void *, ctx )
- __field( void *, req )
+ __field( void *, ctx )
+ __field( void *, req )
__field( void *, target_req )
),
@@ -244,7 +244,7 @@ TRACE_EVENT(io_uring_cqring_wait,
TP_ARGS(ctx, min_events),
TP_STRUCT__entry (
- __field( void *, ctx )
+ __field( void *, ctx )
__field( int, min_events )
),
@@ -272,7 +272,7 @@ TRACE_EVENT(io_uring_fail_link,
TP_ARGS(req, link),
TP_STRUCT__entry (
- __field( void *, req )
+ __field( void *, req )
__field( void *, link )
),
@@ -318,13 +318,14 @@ TRACE_EVENT(io_uring_complete,
__entry->res, __entry->cflags)
);
-
/**
* io_uring_submit_sqe - called before submitting one SQE
*
* @ctx: pointer to a ring context structure
+ * @req: pointer to a submitted request
* @opcode: opcode of request
* @user_data: user data associated with the request
+ * @flags request flags
* @force_nonblock: whether a context blocking or not
* @sq_thread: true if sq_thread has submitted this SQE
*
@@ -333,41 +334,60 @@ TRACE_EVENT(io_uring_complete,
*/
TRACE_EVENT(io_uring_submit_sqe,
- TP_PROTO(void *ctx, u8 opcode, u64 user_data, bool force_nonblock,
- bool sq_thread),
+ TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags,
+ bool force_nonblock, bool sq_thread),
- TP_ARGS(ctx, opcode, user_data, force_nonblock, sq_thread),
+ TP_ARGS(ctx, req, opcode, user_data, flags, force_nonblock, sq_thread),
TP_STRUCT__entry (
__field( void *, ctx )
+ __field( void *, req )
__field( u8, opcode )
__field( u64, user_data )
+ __field( u32, flags )
__field( bool, force_nonblock )
__field( bool, sq_thread )
),
TP_fast_assign(
__entry->ctx = ctx;
+ __entry->req = req;
__entry->opcode = opcode;
__entry->user_data = user_data;
+ __entry->flags = flags;
__entry->force_nonblock = force_nonblock;
__entry->sq_thread = sq_thread;
),
- TP_printk("ring %p, op %d, data 0x%llx, non block %d, sq_thread %d",
- __entry->ctx, __entry->opcode,
- (unsigned long long) __entry->user_data,
- __entry->force_nonblock, __entry->sq_thread)
+ TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, "
+ "non block %d, sq_thread %d", __entry->ctx, __entry->req,
+ __entry->opcode, (unsigned long long)__entry->user_data,
+ __entry->flags, __entry->force_nonblock, __entry->sq_thread)
);
+/*
+ * io_uring_poll_arm - called after arming a poll wait if successful
+ *
+ * @ctx: pointer to a ring context structure
+ * @req: pointer to the armed request
+ * @opcode: opcode of request
+ * @user_data: user data associated with the request
+ * @mask: request poll events mask
+ * @events: registered events of interest
+ *
+ * Allows to track which fds are waiting for and what are the events of
+ * interest.
+ */
TRACE_EVENT(io_uring_poll_arm,
- TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events),
+ TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
+ int mask, int events),
- TP_ARGS(ctx, opcode, user_data, mask, events),
+ TP_ARGS(ctx, req, opcode, user_data, mask, events),
TP_STRUCT__entry (
__field( void *, ctx )
+ __field( void *, req )
__field( u8, opcode )
__field( u64, user_data )
__field( int, mask )
@@ -376,16 +396,17 @@ TRACE_EVENT(io_uring_poll_arm,
TP_fast_assign(
__entry->ctx = ctx;
+ __entry->req = req;
__entry->opcode = opcode;
__entry->user_data = user_data;
__entry->mask = mask;
__entry->events = events;
),
- TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
- __entry->ctx, __entry->opcode,
- (unsigned long long) __entry->user_data,
- __entry->mask, __entry->events)
+ TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
+ __entry->ctx, __entry->req, __entry->opcode,
+ (unsigned long long) __entry->user_data,
+ __entry->mask, __entry->events)
);
TRACE_EVENT(io_uring_poll_wake,
@@ -440,27 +461,40 @@ TRACE_EVENT(io_uring_task_add,
__entry->mask)
);
+/*
+ * io_uring_task_run - called when task_work_run() executes the poll events
+ * notification callbacks
+ *
+ * @ctx: pointer to a ring context structure
+ * @req: pointer to the armed request
+ * @opcode: opcode of request
+ * @user_data: user data associated with the request
+ *
+ * Allows to track when notified poll events are processed
+ */
TRACE_EVENT(io_uring_task_run,
- TP_PROTO(void *ctx, u8 opcode, u64 user_data),
+ TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
- TP_ARGS(ctx, opcode, user_data),
+ TP_ARGS(ctx, req, opcode, user_data),
TP_STRUCT__entry (
__field( void *, ctx )
+ __field( void *, req )
__field( u8, opcode )
__field( u64, user_data )
),
TP_fast_assign(
__entry->ctx = ctx;
+ __entry->req = req;
__entry->opcode = opcode;
__entry->user_data = user_data;
),
- TP_printk("ring %p, op %d, data 0x%llx",
- __entry->ctx, __entry->opcode,
- (unsigned long long) __entry->user_data)
+ TP_printk("ring %p, req %p, op %d, data 0x%llx",
+ __entry->ctx, __entry->req, __entry->opcode,
+ (unsigned long long) __entry->user_data)
);
#endif /* _TRACE_IO_URING_H */
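The trace changes above add a request pointer (and, for io_uring_submit_sqe, the request flags) to several io_uring events, so a single request can now be followed from submission through poll arming to task_work execution. Purely as an illustration and not part of the patch, the sketch below enables the updated io_uring_submit_sqe event from userspace through tracefs and dumps the trace buffer; the /sys/kernel/tracing mount point is an assumption and may be /sys/kernel/debug/tracing on older setups.

/*
 * Sketch only: enable the io_uring_submit_sqe tracepoint and read back
 * the trace buffer. Each hit now also logs the request pointer and flags.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	ssize_t ret = write(fd, val, strlen(val));
	close(fd);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	/* Turn the event on; assumes tracefs is mounted at /sys/kernel/tracing. */
	if (write_str("/sys/kernel/tracing/events/io_uring/io_uring_submit_sqe/enable", "1"))
		perror("enable io_uring_submit_sqe");

	/* Dump whatever has been traced so far. */
	char buf[4096];
	int fd = open("/sys/kernel/tracing/trace", O_RDONLY);
	if (fd >= 0) {
		ssize_t n;
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);
		close(fd);
	}
	return 0;
}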
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 162ff99ed2cb..79126d5cd289 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -46,21 +46,17 @@ struct io_uring_sqe {
__u32 unlink_flags;
};
__u64 user_data; /* data to be passed back at completion time */
+ /* pack this to avoid bogus arm OABI complaints */
union {
- struct {
- /* pack this to avoid bogus arm OABI complaints */
- union {
- /* index into fixed buffers, if used */
- __u16 buf_index;
- /* for grouped buffer selection */
- __u16 buf_group;
- } __attribute__((packed));
- /* personality to use, if used */
- __u16 personality;
- __s32 splice_fd_in;
- };
- __u64 __pad2[3];
- };
+ /* index into fixed buffers, if used */
+ __u16 buf_index;
+ /* for grouped buffer selection */
+ __u16 buf_group;
+ } __attribute__((packed));
+ /* personality to use, if used */
+ __u16 personality;
+ __s32 splice_fd_in;
+ __u64 __pad2[2];
};
enum {
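The hunk above flattens the tail of struct io_uring_sqe: the anonymous struct that wrapped the buf_index/buf_group union, personality and splice_fd_in is gone, and __pad2 shrinks from three to two __u64 words, so the entry stays exactly 64 bytes. The kernel keeps its own layout build checks in io_uring.c; the stand-alone sketch below is only an illustration of the invariant, with the chosen offsets taken from the uapi header.

/*
 * Illustrative only: check that the simplified tail keeps the submission
 * queue entry at 64 bytes and the moved fields where userspace expects them.
 */
#include <linux/io_uring.h>
#include <stddef.h>

_Static_assert(sizeof(struct io_uring_sqe) == 64,
	       "io_uring_sqe must stay 64 bytes");
_Static_assert(offsetof(struct io_uring_sqe, user_data) == 32,
	       "user_data sits at byte 32");
_Static_assert(offsetof(struct io_uring_sqe, personality) == 42,
	       "personality follows the packed buf_index/buf_group union");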
@@ -306,6 +302,10 @@ enum {
IORING_REGISTER_BUFFERS2 = 15,
IORING_REGISTER_BUFFERS_UPDATE = 16,
+ /* set/clear io-wq thread affinities */
+ IORING_REGISTER_IOWQ_AFF = 17,
+ IORING_UNREGISTER_IOWQ_AFF = 18,
+
/* this goes last */
IORING_REGISTER_LAST
};
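The two new registration opcodes let userspace set and clear the CPU affinity mask used by the ring's io-wq worker threads. A minimal sketch of driving them through the raw io_uring_register(2) syscall follows (liburing also wraps this as io_uring_register_iowq_aff()); the ring_fd parameter, the chosen CPUs and the error handling are illustrative, not part of the patch.

/*
 * Sketch only: pin this ring's io-wq workers to CPUs 0 and 1 with
 * IORING_REGISTER_IOWQ_AFF, then drop the restriction again with
 * IORING_UNREGISTER_IOWQ_AFF. Assumes ring_fd is an io_uring instance
 * set up elsewhere.
 */
#define _GNU_SOURCE
#include <linux/io_uring.h>
#include <sched.h>
#include <sys/syscall.h>
#include <unistd.h>

static int set_iowq_affinity(int ring_fd)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);
	CPU_SET(1, &mask);

	/* nr_args carries the size of the mask in bytes. */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_IOWQ_AFF, &mask, sizeof(mask));
}

static int clear_iowq_affinity(int ring_fd)
{
	/* No argument needed; workers fall back to the default mask. */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_UNREGISTER_IOWQ_AFF, NULL, 0);
}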