Diffstat (limited to 'include')
 -rw-r--r--  include/linux/io_uring.h       |   6
 -rw-r--r--  include/linux/io_uring_types.h | 129
 -rw-r--r--  include/uapi/linux/io_uring.h  |  21
 3 files changed, 91 insertions, 65 deletions
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index bb9c666bd584..106cdc55ff3b 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -81,6 +81,7 @@ static inline void io_uring_free(struct task_struct *tsk)
if (tsk->io_uring)
__io_uring_free(tsk);
}
+int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
#else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
@@ -116,6 +117,11 @@ static inline const char *io_uring_get_opcode(u8 opcode)
{
return "";
}
+static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ return -EOPNOTSUPP;
+}
#endif
#endif
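
The io_uring_cmd_sock() declaration above gives the rest of the kernel a single entry point for socket uring commands, with a CONFIG_IO_URING=n stub that fails with -EOPNOTSUPP. As a hypothetical sketch only (example_uring_cmd and example_fops are invented for illustration; the real in-tree caller is the socket layer's own ->uring_cmd hook), a file_operations implementation for a socket-backed file could simply forward to it:

#include <linux/fs.h>
#include <linux/io_uring.h>

/* Illustrative wiring only: forward IORING_OP_URING_CMD requests to the
 * new helper. The file behind cmd must actually be a socket for
 * io_uring_cmd_sock() to make sense. */
static int example_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	return io_uring_cmd_sock(cmd, issue_flags);
}

static const struct file_operations example_fops = {
	/* ... other handlers ... */
	.uring_cmd = example_uring_cmd,
};
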
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index f04ce513fadb..13d19b9be9f4 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -69,8 +69,8 @@ struct io_uring_task {
};
struct io_uring {
- u32 head ____cacheline_aligned_in_smp;
- u32 tail ____cacheline_aligned_in_smp;
+ u32 head;
+ u32 tail;
};
/*
@@ -176,7 +176,6 @@ struct io_submit_state {
unsigned short submit_nr;
unsigned int cqes_count;
struct blk_plug plug;
- struct io_uring_cqe cqes[16];
};
struct io_ev_fd {
@@ -205,25 +204,17 @@ struct io_ring_ctx {
unsigned int has_evfd: 1;
/* all CQEs should be posted only by the submitter task */
unsigned int task_complete: 1;
+ unsigned int lockless_cq: 1;
unsigned int syscall_iopoll: 1;
unsigned int poll_activated: 1;
unsigned int drain_disabled: 1;
unsigned int compat: 1;
- enum task_work_notify_mode notify_method;
+ struct task_struct *submitter_task;
+ struct io_rings *rings;
+ struct percpu_ref refs;
- /*
- * If IORING_SETUP_NO_MMAP is used, then the below holds
- * the gup'ed pages for the two rings, and the sqes.
- */
- unsigned short n_ring_pages;
- unsigned short n_sqe_pages;
- struct page **ring_pages;
- struct page **sqe_pages;
-
- struct io_rings *rings;
- struct task_struct *submitter_task;
- struct percpu_ref refs;
+ enum task_work_notify_mode notify_method;
} ____cacheline_aligned_in_smp;
/* submission data */
@@ -261,31 +252,20 @@ struct io_ring_ctx {
struct io_buffer_list *io_bl;
struct xarray io_bl_xa;
- struct list_head io_buffers_cache;
struct io_hash_table cancel_table_locked;
- struct list_head cq_overflow_list;
struct io_alloc_cache apoll_cache;
struct io_alloc_cache netmsg_cache;
- } ____cacheline_aligned_in_smp;
-
- /* IRQ completion list, under ->completion_lock */
- struct io_wq_work_list locked_free_list;
- unsigned int locked_free_nr;
-
- const struct cred *sq_creds; /* cred used for __io_sq_thread() */
- struct io_sq_data *sq_data; /* if using sq thread polling */
-
- struct wait_queue_head sqo_sq_wait;
- struct list_head sqd_list;
- unsigned long check_cq;
-
- unsigned int file_alloc_start;
- unsigned int file_alloc_end;
-
- struct xarray personalities;
- u32 pers_next;
+ /*
+ * ->iopoll_list is protected by the ctx->uring_lock for
+ * io_uring instances that don't use IORING_SETUP_SQPOLL.
+ * For SQPOLL, only the single threaded io_sq_thread() will
+ * manipulate the list, hence no extra locking is needed there.
+ */
+ struct io_wq_work_list iopoll_list;
+ bool poll_multi_queue;
+ } ____cacheline_aligned_in_smp;
struct {
/*
@@ -298,39 +278,55 @@ struct io_ring_ctx {
unsigned cached_cq_tail;
unsigned cq_entries;
struct io_ev_fd __rcu *io_ev_fd;
- struct wait_queue_head cq_wait;
unsigned cq_extra;
} ____cacheline_aligned_in_smp;
+ /*
+ * task_work and async notification delivery cacheline. Expected to
+ * regularly bounce b/w CPUs.
+ */
struct {
- spinlock_t completion_lock;
-
- bool poll_multi_queue;
- atomic_t cq_wait_nr;
-
- /*
- * ->iopoll_list is protected by the ctx->uring_lock for
- * io_uring instances that don't use IORING_SETUP_SQPOLL.
- * For SQPOLL, only the single threaded io_sq_thread() will
- * manipulate the list, hence no extra locking is needed there.
- */
- struct io_wq_work_list iopoll_list;
- struct io_hash_table cancel_table;
-
struct llist_head work_llist;
-
- struct list_head io_buffers_comp;
+ unsigned long check_cq;
+ atomic_t cq_wait_nr;
+ atomic_t cq_timeouts;
+ struct wait_queue_head cq_wait;
} ____cacheline_aligned_in_smp;
/* timeouts */
struct {
spinlock_t timeout_lock;
- atomic_t cq_timeouts;
struct list_head timeout_list;
struct list_head ltimeout_list;
unsigned cq_last_tm_flush;
} ____cacheline_aligned_in_smp;
+ struct io_uring_cqe completion_cqes[16];
+
+ spinlock_t completion_lock;
+
+ /* IRQ completion list, under ->completion_lock */
+ struct io_wq_work_list locked_free_list;
+ unsigned int locked_free_nr;
+
+ struct list_head io_buffers_comp;
+ struct list_head cq_overflow_list;
+ struct io_hash_table cancel_table;
+
+ const struct cred *sq_creds; /* cred used for __io_sq_thread() */
+ struct io_sq_data *sq_data; /* if using sq thread polling */
+
+ struct wait_queue_head sqo_sq_wait;
+ struct list_head sqd_list;
+
+ unsigned int file_alloc_start;
+ unsigned int file_alloc_end;
+
+ struct xarray personalities;
+ u32 pers_next;
+
+ struct list_head io_buffers_cache;
+
/* Keep this last, we don't need it for the fast path */
struct wait_queue_head poll_wq;
struct io_restriction restrictions;
@@ -374,6 +370,15 @@ struct io_ring_ctx {
unsigned sq_thread_idle;
/* protected by ->completion_lock */
unsigned evfd_last_cq_tail;
+
+ /*
+ * If IORING_SETUP_NO_MMAP is used, then the below holds
+ * the gup'ed pages for the two rings, and the sqes.
+ */
+ unsigned short n_ring_pages;
+ unsigned short n_sqe_pages;
+ struct page **ring_pages;
+ struct page **sqe_pages;
};
struct io_tw_state {
@@ -409,7 +414,6 @@ enum {
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
REQ_F_PARTIAL_IO_BIT,
- REQ_F_CQE32_INIT_BIT,
REQ_F_APOLL_MULTISHOT_BIT,
REQ_F_CLEAR_POLLIN_BIT,
REQ_F_HASH_LOCKED_BIT,
@@ -479,8 +483,6 @@ enum {
REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
/* fast poll multishot mode */
REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
- /* ->extra1 and ->extra2 are initialised */
- REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT),
/* recvmsg special flag, clear EPOLLIN */
REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT),
/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
@@ -579,13 +581,7 @@ struct io_kiocb {
struct io_task_work io_task_work;
unsigned nr_tw;
/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
- union {
- struct hlist_node hash_node;
- struct {
- u64 extra1;
- u64 extra2;
- };
- };
+ struct hlist_node hash_node;
/* internal polling, see IORING_FEAT_FAST_POLL */
struct async_poll *apoll;
/* opcode allocated if it needs to store data for async defer */
@@ -595,6 +591,11 @@ struct io_kiocb {
/* custom credentials, valid IFF REQ_F_CREDS is set */
const struct cred *creds;
struct io_wq_work work;
+
+ struct {
+ u64 extra1;
+ u64 extra2;
+ } big_cqe;
};
struct io_overflow_cqe {
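
The extra1/extra2 pair that used to share a union with hash_node now lives in its own big_cqe member of struct io_kiocb, and the REQ_F_CQE32_INIT flag that tracked whether the union held CQE data is gone. On rings created with IORING_SETUP_CQE32, these two values surface to userspace as the second half of a 32-byte CQE. A minimal userspace sketch, assuming liburing and an opcode that actually fills the extra fields:

#include <liburing.h>
#include <stdio.h>

/* Sketch: read back the two extra 64-bit words of a 32-byte CQE.
 * Requires a ring set up with IORING_SETUP_CQE32. */
static void print_big_cqe(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;

	if (io_uring_wait_cqe(ring, &cqe))
		return;
	/* big_cqe[0]/big_cqe[1] carry what the kernel calls extra1/extra2 */
	printf("res=%d extra1=%llu extra2=%llu\n", cqe->res,
	       (unsigned long long)cqe->big_cqe[0],
	       (unsigned long long)cqe->big_cqe[1]);
	io_uring_cqe_seen(ring, cqe);
}
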
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 08720c7bd92f..8e61f8b7c2ce 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -185,6 +185,11 @@ enum {
*/
#define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15)
+/*
+ * Removes indirection through the SQ index array.
+ */
+#define IORING_SETUP_NO_SQARRAY (1U << 16)
+
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
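
IORING_SETUP_NO_SQARRAY drops the separate SQ index array, so the kernel consumes SQEs directly in ring order rather than through the indirection table. A minimal sketch using the raw io_uring_setup(2) syscall (no liburing helpers assumed), with a fallback for kernels that reject the unknown flag:

#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sketch: create a ring without the SQ index array; older kernels fail
 * unknown setup flags with EINVAL, so retry without it. */
static int setup_ring(unsigned int entries, struct io_uring_params *p)
{
	int fd;

	memset(p, 0, sizeof(*p));
	p->flags = IORING_SETUP_NO_SQARRAY;
	fd = syscall(__NR_io_uring_setup, entries, p);
	if (fd < 0) {
		p->flags &= ~IORING_SETUP_NO_SQARRAY;
		fd = syscall(__NR_io_uring_setup, entries, p);
	}
	return fd;
}
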
@@ -299,11 +304,15 @@ enum io_uring_op {
* request 'user_data'
* IORING_ASYNC_CANCEL_ANY Match any request
* IORING_ASYNC_CANCEL_FD_FIXED 'fd' passed in is a fixed descriptor
+ * IORING_ASYNC_CANCEL_USERDATA Match on user_data, default for no other key
+ * IORING_ASYNC_CANCEL_OP Match request based on opcode
*/
#define IORING_ASYNC_CANCEL_ALL (1U << 0)
#define IORING_ASYNC_CANCEL_FD (1U << 1)
#define IORING_ASYNC_CANCEL_ANY (1U << 2)
#define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3)
+#define IORING_ASYNC_CANCEL_USERDATA (1U << 4)
+#define IORING_ASYNC_CANCEL_OP (1U << 5)
/*
* send/sendmsg and recv/recvmsg flags (sqe->ioprio)
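
The two new cancel keys extend the existing set: IORING_ASYNC_CANCEL_USERDATA spells out the default user_data match, and IORING_ASYNC_CANCEL_OP matches in-flight requests by opcode. A rough sketch of an opcode-based cancel through a regular IORING_OP_ASYNC_CANCEL SQE, assuming (this is an assumption of the sketch, not stated in the diff) that the opcode to match travels in sqe->len:

#include <liburing.h>

/* Sketch: cancel every in-flight IORING_OP_READ on the ring. Placing
 * the target opcode in sqe->len is an assumption of this example. */
static void cancel_all_reads(struct io_uring *ring)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	io_uring_prep_cancel64(sqe, 0,
			       IORING_ASYNC_CANCEL_OP | IORING_ASYNC_CANCEL_ALL);
	sqe->len = IORING_OP_READ;
	io_uring_submit(ring);
}
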
@@ -697,7 +706,9 @@ struct io_uring_sync_cancel_reg {
__s32 fd;
__u32 flags;
struct __kernel_timespec timeout;
- __u64 pad[4];
+ __u8 opcode;
+ __u8 pad[7];
+ __u64 pad2[3];
};
/*
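
io_uring_sync_cancel_reg carves the new opcode byte out of the old pad[4] words, leaving the overall structure size unchanged. A hedged sketch of using it through io_uring_register(2), assuming a liburing recent enough to provide io_uring_register_sync_cancel() and the updated structure:

#include <liburing.h>
#include <string.h>

/* Sketch: synchronously cancel all pending IORING_OP_TIMEOUT requests,
 * waiting up to one second for them to finish. */
static int sync_cancel_timeouts(struct io_uring *ring)
{
	struct io_uring_sync_cancel_reg reg;

	memset(&reg, 0, sizeof(reg));
	reg.flags = IORING_ASYNC_CANCEL_OP | IORING_ASYNC_CANCEL_ALL;
	reg.opcode = IORING_OP_TIMEOUT;
	reg.timeout.tv_sec = 1;
	return io_uring_register_sync_cancel(ring, &reg);
}
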
@@ -717,6 +728,14 @@ struct io_uring_recvmsg_out {
__u32 flags;
};
+/*
+ * Argument for IORING_OP_URING_CMD when file is a socket
+ */
+enum {
+ SOCKET_URING_OP_SIOCINQ = 0,
+ SOCKET_URING_OP_SIOCOUTQ,
+};
+
#ifdef __cplusplus
}
#endif
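
SOCKET_URING_OP_SIOCINQ and SOCKET_URING_OP_SIOCOUTQ mirror the classic SIOCINQ/SIOCOUTQ ioctls, but are issued through IORING_OP_URING_CMD on a socket file. A minimal sketch, assuming liburing, a connected socket fd, the command opcode carried in sqe->cmd_op, and the byte count coming back in cqe->res:

#include <liburing.h>

/* Sketch: ask how many bytes are queued for reading on a socket.
 * Needs a kernel where the socket layer wires up io_uring_cmd_sock(). */
static int sock_inq(struct io_uring *ring, int sockfd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, sockfd, NULL, 0, 0);
	sqe->cmd_op = SOCKET_URING_OP_SIOCINQ;
	io_uring_submit(ring);

	if (io_uring_wait_cqe(ring, &cqe))
		return -1;
	ret = cqe->res;		/* >= 0: bytes available to read */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}
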