summaryrefslogtreecommitdiff
path: root/include/uapi/linux/io_uring.h
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2020-02-24 02:42:51 +0300
committerJens Axboe <axboe@kernel.dk>2020-03-10 18:12:45 +0300
commitbcda7baaa3f15c7a95db3c024bb046d6e298f76b (patch)
treed523f254af643be6df07e9743816adfd4e68edd4 /include/uapi/linux/io_uring.h
parentddf0322db79c5984dc1a1db890f946dd19b7d6d9 (diff)
downloadlinux-bcda7baaa3f15c7a95db3c024bb046d6e298f76b.tar.xz
io_uring: support buffer selection for OP_READ and OP_RECV
If a server process has tons of pending socket connections, generally it uses epoll to wait for activity. When the socket is ready for reading (or writing), the task can select a buffer and issue a recv/send on the given fd. Now that we have fast (non-async thread) support, a task can have tons of reads or writes pending. But that means they need buffers to back that data, and if the number of connections is high enough, having them preallocated for all possible connections is unfeasible. With IORING_OP_PROVIDE_BUFFERS, an application can register buffers to use for any request. The request then sets IOSQE_BUFFER_SELECT in the sqe, and a given group ID in sqe->buf_group. When the fd becomes ready, a free buffer from the specified group is selected. If none are available, the request is terminated with -ENOBUFS. If successful, the CQE on completion will contain the buffer ID chosen in the cqe->flags member, encoded as: (buffer_id << IORING_CQE_BUFFER_SHIFT) | IORING_CQE_F_BUFFER; Once a buffer has been consumed by a request, it is no longer available and must be registered again with IORING_OP_PROVIDE_BUFFERS. Requests need to support this feature. For now, IORING_OP_READ and IORING_OP_RECV support it. This is checked on SQE submission; a CQE with res == -EOPNOTSUPP will be posted if attempted on unsupported requests. Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include/uapi/linux/io_uring.h')
-rw-r--r--include/uapi/linux/io_uring.h14
1 files changed, 14 insertions, 0 deletions
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index bc34a57a660b..9b263d9b24e6 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -66,6 +66,7 @@ enum {
IOSQE_IO_LINK_BIT,
IOSQE_IO_HARDLINK_BIT,
IOSQE_ASYNC_BIT,
+ IOSQE_BUFFER_SELECT_BIT,
};
/*
@@ -81,6 +82,8 @@ enum {
#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
/* always go async */
#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
+/* select buffer from sqe->buf_group */
+#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
/*
* io_uring_setup() flags
@@ -156,6 +159,17 @@ struct io_uring_cqe {
};
/*
+ * cqe->flags
+ *
+ * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
+ */
+#define IORING_CQE_F_BUFFER (1U << 0)
+
+enum {
+ IORING_CQE_BUFFER_SHIFT = 16,
+};
+
+/*
* Magic offsets for the application to mmap the data it needs
*/
#define IORING_OFF_SQ_RING 0ULL