author     Linus Torvalds <torvalds@linux-foundation.org>  2025-01-21 07:27:33 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2025-01-21 07:27:33 +0300
commit     a312e1706ce6c124f04ec85ddece240f3bb2a696 (patch)
tree       ba749bac4f345fc48c7c79d9a9c5713fe87af300 /include
parent     1cbfb828e05171ca2dd77b5988d068e6872480fe (diff)
parent     561e3a0c40dc7e3ab7b0b3647a2b89eca16215d9 (diff)
download   linux-a312e1706ce6c124f04ec85ddece240f3bb2a696.tar.xz
Merge tag 'for-6.14/io_uring-20250119' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:
 "Not a lot in terms of features this time around, mostly just cleanups
  and code consolidation:

   - Support for PI meta data read/write via io_uring, with NVMe and
     SCSI covered

   - Cleanup the per-op structure caching, making it consistent across
     various command types

   - Consolidate the various user mapped features into a concept called
     regions, making the various users of that consistent

   - Various cleanups and fixes"

* tag 'for-6.14/io_uring-20250119' of git://git.kernel.dk/linux: (56 commits)
  io_uring/fdinfo: fix io_uring_show_fdinfo() misuse of ->d_iname
  io_uring: reuse io_should_terminate_tw() for cmds
  io_uring: Factor out a function to parse restrictions
  io_uring/rsrc: require cloned buffers to share accounting contexts
  io_uring: simplify the SQPOLL thread check when cancelling requests
  io_uring: expose read/write attribute capability
  io_uring/rw: don't gate retry on completion context
  io_uring/rw: handle -EAGAIN retry at IO completion time
  io_uring/rw: use io_rw_recycle() from cleanup path
  io_uring/rsrc: simplify the bvec iter count calculation
  io_uring: ensure io_queue_deferred() is out-of-line
  io_uring/rw: always clear ->bytes_done on io_async_rw setup
  io_uring/rw: use NULL for rw->free_iovec assigment
  io_uring/rw: don't mask in f_iocb_flags
  io_uring/msg_ring: Drop custom destructor
  io_uring: Move old async data allocation helper to header
  io_uring/rw: Allocate async data through helper
  io_uring/net: Allocate msghdr async data through helper
  io_uring/uring_cmd: Allocate async data through generic helper
  io_uring/poll: Allocate apoll with generic alloc_cache helper
  ...
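The PI (protection information) support works through the new attr_ptr / attr_type_mask SQE fields and struct io_uring_attr_pi added in this pull (see the uapi/linux/io_uring.h and uapi/linux/fs.h hunks below). The following is a hedged userspace sketch of one plausible way to attach PI metadata to a read; it is not taken from the pull request itself. It assumes liburing for ring setup, uapi headers new enough to carry the 6.14 additions, a device opened with O_DIRECT and formatted with protection information, and placeholder constants (LBA_SIZE, META_SIZE, NR_LBAS) plus a made-up helper name (submit_read_with_pi) that depend on the device's integrity profile.

    /* Hedged sketch, assumptions as described above. */
    #include <liburing.h>
    #include <linux/fs.h>      /* IO_INTEGRITY_CHK_* flags (6.14+ headers) */
    #include <stdint.h>
    #include <stdlib.h>

    #define LBA_SIZE   4096    /* assumption: device logical block size */
    #define META_SIZE  8       /* assumption: bytes of PI per logical block */
    #define NR_LBAS    8

    static int submit_read_with_pi(struct io_uring *ring, int fd)
    {
            struct io_uring_attr_pi pi = { 0 };
            struct io_uring_sqe *sqe;
            void *data, *meta;

            if (posix_memalign(&data, LBA_SIZE, NR_LBAS * LBA_SIZE))
                    return -1;
            if (posix_memalign(&meta, LBA_SIZE, NR_LBAS * META_SIZE))
                    return -1;

            /* describe the PI buffer and which checks to enforce */
            pi.flags = IO_INTEGRITY_CHK_GUARD | IO_INTEGRITY_CHK_REFTAG;
            pi.addr = (uint64_t)(uintptr_t)meta;
            pi.len = NR_LBAS * META_SIZE;
            pi.seed = 0;       /* initial reference tag */
            pi.app_tag = 0;

            sqe = io_uring_get_sqe(ring);
            io_uring_prep_read(sqe, fd, data, NR_LBAS * LBA_SIZE, 0);

            /* point the SQE at the PI attribute and mark it present;
             * the attribute block is consumed at submission time */
            sqe->attr_ptr = (uint64_t)(uintptr_t)&pi;
            sqe->attr_type_mask = IORING_RW_ATTR_FLAG_PI;

            return io_uring_submit(ring);
    }

The IO_INTEGRITY_CHK_* flags requested here correspond to the new BIP_CHECK_GUARD/REFTAG/APPTAG bits added to bip_flags in the bio-integrity.h hunk below.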
Diffstat (limited to 'include')
-rw-r--r--  include/linux/bio-integrity.h   25
-rw-r--r--  include/linux/fs.h               1
-rw-r--r--  include/linux/io_uring_types.h  26
-rw-r--r--  include/linux/uio.h              9
-rw-r--r--  include/uapi/linux/fs.h          9
-rw-r--r--  include/uapi/linux/io_uring.h   17
6 files changed, 68 insertions, 19 deletions
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index dbf0f74c1529..802f52e38efd 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -7,10 +7,12 @@
enum bip_flags {
BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */
BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */
- BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */
- BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */
- BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */
- BIP_COPY_USER = 1 << 5, /* Kernel bounce buffer in use */
+ BIP_DISK_NOCHECK = 1 << 2, /* disable disk integrity checking */
+ BIP_IP_CHECKSUM = 1 << 3, /* IP checksum */
+ BIP_COPY_USER = 1 << 4, /* Kernel bounce buffer in use */
+ BIP_CHECK_GUARD = 1 << 5, /* guard check */
+ BIP_CHECK_REFTAG = 1 << 6, /* reftag check */
+ BIP_CHECK_APPTAG = 1 << 7, /* apptag check */
};
struct bio_integrity_payload {
@@ -21,6 +23,7 @@ struct bio_integrity_payload {
unsigned short bip_vcnt; /* # of integrity bio_vecs */
unsigned short bip_max_vcnt; /* integrity bio_vec slots */
unsigned short bip_flags; /* control flags */
+ u16 app_tag; /* application tag value */
struct bvec_iter bio_iter; /* for rewinding parent bio */
@@ -30,6 +33,9 @@ struct bio_integrity_payload {
struct bio_vec bip_inline_vecs[];/* embedded bvec array */
};
+#define BIP_CLONE_FLAGS (BIP_MAPPED_INTEGRITY | BIP_IP_CHECKSUM | \
+ BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)
+
#ifdef CONFIG_BLK_DEV_INTEGRITY
#define bip_for_each_vec(bvl, bip, iter) \
@@ -72,7 +78,8 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
unsigned int nr);
int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
unsigned int offset);
-int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len);
+int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter);
+int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta);
void bio_integrity_unmap_user(struct bio *bio);
bool bio_integrity_prep(struct bio *bio);
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
@@ -98,8 +105,12 @@ static inline void bioset_integrity_free(struct bio_set *bs)
{
}
-static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
- ssize_t len)
+static inline int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter)
+{
+ return -EINVAL;
+}
+
+static inline int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta)
{
return -EINVAL;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2b8447f53200..a4af70367f8a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -349,6 +349,7 @@ struct readahead_control;
#define IOCB_DIO_CALLER_COMP (1 << 22)
/* kiocb is a read or write operation submitted by fs/aio.c. */
#define IOCB_AIO_RW (1 << 23)
+#define IOCB_HAS_METADATA (1 << 24)
/* for use in trace events */
#define TRACE_IOCB_STRINGS \
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index fd4cdb0860a2..623d8e798a11 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -78,8 +78,9 @@ struct io_hash_table {
struct io_mapped_region {
struct page **pages;
- void *vmap_ptr;
- size_t nr_pages;
+ void *ptr;
+ unsigned nr_pages;
+ unsigned flags;
};
/*
@@ -293,6 +294,11 @@ struct io_ring_ctx {
struct io_submit_state submit_state;
+ /*
+ * Modifications are protected by ->uring_lock and ->mmap_lock.
+ * The flags, buf_pages and buf_nr_pages fields should be stable
+ * once published.
+ */
struct xarray io_bl_xa;
struct io_hash_table cancel_table;
@@ -424,17 +430,10 @@ struct io_ring_ctx {
* side will need to grab this lock, to prevent either side from
* being run concurrently with the other.
*/
- struct mutex resize_lock;
-
- /*
- * If IORING_SETUP_NO_MMAP is used, then the below holds
- * the gup'ed pages for the two rings, and the sqes.
- */
- unsigned short n_ring_pages;
- unsigned short n_sqe_pages;
- struct page **ring_pages;
- struct page **sqe_pages;
+ struct mutex mmap_lock;
+ struct io_mapped_region sq_region;
+ struct io_mapped_region ring_region;
/* used for optimised request parameter and wait argument passing */
struct io_mapped_region param_region;
};
@@ -481,6 +480,7 @@ enum {
REQ_F_BL_NO_RECYCLE_BIT,
REQ_F_BUFFERS_COMMIT_BIT,
REQ_F_BUF_NODE_BIT,
+ REQ_F_HAS_METADATA_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -561,6 +561,8 @@ enum {
REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT),
/* buf node is valid */
REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
+ /* request has read/write metadata assigned */
+ REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
};
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 853f9de5aa05..8ada84e85447 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -82,6 +82,15 @@ struct iov_iter {
};
};
+typedef __u16 uio_meta_flags_t;
+
+struct uio_meta {
+ uio_meta_flags_t flags;
+ u16 app_tag;
+ u64 seed;
+ struct iov_iter iter;
+};
+
static inline const struct iovec *iter_iov(const struct iov_iter *iter)
{
if (iter->iter_type == ITER_UBUF)
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 56a4f93a08f4..2bbe00cf1248 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -40,6 +40,15 @@
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
+
+#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
+ IO_INTEGRITY_CHK_REFTAG | \
+ IO_INTEGRITY_CHK_APPTAG)
+
#define SEEK_SET 0 /* seek relative to beginning of file */
#define SEEK_CUR 1 /* seek relative to current file position */
#define SEEK_END 2 /* seek relative to end of file */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index aac9a4f8fa9a..e11c82638527 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -98,6 +98,10 @@ struct io_uring_sqe {
__u64 addr3;
__u64 __pad2[1];
};
+ struct {
+ __u64 attr_ptr; /* pointer to attribute information */
+ __u64 attr_type_mask; /* bit mask of attributes */
+ };
__u64 optval;
/*
* If the ring is initialized with IORING_SETUP_SQE128, then
@@ -107,6 +111,18 @@ struct io_uring_sqe {
};
};
+/* sqe->attr_type_mask flags */
+#define IORING_RW_ATTR_FLAG_PI (1U << 0)
+/* PI attribute information */
+struct io_uring_attr_pi {
+ __u16 flags;
+ __u16 app_tag;
+ __u32 len;
+ __u64 addr;
+ __u64 seed;
+ __u64 rsvd;
+};
+
/*
* If sqe->file_index is set to this for opcodes that instantiate a new
* direct descriptor (like openat/openat2/accept), then io_uring will allocate
@@ -561,6 +577,7 @@ struct io_uring_params {
#define IORING_FEAT_REG_REG_RING (1U << 13)
#define IORING_FEAT_RECVSEND_BUNDLE (1U << 14)
#define IORING_FEAT_MIN_TIMEOUT (1U << 15)
+#define IORING_FEAT_RW_ATTR (1U << 16)
/*
* io_uring_register(2) opcodes and arguments
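The last hunk above adds IORING_FEAT_RW_ATTR to the io_uring_params feature bits, which is how userspace can tell whether the kernel accepts the new read/write attributes. A hedged sketch of probing for it (the liburing-based setup and the helper name ring_supports_rw_attr are assumptions, not part of this diff, and require headers that define IORING_FEAT_RW_ATTR):

    #include <liburing.h>
    #include <stdbool.h>

    static bool ring_supports_rw_attr(struct io_uring *ring)
    {
            struct io_uring_params p = { 0 };

            if (io_uring_queue_init_params(8, ring, &p) < 0)
                    return false;
            /* feature bits are reported back at setup time */
            return p.features & IORING_FEAT_RW_ATTR;
    }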