From 07d09774e2bfa21dedcee3ef45892bb20827b12c Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 20 May 2026 14:37:02 +0200 Subject: fsverity: generate and store zero-block hash Compute the hash of one filesystem block's worth of zeros. A filesystem implementation can decide to elide merkle tree blocks containing only this hash and synthesize the contents at read time. Let's pretend that there's a file containing 132 data blocks (data0 through data131) whose merkle tree looks roughly like this: root +--leaf0 | +--data0 | +--data1 | +--... | `--data128 `--leaf1 +--data129 +--data130 `--data131 If data[0-128] are sparse holes, then leaf0 will contain a repeating sequence of @zero_digest. Therefore, leaf0 need not be written to disk because its contents can be synthesized. A subsequent xfs patch will use this to reduce the size of the merkle tree when dealing with sparse gold master disk images and the like. Note that this works only at the first level (data holes). fsverity doesn't store/generate zero_digest for any higher levels. Add a helper to pre-fill a folio with hashes of empty blocks. This will be used by iomap to synthesize blocks full of zero hashes on the fly. Signed-off-by: Darrick J. 
Wong Acked-by: Eric Biggers Signed-off-by: Andrey Albershteyn Link: https://patch.msgid.link/20260520123722.405752-5-aalbersh@kernel.org Signed-off-by: Christian Brauner (Amutable) --- include/linux/fsverity.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index a8f9aa75b792..6c467ded9751 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -201,6 +201,8 @@ bool fsverity_verify_blocks(struct fsverity_info *vi, struct folio *folio, size_t len, size_t offset); void fsverity_verify_bio(struct fsverity_info *vi, struct bio *bio); void fsverity_enqueue_verify_work(struct work_struct *work); +void fsverity_fill_zerohash(struct folio *folio, size_t offset, size_t len, + struct fsverity_info *vi); #else /* !CONFIG_FS_VERITY */ @@ -281,6 +283,12 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work) WARN_ON_ONCE(1); } +static inline void fsverity_fill_zerohash(struct folio *folio, size_t offset, + size_t len, struct fsverity_info *vi) +{ + WARN_ON_ONCE(1); +} + #endif /* !CONFIG_FS_VERITY */ static inline bool fsverity_verify_folio(struct fsverity_info *vi, -- cgit v1.2.3 From 63e242afa4661632786a2129cf297a135a6995ba Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 20 May 2026 14:37:05 +0200 Subject: iomap: introduce IOMAP_F_FSVERITY and teach writeback to handle fsverity This flag indicates that I/O is for fsverity metadata. In the write path skip the i_size check and i_size updates as metadata is past EOF. In writeback don't update i_size and continue writeback even if the folio is beyond EOF. In the read path don't zero fsverity folios, again they are past EOF. The iomap_block_needs_zeroing() is also called from the write path. For folios of larger order we don't want to zero out pages in the folio as these could contain other merkle tree blocks. For fsverity, the filesystem will request to read PAGE_SIZE memory regions. 
For data folios, iomap will zero the rest of the folio for anything which is beyond EOF. We don't want this for fsverity folios. Christian Brauner says: Changed IOMAP_F_FSVERITY from (1U << 10) to (1U << 11) to avoid colliding with IOMAP_F_ZERO_TAIL, which already uses (1U << 10). Signed-off-by: Andrey Albershteyn Link: https://patch.msgid.link/20260520123722.405752-8-aalbersh@kernel.org Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Amutable) --- fs/iomap/buffered-io.c | 43 ++++++++++++++++++++++++++++++++++--------- fs/iomap/trace.h | 3 ++- include/linux/iomap.h | 8 ++++++++ 3 files changed, 44 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d6451c4208d4..d9462f28afbd 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -353,9 +353,26 @@ static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter, { const struct iomap *srcmap = iomap_iter_srcmap(iter); - return srcmap->type != IOMAP_MAPPED || - (srcmap->flags & IOMAP_F_NEW) || - pos >= i_size_read(iter->inode); + /* + * If this block has not been written, there's nothing to read + */ + if (srcmap->type != IOMAP_MAPPED) + return true; + + /* + * Newly allocated blocks have not been written + */ + if (srcmap->flags & IOMAP_F_NEW) + return true; + + /* + * fsverity metadata is stored past i_size, we need to read it instead + * of zeroing + */ + if (srcmap->flags & IOMAP_F_FSVERITY) + return false; + + return pos >= i_size_read(iter->inode); } /** @@ -1170,13 +1187,14 @@ retry: * unlock and release the folio. 
*/ old_size = iter->inode->i_size; - if (pos + written > old_size) { + if (pos + written > old_size && + !(iter->iomap.flags & IOMAP_F_FSVERITY)) { i_size_write(iter->inode, pos + written); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } __iomap_put_folio(iter, write_ops, written, folio); - if (old_size < pos) + if (old_size < pos && !(iter->iomap.flags & IOMAP_F_FSVERITY)) pagecache_isize_extended(iter->inode, old_size, pos); cond_resched(); @@ -1802,13 +1820,20 @@ static int iomap_writeback_range(struct iomap_writepage_ctx *wpc, * Check interaction of the folio with the file end. * * If the folio is entirely beyond i_size, return false. If it straddles - * i_size, adjust end_pos and zero all data beyond i_size. + * i_size, adjust end_pos and zero all data beyond i_size. Don't skip fsverity + * folios as those are beyond i_size. */ -static bool iomap_writeback_handle_eof(struct folio *folio, struct inode *inode, - u64 *end_pos) +static bool iomap_writeback_handle_eof(struct folio *folio, + struct iomap_writepage_ctx *wpc, u64 *end_pos) { + struct inode *inode = wpc->inode; u64 isize = i_size_read(inode); + if (wpc->iomap.flags & IOMAP_F_FSVERITY) { + WARN_ON_ONCE(folio_pos(folio) < isize); + return true; + } + if (*end_pos > isize) { size_t poff = offset_in_folio(folio, isize); pgoff_t end_index = isize >> PAGE_SHIFT; @@ -1874,7 +1899,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio) trace_iomap_writeback_folio(inode, pos, folio_size(folio)); - if (!iomap_writeback_handle_eof(folio, inode, &end_pos)) + if (!iomap_writeback_handle_eof(folio, wpc, &end_pos)) return 0; WARN_ON_ONCE(end_pos <= pos); diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 097773c6db80..e4dd25b27656 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -118,7 +118,8 @@ DEFINE_RANGE_EVENT(iomap_zero_iter); { IOMAP_F_ATOMIC_BIO, "ATOMIC_BIO" }, \ { IOMAP_F_PRIVATE, "PRIVATE" }, \ { IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" }, \ - { IOMAP_F_STALE, "STALE" } + { 
IOMAP_F_STALE, "STALE" }, \ + { IOMAP_F_FSVERITY, "FSVERITY" } #define IOMAP_DIO_STRINGS \ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index cea6bbc97b6e..5b2b40e81b61 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -91,6 +91,14 @@ struct vm_fault; #endif /* CONFIG_BLK_DEV_INTEGRITY */ #define IOMAP_F_ZERO_TAIL (1U << 10) +/* + * Indicates reads and writes of fsverity metadata. + * + * Fsverity metadata is stored after the regular file data and thus beyond + * i_size. + */ +#define IOMAP_F_FSVERITY (1U << 11) + /* * Flag reserved for file system specific usage */ -- cgit v1.2.3 From 1d140731753a277be36637300a0f3faa396edec1 Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 20 May 2026 14:37:06 +0200 Subject: iomap: teach iomap to read files with fsverity Obtain fsverity info for folios with file data and fsverity metadata. Filesystem can pass vi down to ioend and then to fsverity for verification. This is different from the other filesystems supporting fsverity (ext4, f2fs, btrfs); these filesystems don't need fsverity_info for reading fsverity metadata. While reading the merkle tree, iomap requires fsverity info to synthesize hashes for zeroed data blocks. fsverity metadata has two kinds of holes - ones in the merkle tree and one after the fsverity descriptor. Merkle tree holes are blocks full of hashes of zeroed data blocks. These are not stored on the disk but synthesized on the fly. This saves a bit of space for sparse files. Due to this, iomap also needs to look up fsverity_info for folios with fsverity metadata. ->vi has a hash of the zeroed data block which will be used to fill the merkle tree block. The hole past the descriptor is interpreted as the end of the metadata region. As we don't have EOF here, we use this hole as an indication that the rest of the folio is empty. This patch marks the rest of the folio beyond the fsverity descriptor as uptodate. 
For file data, fsverity needs to verify consistency of the whole file against the root hash, hashes of holes are included in the merkle tree. Verify them too. Issue reading of fsverity merkle tree on the fsverity inodes. This way metadata will be available at I/O completion time. Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Andrey Albershteyn Link: https://patch.msgid.link/20260520123722.405752-9-aalbersh@kernel.org Signed-off-by: Christian Brauner (Amutable) --- fs/iomap/buffered-io.c | 41 +++++++++++++++++++++++++++++++++++++++-- fs/iomap/ioend.c | 1 + include/linux/iomap.h | 2 ++ 3 files changed, 42 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d9462f28afbd..f9cc5a32e222 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "internal.h" #include "trace.h" @@ -561,9 +562,27 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, if (plen == 0) return 0; - /* zero post-eof blocks as the page may be mapped */ - if (iomap_block_needs_zeroing(iter, pos)) { + /* + * Handling of fsverity "holes". We hit this for two case: + * 1. No need to go further, the hole after fsverity + * descriptor is the end of the fsverity metadata. + * + * 2. This folio contains merkle tree blocks which need to be + * synthesized. If we already have fsverity info (ctx->vi) + * synthesize these blocks. 
+ */ + if ((iomap->flags & IOMAP_F_FSVERITY) && + iomap->type == IOMAP_HOLE) { + if (ctx->vi) + fsverity_fill_zerohash(folio, poff, plen, + ctx->vi); + iomap_set_range_uptodate(folio, poff, plen); + } else if (iomap_block_needs_zeroing(iter, pos)) { + /* zero post-eof blocks as the page may be mapped */ folio_zero_range(folio, poff, plen); + if (ctx->vi && + !fsverity_verify_blocks(ctx->vi, folio, plen, poff)) + return -EIO; iomap_set_range_uptodate(folio, poff, plen); } else { if (!*bytes_submitted) @@ -614,6 +633,15 @@ void iomap_read_folio(const struct iomap_ops *ops, trace_iomap_readpage(iter.inode, 1); + /* + * Fetch fsverity_info for both data and fsverity metadata, as iomap + * needs zeroed hash for merkle tree block synthesis + */ + ctx->vi = fsverity_get_info(iter.inode); + if (ctx->vi && iter.pos < i_size_read(iter.inode)) + fsverity_readahead(ctx->vi, folio->index, + folio_nr_pages(folio)); + while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_read_folio_iter(&iter, ctx, &bytes_submitted); @@ -681,6 +709,15 @@ void iomap_readahead(const struct iomap_ops *ops, trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); + /* + * Fetch fsverity_info for both data and fsverity metadata, as iomap + * needs zeroed hash for merkle tree block synthesis + */ + ctx->vi = fsverity_get_info(iter.inode); + if (ctx->vi && iter.pos < i_size_read(iter.inode)) + fsverity_readahead(ctx->vi, readahead_index(rac), + readahead_count(rac)); + while (iomap_iter(&iter, ops) > 0) iter.status = iomap_readahead_iter(&iter, ctx, &cur_bytes_submitted); diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c index acf3cf98b23a..f7c3e0c70fd7 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c @@ -28,6 +28,7 @@ struct iomap_ioend *iomap_init_ioend(struct inode *inode, ioend->io_offset = file_offset; ioend->io_size = bio->bi_iter.bi_size; ioend->io_sector = bio->bi_iter.bi_sector; + ioend->io_vi = NULL; ioend->io_private = NULL; return ioend; } diff --git 
a/include/linux/iomap.h b/include/linux/iomap.h index 5b2b40e81b61..1be2e16b696a 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -429,6 +429,7 @@ struct iomap_ioend { loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ void *io_private; /* file system private data */ + struct fsverity_info *io_vi; /* fsverity info */ struct bio io_bio; /* MUST BE LAST! */ }; @@ -503,6 +504,7 @@ struct iomap_read_folio_ctx { struct readahead_control *rac; void *read_ctx; loff_t read_ctx_file_offset; + struct fsverity_info *vi; }; struct iomap_read_ops { -- cgit v1.2.3 From 36a36c4cac914510123071fb58270f6380faed1b Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 20 May 2026 14:37:07 +0200 Subject: iomap: introduce iomap_fsverity_write() for writing fsverity metadata This is just a wrapper around iomap_file_buffered_write() to create necessary iterator over metadata. Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Andrey Albershteyn Link: https://patch.msgid.link/20260520123722.405752-10-aalbersh@kernel.org Reviewed-by: "Darrick J. 
Wong" Signed-off-by: Christian Brauner (Amutable) --- fs/iomap/buffered-io.c | 25 +++++++++++++++++++++++++ include/linux/iomap.h | 3 +++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index f9cc5a32e222..dab4aacff313 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1290,6 +1290,31 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); +int iomap_fsverity_write(struct file *file, loff_t pos, size_t length, + const void *buf, const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops) +{ + int ret; + struct iov_iter iiter; + struct kvec kvec = { + .iov_base = (void *)buf, + .iov_len = length, + }; + struct kiocb iocb = { + .ki_filp = file, + .ki_ioprio = get_current_ioprio(), + .ki_pos = pos, + }; + + iov_iter_kvec(&iiter, WRITE, &kvec, 1, length); + + ret = iomap_file_buffered_write(&iocb, &iiter, ops, write_ops, NULL); + if (ret < 0) + return ret; + return ret == length ? 
0 : -EIO; +} +EXPORT_SYMBOL_GPL(iomap_fsverity_write); + static void iomap_write_delalloc_ifs_punch(struct inode *inode, struct folio *folio, loff_t start_byte, loff_t end_byte, struct iomap *iomap, iomap_punch_t punch) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 1be2e16b696a..3582ed1fe236 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -353,6 +353,9 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private); +int iomap_fsverity_write(struct file *file, loff_t pos, size_t length, + const void *buf, const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops); void iomap_read_folio(const struct iomap_ops *ops, struct iomap_read_folio_ctx *ctx, void *private); void iomap_readahead(const struct iomap_ops *ops, -- cgit v1.2.3