summaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-09-19 02:59:14 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2019-09-19 02:59:14 +0300
commitf60c55a94e1d127186566f06294f2dadd966e9b4 (patch)
tree2d3dbd572c0096d24f87f581194563ff76e07a6e /fs/ext4
parent734d1ed83e1f9b7bafb650033fb87c657858cf5b (diff)
parent95ae251fe82838b85c6d37e5a1775006e2a42ae0 (diff)
downloadlinux-f60c55a94e1d127186566f06294f2dadd966e9b4.tar.xz
Merge tag 'fsverity-for-linus' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt
Pull fs-verity support from Eric Biggers: "fs-verity is a filesystem feature that provides Merkle tree based hashing (similar to dm-verity) for individual readonly files, mainly for the purpose of efficient authenticity verification. This pull request includes: (a) The fs/verity/ support layer and documentation. (b) fs-verity support for ext4 and f2fs. Compared to the original fs-verity patchset from last year, the UAPI to enable fs-verity on a file has been greatly simplified. Lots of other things were cleaned up too. fs-verity is planned to be used by two different projects on Android; most of the userspace code is in place already. Another userspace tool ("fsverity-utils"), and xfstests, are also available. e2fsprogs and f2fs-tools already have fs-verity support. Other people have shown interest in using fs-verity too. I've tested this on ext4 and f2fs with xfstests, both the existing tests and the new fs-verity tests. This has also been in linux-next since July 30 with no reported issues except a couple minor ones I found myself and folded in fixes for. Ted and I will be co-maintaining fs-verity" * tag 'fsverity-for-linus' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt: f2fs: add fs-verity support ext4: update on-disk format documentation for fs-verity ext4: add fs-verity read support ext4: add basic fs-verity support fs-verity: support builtin file signatures fs-verity: add SHA-512 support fs-verity: implement FS_IOC_MEASURE_VERITY ioctl fs-verity: implement FS_IOC_ENABLE_VERITY ioctl fs-verity: add data verification hooks for ->readpages() fs-verity: add the hook for file ->setattr() fs-verity: add the hook for file ->open() fs-verity: add inode and superblock fields fs-verity: add Kconfig and the helper functions for hashing fs: uapi: define verity bit for FS_IOC_GETFLAGS fs-verity: add UAPI header fs-verity: add MAINTAINERS file entry fs-verity: add a documentation file
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Makefile1
-rw-r--r--fs/ext4/ext4.h23
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/inode.c55
-rw-r--r--fs/ext4/ioctl.c13
-rw-r--r--fs/ext4/readpage.c211
-rw-r--r--fs/ext4/super.c18
-rw-r--r--fs/ext4/sysfs.c6
-rw-r--r--fs/ext4/verity.c367
9 files changed, 645 insertions, 53 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8fdfcd3c3e04..b17ddc229ac5 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -13,3 +13,4 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
+ext4-$(CONFIG_FS_VERITY) += verity.o
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf660aa7a9e0..9c7f4036021b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -41,6 +41,7 @@
#endif
#include <linux/fscrypt.h>
+#include <linux/fsverity.h>
#include <linux/compiler.h>
@@ -395,6 +396,7 @@ struct flex_groups {
#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
+#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
@@ -402,7 +404,7 @@ struct flex_groups {
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x704BDFFF /* User visible flags */
+#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
@@ -467,6 +469,7 @@ enum {
EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/
EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */
EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
+ EXT4_INODE_VERITY = 20, /* Verity protected inode */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
@@ -512,6 +515,7 @@ static inline void ext4_check_flag_values(void)
CHECK_FLAG_VALUE(TOPDIR);
CHECK_FLAG_VALUE(HUGE_FILE);
CHECK_FLAG_VALUE(EXTENTS);
+ CHECK_FLAG_VALUE(VERITY);
CHECK_FLAG_VALUE(EA_INODE);
CHECK_FLAG_VALUE(EOFBLOCKS);
CHECK_FLAG_VALUE(INLINE_DATA);
@@ -1560,6 +1564,7 @@ enum {
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
+ EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
};
#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -1610,6 +1615,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_SB(sb) (sb)
#endif
+static inline bool ext4_verity_in_progress(struct inode *inode)
+{
+ return IS_ENABLED(CONFIG_FS_VERITY) &&
+ ext4_test_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+}
+
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
/*
@@ -1662,6 +1673,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
#define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000
#define EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000
+#define EXT4_FEATURE_RO_COMPAT_VERITY 0x8000
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1756,6 +1768,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(bigalloc, BIGALLOC)
EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, METADATA_CSUM)
EXT4_FEATURE_RO_COMPAT_FUNCS(readonly, READONLY)
EXT4_FEATURE_RO_COMPAT_FUNCS(project, PROJECT)
+EXT4_FEATURE_RO_COMPAT_FUNCS(verity, VERITY)
EXT4_FEATURE_INCOMPAT_FUNCS(compression, COMPRESSION)
EXT4_FEATURE_INCOMPAT_FUNCS(filetype, FILETYPE)
@@ -1813,7 +1826,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD)
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
EXT4_FEATURE_RO_COMPAT_QUOTA |\
- EXT4_FEATURE_RO_COMPAT_PROJECT)
+ EXT4_FEATURE_RO_COMPAT_PROJECT |\
+ EXT4_FEATURE_RO_COMPAT_VERITY)
#define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -3177,6 +3191,8 @@ static inline void ext4_set_de_type(struct super_block *sb,
extern int ext4_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
unsigned nr_pages, bool is_readahead);
+extern int __init ext4_init_post_read_processing(void);
+extern void ext4_exit_post_read_processing(void);
/* symlink.c */
extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
@@ -3283,6 +3299,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
/* mmp.c */
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
+/* verity.c */
+extern const struct fsverity_operations ext4_verityops;
+
/*
* Add new method to test whether block and inode bitmaps are properly
* initialized. With uninit_bg reading the block from disk is not enough
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 70b0438dbc94..b8a20bb9a145 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -457,6 +457,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (ret)
return ret;
+ ret = fsverity_file_open(inode, filp);
+ if (ret)
+ return ret;
+
/*
* Set up the jbd2_inode if we are opening the inode for
* writing and the journal is present
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 006b7a2070bf..d0dc0e3463db 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1340,6 +1340,9 @@ retry_journal:
}
if (ret) {
+ bool extended = (pos + len > inode->i_size) &&
+ !ext4_verity_in_progress(inode);
+
unlock_page(page);
/*
* __block_write_begin may have instantiated a few blocks
@@ -1349,11 +1352,11 @@ retry_journal:
* Add inode to orphan list in case we crash before
* truncate finishes
*/
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ if (extended && ext4_can_truncate(inode))
ext4_orphan_add(handle, inode);
ext4_journal_stop(handle);
- if (pos + len > inode->i_size) {
+ if (extended) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might
@@ -1406,6 +1409,7 @@ static int ext4_write_end(struct file *file,
int ret = 0, ret2;
int i_size_changed = 0;
int inline_data = ext4_has_inline_data(inode);
+ bool verity = ext4_verity_in_progress(inode);
trace_ext4_write_end(inode, pos, len, copied);
if (inline_data) {
@@ -1423,12 +1427,16 @@ static int ext4_write_end(struct file *file,
/*
* it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size.
+ *
+ * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree
+ * blocks are being written past EOF, so skip the i_size update.
*/
- i_size_changed = ext4_update_inode_size(inode, pos + copied);
+ if (!verity)
+ i_size_changed = ext4_update_inode_size(inode, pos + copied);
unlock_page(page);
put_page(page);
- if (old_size < pos)
+ if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
@@ -1439,7 +1447,7 @@ static int ext4_write_end(struct file *file,
if (i_size_changed || inline_data)
ext4_mark_inode_dirty(handle, inode);
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1450,7 +1458,7 @@ errout:
if (!ret)
ret = ret2;
- if (pos + len > inode->i_size) {
+ if (pos + len > inode->i_size && !verity) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
@@ -1511,6 +1519,7 @@ static int ext4_journalled_write_end(struct file *file,
unsigned from, to;
int size_changed = 0;
int inline_data = ext4_has_inline_data(inode);
+ bool verity = ext4_verity_in_progress(inode);
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_SIZE - 1);
@@ -1540,13 +1549,14 @@ static int ext4_journalled_write_end(struct file *file,
if (!partial)
SetPageUptodate(page);
}
- size_changed = ext4_update_inode_size(inode, pos + copied);
+ if (!verity)
+ size_changed = ext4_update_inode_size(inode, pos + copied);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
unlock_page(page);
put_page(page);
- if (old_size < pos)
+ if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos);
if (size_changed || inline_data) {
@@ -1555,7 +1565,7 @@ static int ext4_journalled_write_end(struct file *file,
ret = ret2;
}
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1566,7 +1576,7 @@ errout:
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
- if (pos + len > inode->i_size) {
+ if (pos + len > inode->i_size && !verity) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
@@ -2162,7 +2172,8 @@ static int ext4_writepage(struct page *page,
trace_ext4_writepage(page);
size = i_size_read(inode);
- if (page->index == size >> PAGE_SHIFT)
+ if (page->index == size >> PAGE_SHIFT &&
+ !ext4_verity_in_progress(inode))
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
@@ -2246,7 +2257,8 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
* after page tables are updated.
*/
size = i_size_read(mpd->inode);
- if (page->index == size >> PAGE_SHIFT)
+ if (page->index == size >> PAGE_SHIFT &&
+ !ext4_verity_in_progress(mpd->inode))
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
@@ -2345,6 +2357,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
>> inode->i_blkbits;
+ if (ext4_verity_in_progress(inode))
+ blocks = EXT_MAX_BLOCKS;
+
do {
BUG_ON(buffer_locked(bh));
@@ -3061,8 +3076,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
index = pos >> PAGE_SHIFT;
- if (ext4_nonda_switch(inode->i_sb) ||
- S_ISLNK(inode->i_mode)) {
+ if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) ||
+ ext4_verity_in_progress(inode)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
return ext4_write_begin(file, mapping, pos,
len, flags, pagep, fsdata);
@@ -3897,6 +3912,8 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
return 0;
#endif
+ if (fsverity_active(inode))
+ return 0;
/*
* If we are doing data journalling we don't support O_DIRECT
@@ -4736,6 +4753,8 @@ static bool ext4_should_use_dax(struct inode *inode)
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT))
return false;
+ if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
+ return false;
return true;
}
@@ -4760,9 +4779,11 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_ENCRYPTED;
if (flags & EXT4_CASEFOLD_FL)
new_fl |= S_CASEFOLD;
+ if (flags & EXT4_VERITY_FL)
+ new_fl |= S_VERITY;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
- S_ENCRYPTED|S_CASEFOLD);
+ S_ENCRYPTED|S_CASEFOLD|S_VERITY);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5552,6 +5573,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (error)
return error;
+ error = fsverity_prepare_setattr(dentry, attr);
+ if (error)
+ return error;
+
if (is_quota_modification(inode, attr)) {
error = dquot_initialize(inode);
if (error)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5703d607f5af..5444d49cbf09 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1198,6 +1198,17 @@ out:
}
case EXT4_IOC_SHUTDOWN:
return ext4_shutdown(sb, arg);
+
+ case FS_IOC_ENABLE_VERITY:
+ if (!ext4_has_feature_verity(sb))
+ return -EOPNOTSUPP;
+ return fsverity_ioctl_enable(filp, (const void __user *)arg);
+
+ case FS_IOC_MEASURE_VERITY:
+ if (!ext4_has_feature_verity(sb))
+ return -EOPNOTSUPP;
+ return fsverity_ioctl_measure(filp, (void __user *)arg);
+
default:
return -ENOTTY;
}
@@ -1265,6 +1276,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
case EXT4_IOC_SHUTDOWN:
case FS_IOC_GETFSMAP:
+ case FS_IOC_ENABLE_VERITY:
+ case FS_IOC_MEASURE_VERITY:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index c916017db334..a30b203fa461 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -47,13 +47,103 @@
#include "ext4.h"
-static inline bool ext4_bio_encrypted(struct bio *bio)
+#define NUM_PREALLOC_POST_READ_CTXS 128
+
+static struct kmem_cache *bio_post_read_ctx_cache;
+static mempool_t *bio_post_read_ctx_pool;
+
+/* postprocessing steps for read bios */
+enum bio_post_read_step {
+ STEP_INITIAL = 0,
+ STEP_DECRYPT,
+ STEP_VERITY,
+};
+
+struct bio_post_read_ctx {
+ struct bio *bio;
+ struct work_struct work;
+ unsigned int cur_step;
+ unsigned int enabled_steps;
+};
+
+static void __read_end_io(struct bio *bio)
{
-#ifdef CONFIG_FS_ENCRYPTION
- return unlikely(bio->bi_private != NULL);
-#else
- return false;
-#endif
+ struct page *page;
+ struct bio_vec *bv;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bv, bio, iter_all) {
+ page = bv->bv_page;
+
+ /* PG_error was set if any post_read step failed */
+ if (bio->bi_status || PageError(page)) {
+ ClearPageUptodate(page);
+ /* will re-read again later */
+ ClearPageError(page);
+ } else {
+ SetPageUptodate(page);
+ }
+ unlock_page(page);
+ }
+ if (bio->bi_private)
+ mempool_free(bio->bi_private, bio_post_read_ctx_pool);
+ bio_put(bio);
+}
+
+static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
+
+static void decrypt_work(struct work_struct *work)
+{
+ struct bio_post_read_ctx *ctx =
+ container_of(work, struct bio_post_read_ctx, work);
+
+ fscrypt_decrypt_bio(ctx->bio);
+
+ bio_post_read_processing(ctx);
+}
+
+static void verity_work(struct work_struct *work)
+{
+ struct bio_post_read_ctx *ctx =
+ container_of(work, struct bio_post_read_ctx, work);
+
+ fsverity_verify_bio(ctx->bio);
+
+ bio_post_read_processing(ctx);
+}
+
+static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
+{
+ /*
+ * We use different work queues for decryption and for verity because
+ * verity may require reading metadata pages that need decryption, and
+ * we shouldn't recurse to the same workqueue.
+ */
+ switch (++ctx->cur_step) {
+ case STEP_DECRYPT:
+ if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
+ INIT_WORK(&ctx->work, decrypt_work);
+ fscrypt_enqueue_decrypt_work(&ctx->work);
+ return;
+ }
+ ctx->cur_step++;
+ /* fall-through */
+ case STEP_VERITY:
+ if (ctx->enabled_steps & (1 << STEP_VERITY)) {
+ INIT_WORK(&ctx->work, verity_work);
+ fsverity_enqueue_verify_work(&ctx->work);
+ return;
+ }
+ ctx->cur_step++;
+ /* fall-through */
+ default:
+ __read_end_io(ctx->bio);
+ }
+}
+
+static bool bio_post_read_required(struct bio *bio)
+{
+ return bio->bi_private && !bio->bi_status;
}
/*
@@ -70,30 +160,53 @@ static inline bool ext4_bio_encrypted(struct bio *bio)
*/
static void mpage_end_io(struct bio *bio)
{
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ if (bio_post_read_required(bio)) {
+ struct bio_post_read_ctx *ctx = bio->bi_private;
- if (ext4_bio_encrypted(bio)) {
- if (bio->bi_status) {
- fscrypt_release_ctx(bio->bi_private);
- } else {
- fscrypt_enqueue_decrypt_bio(bio->bi_private, bio);
- return;
- }
+ ctx->cur_step = STEP_INITIAL;
+ bio_post_read_processing(ctx);
+ return;
}
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
+ __read_end_io(bio);
+}
- if (!bio->bi_status) {
- SetPageUptodate(page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- unlock_page(page);
+static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx)
+{
+ return fsverity_active(inode) &&
+ idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+}
+
+static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode,
+ struct bio *bio,
+ pgoff_t first_idx)
+{
+ unsigned int post_read_steps = 0;
+ struct bio_post_read_ctx *ctx = NULL;
+
+ if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
+ post_read_steps |= 1 << STEP_DECRYPT;
+
+ if (ext4_need_verity(inode, first_idx))
+ post_read_steps |= 1 << STEP_VERITY;
+
+ if (post_read_steps) {
+ ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+ ctx->bio = bio;
+ ctx->enabled_steps = post_read_steps;
+ bio->bi_private = ctx;
}
+ return ctx;
+}
- bio_put(bio);
+static inline loff_t ext4_readpage_limit(struct inode *inode)
+{
+ if (IS_ENABLED(CONFIG_FS_VERITY) &&
+ (IS_VERITY(inode) || ext4_verity_in_progress(inode)))
+ return inode->i_sb->s_maxbytes;
+
+ return i_size_read(inode);
}
int ext4_mpage_readpages(struct address_space *mapping,
@@ -141,7 +254,8 @@ int ext4_mpage_readpages(struct address_space *mapping,
block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
last_block = block_in_file + nr_pages * blocks_per_page;
- last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
+ last_block_in_file = (ext4_readpage_limit(inode) +
+ blocksize - 1) >> blkbits;
if (last_block > last_block_in_file)
last_block = last_block_in_file;
page_block = 0;
@@ -218,6 +332,9 @@ int ext4_mpage_readpages(struct address_space *mapping,
zero_user_segment(page, first_hole << blkbits,
PAGE_SIZE);
if (first_hole == 0) {
+ if (ext4_need_verity(inode, page->index) &&
+ !fsverity_verify_page(page))
+ goto set_error_page;
SetPageUptodate(page);
unlock_page(page);
goto next_page;
@@ -241,18 +358,16 @@ int ext4_mpage_readpages(struct address_space *mapping,
bio = NULL;
}
if (bio == NULL) {
- struct fscrypt_ctx *ctx = NULL;
+ struct bio_post_read_ctx *ctx;
- if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
- ctx = fscrypt_get_ctx(GFP_NOFS);
- if (IS_ERR(ctx))
- goto set_error_page;
- }
bio = bio_alloc(GFP_KERNEL,
min_t(int, nr_pages, BIO_MAX_PAGES));
- if (!bio) {
- if (ctx)
- fscrypt_release_ctx(ctx);
+ if (!bio)
+ goto set_error_page;
+ ctx = get_bio_post_read_ctx(inode, bio, page->index);
+ if (IS_ERR(ctx)) {
+ bio_put(bio);
+ bio = NULL;
goto set_error_page;
}
bio_set_dev(bio, bdev);
@@ -293,3 +408,29 @@ int ext4_mpage_readpages(struct address_space *mapping,
submit_bio(bio);
return 0;
}
+
+int __init ext4_init_post_read_processing(void)
+{
+ bio_post_read_ctx_cache =
+ kmem_cache_create("ext4_bio_post_read_ctx",
+ sizeof(struct bio_post_read_ctx), 0, 0, NULL);
+ if (!bio_post_read_ctx_cache)
+ goto fail;
+ bio_post_read_ctx_pool =
+ mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
+ bio_post_read_ctx_cache);
+ if (!bio_post_read_ctx_pool)
+ goto fail_free_cache;
+ return 0;
+
+fail_free_cache:
+ kmem_cache_destroy(bio_post_read_ctx_cache);
+fail:
+ return -ENOMEM;
+}
+
+void ext4_exit_post_read_processing(void)
+{
+ mempool_destroy(bio_post_read_ctx_pool);
+ kmem_cache_destroy(bio_post_read_ctx_cache);
+}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 757819139b8f..27cd622676e7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1182,6 +1182,7 @@ void ext4_clear_inode(struct inode *inode)
EXT4_I(inode)->jinode = NULL;
}
fscrypt_put_encryption_info(inode);
+ fsverity_cleanup_inode(inode);
}
static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@@ -4275,6 +4276,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_FS_ENCRYPTION
sb->s_cop = &ext4_cryptops;
#endif
+#ifdef CONFIG_FS_VERITY
+ sb->s_vop = &ext4_verityops;
+#endif
#ifdef CONFIG_QUOTA
sb->dq_op = &ext4_quota_operations;
if (ext4_has_feature_quota(sb))
@@ -4422,6 +4426,11 @@ no_journal:
goto failed_mount_wq;
}
+ if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
+ ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
+ goto failed_mount_wq;
+ }
+
if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
!ext4_has_feature_encrypt(sb)) {
ext4_set_feature_encrypt(sb);
@@ -6098,6 +6107,10 @@ static int __init ext4_init_fs(void)
err = ext4_init_pending();
if (err)
+ goto out7;
+
+ err = ext4_init_post_read_processing();
+ if (err)
goto out6;
err = ext4_init_pageio();
@@ -6138,8 +6151,10 @@ out3:
out4:
ext4_exit_pageio();
out5:
- ext4_exit_pending();
+ ext4_exit_post_read_processing();
out6:
+ ext4_exit_pending();
+out7:
ext4_exit_es();
return err;
@@ -6156,6 +6171,7 @@ static void __exit ext4_exit_fs(void)
ext4_exit_sysfs();
ext4_exit_system_zone();
ext4_exit_pageio();
+ ext4_exit_post_read_processing();
ext4_exit_es();
ext4_exit_pending();
}
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index b3cd7655a6ff..eb1efad0e20a 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -242,6 +242,9 @@ EXT4_ATTR_FEATURE(encryption);
#ifdef CONFIG_UNICODE
EXT4_ATTR_FEATURE(casefold);
#endif
+#ifdef CONFIG_FS_VERITY
+EXT4_ATTR_FEATURE(verity);
+#endif
EXT4_ATTR_FEATURE(metadata_csum_seed);
static struct attribute *ext4_feat_attrs[] = {
@@ -254,6 +257,9 @@ static struct attribute *ext4_feat_attrs[] = {
#ifdef CONFIG_UNICODE
ATTR_LIST(casefold),
#endif
+#ifdef CONFIG_FS_VERITY
+ ATTR_LIST(verity),
+#endif
ATTR_LIST(metadata_csum_seed),
NULL,
};
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
new file mode 100644
index 000000000000..d0d8a9795dd6
--- /dev/null
+++ b/fs/ext4/verity.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/ext4/verity.c: fs-verity support for ext4
+ *
+ * Copyright 2019 Google LLC
+ */
+
+/*
+ * Implementation of fsverity_operations for ext4.
+ *
+ * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past
+ * the end of the file, starting at the first 64K boundary beyond i_size. This
+ * approach works because (a) verity files are readonly, and (b) pages fully
+ * beyond i_size aren't visible to userspace but can be read/written internally
+ * by ext4 with only some relatively small changes to ext4. This approach
+ * avoids having to depend on the EA_INODE feature and on rearchitecturing
+ * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and
+ * to support encrypting xattrs. Note that the verity metadata *must* be
+ * encrypted when the file is, since it contains hashes of the plaintext data.
+ *
+ * Using a 64K boundary rather than a 4K one keeps things ready for
+ * architectures with 64K pages, and it doesn't necessarily waste space on-disk
+ * since there can be a hole between i_size and the start of the Merkle tree.
+ */
+
+#include <linux/quotaops.h>
+
+#include "ext4.h"
+#include "ext4_extents.h"
+#include "ext4_jbd2.h"
+
+static inline loff_t ext4_verity_metadata_pos(const struct inode *inode)
+{
+ return round_up(inode->i_size, 65536);
+}
+
+/*
+ * Read some verity metadata from the inode. __vfs_read() can't be used because
+ * we need to read beyond i_size.
+ */
+static int pagecache_read(struct inode *inode, void *buf, size_t count,
+ loff_t pos)
+{
+ while (count) {
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+ void *addr;
+
+ page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
+ NULL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ addr = kmap_atomic(page);
+ memcpy(buf, addr + offset_in_page(pos), n);
+ kunmap_atomic(addr);
+
+ put_page(page);
+
+ buf += n;
+ pos += n;
+ count -= n;
+ }
+ return 0;
+}
+
+/*
+ * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
+ * kernel_write() can't be used because the file descriptor is readonly.
+ */
+static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+ loff_t pos)
+{
+ if (pos + count > inode->i_sb->s_maxbytes)
+ return -EFBIG;
+
+ while (count) {
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+ void *fsdata;
+ void *addr;
+ int res;
+
+ res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
+ &page, &fsdata);
+ if (res)
+ return res;
+
+ addr = kmap_atomic(page);
+ memcpy(addr + offset_in_page(pos), buf, n);
+ kunmap_atomic(addr);
+
+ res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
+ page, fsdata);
+ if (res < 0)
+ return res;
+ if (res != n)
+ return -EIO;
+
+ buf += n;
+ pos += n;
+ count -= n;
+ }
+ return 0;
+}
+
+static int ext4_begin_enable_verity(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+ const int credits = 2; /* superblock and inode for ext4_orphan_add() */
+ handle_t *handle;
+ int err;
+
+ if (ext4_verity_in_progress(inode))
+ return -EBUSY;
+
+ /*
+ * Since the file was opened readonly, we have to initialize the jbd
+ * inode and quotas here and not rely on ->open() doing it. This must
+ * be done before evicting the inline data.
+ */
+
+ err = ext4_inode_attach_jinode(inode);
+ if (err)
+ return err;
+
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
+
+ err = ext4_convert_inline_data(inode);
+ if (err)
+ return err;
+
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ ext4_warning_inode(inode,
+ "verity is only allowed on extent-based files");
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * ext4 uses the last allocated block to find the verity descriptor, so
+ * we must remove any other blocks past EOF which might confuse things.
+ */
+ err = ext4_truncate(inode);
+ if (err)
+ return err;
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ err = ext4_orphan_add(handle, inode);
+ if (err == 0)
+ ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+
+ ext4_journal_stop(handle);
+ return err;
+}
+
+/*
+ * ext4 stores the verity descriptor beginning on the next filesystem block
+ * boundary after the Merkle tree. Then, the descriptor size is stored in the
+ * last 4 bytes of the last allocated filesystem block --- which is either the
+ * block in which the descriptor ends, or the next block after that if there
+ * weren't at least 4 bytes remaining.
+ *
+ * We can't simply store the descriptor in an xattr because it *must* be
+ * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt
+ * xattrs. Also, if the descriptor includes a large signature blob it may be
+ * too large to store in an xattr without the EA_INODE feature.
+ */
+static int ext4_write_verity_descriptor(struct inode *inode, const void *desc,
+ size_t desc_size, u64 merkle_tree_size)
+{
+ const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) +
+ merkle_tree_size, i_blocksize(inode));
+ const u64 desc_end = desc_pos + desc_size;
+ const __le32 desc_size_disk = cpu_to_le32(desc_size);
+ const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk),
+ i_blocksize(inode)) -
+ sizeof(desc_size_disk);
+ int err;
+
+ err = pagecache_write(inode, desc, desc_size, desc_pos);
+ if (err)
+ return err;
+
+ return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk),
+ desc_size_pos);
+}
+
+static int ext4_end_enable_verity(struct file *filp, const void *desc,
+ size_t desc_size, u64 merkle_tree_size)
+{
+ struct inode *inode = file_inode(filp);
+ const int credits = 2; /* superblock and inode for ext4_orphan_del() */
+ handle_t *handle;
+ int err = 0;
+ int err2;
+
+ if (desc != NULL) {
+ /* Succeeded; write the verity descriptor. */
+ err = ext4_write_verity_descriptor(inode, desc, desc_size,
+ merkle_tree_size);
+
+ /* Write all pages before clearing VERITY_IN_PROGRESS. */
+ if (!err)
+ err = filemap_write_and_wait(inode->i_mapping);
+ }
+
+ /* If we failed, truncate anything we wrote past i_size. */
+ if (desc == NULL || err)
+ ext4_truncate(inode);
+
+ /*
+ * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and
+ * deleting the inode from the orphan list, even if something failed.
+ * If everything succeeded, we'll also set the verity bit in the same
+ * transaction.
+ */
+
+ ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
+ if (IS_ERR(handle)) {
+ ext4_orphan_del(NULL, inode);
+ return PTR_ERR(handle);
+ }
+
+ err2 = ext4_orphan_del(handle, inode);
+ if (err2)
+ goto out_stop;
+
+ if (desc != NULL && !err) {
+ struct ext4_iloc iloc;
+
+ err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ goto out_stop;
+ ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
+ ext4_set_inode_flags(inode);
+ err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+ }
+out_stop:
+ ext4_journal_stop(handle);
+ return err ?: err2;
+}
+
+static int ext4_get_verity_descriptor_location(struct inode *inode,
+ size_t *desc_size_ret,
+ u64 *desc_pos_ret)
+{
+ struct ext4_ext_path *path;
+ struct ext4_extent *last_extent;
+ u32 end_lblk;
+ u64 desc_size_pos;
+ __le32 desc_size_disk;
+ u32 desc_size;
+ u64 desc_pos;
+ int err;
+
+ /*
+ * Descriptor size is in last 4 bytes of last allocated block.
+ * See ext4_write_verity_descriptor().
+ */
+
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ EXT4_ERROR_INODE(inode, "verity file doesn't use extents");
+ return -EFSCORRUPTED;
+ }
+
+ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ last_extent = path[path->p_depth].p_ext;
+ if (!last_extent) {
+ EXT4_ERROR_INODE(inode, "verity file has no extents");
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ return -EFSCORRUPTED;
+ }
+
+ end_lblk = le32_to_cpu(last_extent->ee_block) +
+ ext4_ext_get_actual_len(last_extent);
+ desc_size_pos = (u64)end_lblk << inode->i_blkbits;
+ ext4_ext_drop_refs(path);
+ kfree(path);
+
+ if (desc_size_pos < sizeof(desc_size_disk))
+ goto bad;
+ desc_size_pos -= sizeof(desc_size_disk);
+
+ err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk),
+ desc_size_pos);
+ if (err)
+ return err;
+ desc_size = le32_to_cpu(desc_size_disk);
+
+ /*
+ * The descriptor is stored just before the desc_size_disk, but starting
+ * on a filesystem block boundary.
+ */
+
+ if (desc_size > INT_MAX || desc_size > desc_size_pos)
+ goto bad;
+
+ desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode));
+ if (desc_pos < ext4_verity_metadata_pos(inode))
+ goto bad;
+
+ *desc_size_ret = desc_size;
+ *desc_pos_ret = desc_pos;
+ return 0;
+
+bad:
+ EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor");
+ return -EFSCORRUPTED;
+}
+
+static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
+ size_t buf_size)
+{
+ size_t desc_size = 0;
+ u64 desc_pos = 0;
+ int err;
+
+ err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos);
+ if (err)
+ return err;
+
+ if (buf_size) {
+ if (desc_size > buf_size)
+ return -ERANGE;
+ err = pagecache_read(inode, buf, desc_size, desc_pos);
+ if (err)
+ return err;
+ }
+ return desc_size;
+}
+
+static struct page *ext4_read_merkle_tree_page(struct inode *inode,
+ pgoff_t index)
+{
+ index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
+
+ return read_mapping_page(inode->i_mapping, index, NULL);
+}
+
+static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
+ u64 index, int log_blocksize)
+{
+ loff_t pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize);
+
+ return pagecache_write(inode, buf, 1 << log_blocksize, pos);
+}
+
+const struct fsverity_operations ext4_verityops = {
+ .begin_enable_verity = ext4_begin_enable_verity,
+ .end_enable_verity = ext4_end_enable_verity,
+ .get_verity_descriptor = ext4_get_verity_descriptor,
+ .read_merkle_tree_page = ext4_read_merkle_tree_page,
+ .write_merkle_tree_block = ext4_write_merkle_tree_block,
+};