From 8802e580ee643e3f63c6b39ff64e7c7baa4a55ba Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 22 Nov 2023 14:27:13 +0200 Subject: fs: create __sb_write_started() helper Similar to sb_write_started() for use by other sb freeze levels. Unlike the boolean sb_write_started(), this helper returns a tristate to distiguish the cases of lockdep disabled or unknown lock state. This is needed for fanotify "pre content" events. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20231122122715.2561213-15-amir73il@gmail.com Reviewed-by: Josef Bacik Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 98b7a7a8c42e..ac8b5a9b467b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1645,9 +1645,23 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level) #define __sb_writers_release(sb, lev) \ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) +/** + * __sb_write_started - check if sb freeze level is held + * @sb: the super we write to + * @level: the freeze level + * + * > 0 sb freeze level is held + * 0 sb freeze level is not held + * < 0 !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN + */ +static inline int __sb_write_started(const struct super_block *sb, int level) +{ + return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1); +} + static inline bool sb_write_started(const struct super_block *sb) { - return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1); + return __sb_write_started(sb, SB_FREEZE_WRITE); } /** -- cgit v1.2.3 From 3d5cd4911e04683df8f4439fddd788e00a2510a8 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 22 Nov 2023 14:27:14 +0200 Subject: fs: create file_write_started() helper Convenience wrapper for sb_write_started(file_inode(inode)->i_sb)), which has a single occurrence in the code right now. Document the false negatives of those helpers, which makes them unusable to assert that sb_start_write() is not held. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20231122122715.2561213-16-amir73il@gmail.com Reviewed-by: Josef Bacik Signed-off-by: Christian Brauner --- fs/read_write.c | 2 +- include/linux/fs.h | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/read_write.c b/fs/read_write.c index 92c68ab4f221..f791555fa246 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1423,7 +1423,7 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { - lockdep_assert(sb_write_started(file_inode(file_out)->i_sb)); + lockdep_assert(file_write_started(file_out)); return do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); diff --git a/include/linux/fs.h b/include/linux/fs.h index ac8b5a9b467b..75a10b632edd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1659,11 +1659,32 @@ static inline int __sb_write_started(const struct super_block *sb, int level) return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1); } +/** + * sb_write_started - check if SB_FREEZE_WRITE is held + * @sb: the super we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + */ static inline bool sb_write_started(const struct super_block *sb) { return __sb_write_started(sb, SB_FREEZE_WRITE); } +/** + * file_write_started - check if SB_FREEZE_WRITE is held + * @file: the file we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + * May be false positive with !S_ISREG, because file_start_write() has + * no effect on !S_ISREG. + */ +static inline bool file_write_started(const struct file *file) +{ + if (!S_ISREG(file_inode(file)->i_mode)) + return true; + return sb_write_started(file_inode(file)->i_sb); +} + /** * sb_end_write - drop write access to a superblock * @sb: the super we wrote to -- cgit v1.2.3 From 21b32e6a0ab5b174fa1ca2fb4c212577cf405d83 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 22 Nov 2023 14:27:15 +0200 Subject: fs: create {sb,file}_write_not_started() helpers Create new helpers {sb,file}_write_not_started() that can be used to assert that sb_start_write() is not held. This is needed for fanotify "pre content" events. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20231122122715.2561213-17-amir73il@gmail.com Reviewed-by: Josef Bacik Signed-off-by: Christian Brauner --- include/linux/fs.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 75a10b632edd..ae0e2fb7bcea 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1670,6 +1670,17 @@ static inline bool sb_write_started(const struct super_block *sb) return __sb_write_started(sb, SB_FREEZE_WRITE); } +/** + * sb_write_not_started - check if SB_FREEZE_WRITE is not held + * @sb: the super we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + */ +static inline bool sb_write_not_started(const struct super_block *sb) +{ + return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0; +} + /** * file_write_started - check if SB_FREEZE_WRITE is held * @file: the file we write to @@ -1685,6 +1696,21 @@ static inline bool file_write_started(const struct file *file) return sb_write_started(file_inode(file)->i_sb); } +/** + * file_write_not_started - check if SB_FREEZE_WRITE is not held + * @file: the file we write to + * + * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. + * May be false positive with !S_ISREG, because file_start_write() has + * no effect on !S_ISREG. + */ +static inline bool file_write_not_started(const struct file *file) +{ + if (!S_ISREG(file_inode(file)->i_mode)) + return true; + return sb_write_not_started(file_inode(file)->i_sb); +} + /** * sb_end_write - drop write access to a superblock * @sb: the super we wrote to -- cgit v1.2.3 From 488e8f685207e0758398963d6834f81e5e61c162 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 30 Nov 2023 16:16:22 +0200 Subject: fs: fork splice_file_range() from do_splice_direct() In preparation of calling do_splice_direct() without file_start_write() held, create a new helper splice_file_range(), to be called from context of ->copy_file_range() methods instead of do_splice_direct(). Currently, the only difference is that splice_file_range() does not take flags argument and that it asserts that file_start_write() is held, but we factor out a common helper do_splice_direct_actor() that will be used later. Use the new helper from __ceph_copy_file_range(), that was incorrectly passing to do_splice_direct() the copy flags argument as splice flags. The value of copy flags in ceph is always 0, so it is a smenatic bug fix. Move the declaration of both helpers to linux/splice.h. Reviewed-by: Jan Kara Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20231130141624.3338942-2-amir73il@gmail.com Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/ceph/file.c | 9 ++++--- fs/read_write.c | 6 ++--- fs/splice.c | 71 ++++++++++++++++++++++++++++++++++++-------------- include/linux/fs.h | 2 -- include/linux/splice.h | 13 +++++---- 5 files changed, 66 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3b5aae29e944..f11de6e1f1c1 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" @@ -3010,8 +3011,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, * {read,write}_iter, which will get caps again. */ put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got); - ret = do_splice_direct(src_file, &src_off, dst_file, - &dst_off, src_objlen, flags); + ret = splice_file_range(src_file, &src_off, dst_file, &dst_off, + src_objlen); /* Abort on short copies or on error */ if (ret < (long)src_objlen) { doutc(cl, "Failed partial copy (%zd)\n", ret); @@ -3065,8 +3066,8 @@ out_caps: */ if (len && (len < src_ci->i_layout.object_size)) { doutc(cl, "Final partial copy of %zu bytes\n", len); - bytes = do_splice_direct(src_file, &src_off, dst_file, - &dst_off, len, flags); + bytes = splice_file_range(src_file, &src_off, dst_file, + &dst_off, len); if (bytes > 0) ret += bytes; else diff --git a/fs/read_write.c b/fs/read_write.c index f791555fa246..642c7ce1ced1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1423,10 +1423,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { - lockdep_assert(file_write_started(file_out)); - - return do_splice_direct(file_in, &pos_in, file_out, &pos_out, - len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); + return splice_file_range(file_in, &pos_in, file_out, &pos_out, + min_t(size_t, len, MAX_RW_COUNT)); } EXPORT_SYMBOL(generic_copy_file_range); diff --git a/fs/splice.c b/fs/splice.c index 3fce5f6072dd..9007b2c8baa8 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1170,25 +1170,10 @@ static void direct_file_splice_eof(struct splice_desc *sd) file->f_op->splice_eof(file); } -/** - * do_splice_direct - splices data directly between two files - * @in: file to splice from - * @ppos: input file offset - * @out: file to splice to - * @opos: output file offset - * @len: number of bytes to splice - * @flags: splice modifier flags - * - * Description: - * For use by do_sendfile(). splice can easily emulate sendfile, but - * doing it in the application would incur an extra system call - * (splice in + splice out, as compared to just sendfile()). So this helper - * can splice directly through a process-private pipe. - * - * Callers already called rw_verify_area() on the entire range. - */ -long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - loff_t *opos, size_t len, unsigned int flags) +static long do_splice_direct_actor(struct file *in, loff_t *ppos, + struct file *out, loff_t *opos, + size_t len, unsigned int flags, + splice_direct_actor *actor) { struct splice_desc sd = { .len = len, @@ -1207,14 +1192,60 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; - ret = splice_direct_to_actor(in, &sd, direct_splice_actor); + ret = splice_direct_to_actor(in, &sd, actor); if (ret > 0) *ppos = sd.pos; return ret; } +/** + * do_splice_direct - splices data directly between two files + * @in: file to splice from + * @ppos: input file offset + * @out: file to splice to + * @opos: output file offset + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Description: + * For use by do_sendfile(). splice can easily emulate sendfile, but + * doing it in the application would incur an extra system call + * (splice in + splice out, as compared to just sendfile()). So this helper + * can splice directly through a process-private pipe. + * + * Callers already called rw_verify_area() on the entire range. + */ +long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags) +{ + return do_splice_direct_actor(in, ppos, out, opos, len, flags, + direct_splice_actor); +} EXPORT_SYMBOL(do_splice_direct); +/** + * splice_file_range - splices data between two files for copy_file_range() + * @in: file to splice from + * @ppos: input file offset + * @out: file to splice to + * @opos: output file offset + * @len: number of bytes to splice + * + * Description: + * For use by generic_copy_file_range() and ->copy_file_range() methods. + * + * Callers already called rw_verify_area() on the entire range. + */ +long splice_file_range(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len) +{ + lockdep_assert(file_write_started(out)); + + return do_splice_direct_actor(in, ppos, out, opos, len, 0, + direct_splice_actor); +} +EXPORT_SYMBOL(splice_file_range); + static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) { for (;;) { diff --git a/include/linux/fs.h b/include/linux/fs.h index ae0e2fb7bcea..04422a0eccdd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3052,8 +3052,6 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, size_t len, unsigned int flags); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); -extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - loff_t *opos, size_t len, unsigned int flags); extern void diff --git a/include/linux/splice.h b/include/linux/splice.h index 6c461573434d..49532d5dda52 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -80,11 +80,14 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *, long vfs_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, - splice_direct_actor *); -extern long do_splice(struct file *in, loff_t *off_in, - struct file *out, loff_t *off_out, - size_t len, unsigned int flags); +ssize_t splice_direct_to_actor(struct file *file, struct splice_desc *sd, + splice_direct_actor *actor); +long do_splice(struct file *in, loff_t *off_in, struct file *out, + loff_t *off_out, size_t len, unsigned int flags); +long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags); +long splice_file_range(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len); extern long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags); -- cgit v1.2.3 From 0f292086c22b43202daffc14b585d3b54b9a1206 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 12 Dec 2023 11:44:36 +0200 Subject: splice: return type ssize_t from all helpers Not sure why some splice helpers return long, maybe historic reasons. Change them all to return ssize_t to conform to the splice methods and to the rest of the helpers. Suggested-by: Christian Brauner Link: https://lore.kernel.org/r/20231208-horchen-helium-d3ec1535ede5@brauner/ Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20231212094440.250945-2-amir73il@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/internal.h | 8 ++-- fs/overlayfs/copy_up.c | 2 +- fs/read_write.c | 2 +- fs/splice.c | 104 +++++++++++++++++++++++++------------------------ include/linux/splice.h | 43 ++++++++++---------- io_uring/splice.c | 4 +- 6 files changed, 82 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/fs/internal.h b/fs/internal.h index 58e43341aebf..4eaa758e3bfd 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -243,10 +243,10 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags, /* * fs/splice.c: */ -long splice_file_to_pipe(struct file *in, - struct pipe_inode_info *opipe, - loff_t *offset, - size_t len, unsigned int flags); +ssize_t splice_file_to_pipe(struct file *in, + struct pipe_inode_info *opipe, + loff_t *offset, + size_t len, unsigned int flags); /* * fs/xattr.c: diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 2f587ee0b334..070d36ba0f09 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -285,7 +285,7 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, while (len) { size_t this_len = OVL_COPY_UP_CHUNK_SIZE; - long bytes; + ssize_t bytes; if (len < this_len) this_len = len; diff --git a/fs/read_write.c b/fs/read_write.c index 01a14570015b..7783b8522693 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1214,7 +1214,7 @@ COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, #endif /* CONFIG_COMPAT */ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, - size_t count, loff_t max) + size_t count, loff_t max) { struct fd in, out; struct inode *in_inode, *out_inode; diff --git a/fs/splice.c b/fs/splice.c index 7cda013e5a1e..c39d1abf23c8 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -201,7 +201,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, unsigned int tail = pipe->tail; unsigned int head = pipe->head; unsigned int mask = pipe->ring_size - 1; - int ret = 0, page_nr = 0; + ssize_t ret = 0; + int page_nr = 0; if (!spd_pages) return 0; @@ -932,8 +933,8 @@ static int warn_unsupported(struct file *file, const char *op) /* * Attempt to initiate a splice from pipe to file. */ -static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) +static ssize_t do_splice_from(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) { if (unlikely(!out->f_op->splice_write)) return warn_unsupported(out, "write"); @@ -955,9 +956,9 @@ static void do_splice_eof(struct splice_desc *sd) * Callers already called rw_verify_area() on the entire range. * No need to call it for sub ranges. */ -static long do_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +static ssize_t do_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) { unsigned int p_space; @@ -999,11 +1000,11 @@ static long do_splice_read(struct file *in, loff_t *ppos, * If successful, it returns the amount of data spliced, 0 if it hit the EOF or * a hole and a negative error code otherwise. */ -long vfs_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +ssize_t vfs_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) { - int ret; + ssize_t ret; ret = rw_verify_area(READ, in, ppos, len); if (unlikely(ret < 0)) @@ -1030,7 +1031,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, splice_direct_actor *actor) { struct pipe_inode_info *pipe; - long ret, bytes; + ssize_t ret, bytes; size_t len; int i, flags, more; @@ -1181,10 +1182,10 @@ static void direct_file_splice_eof(struct splice_desc *sd) file->f_op->splice_eof(file); } -static long do_splice_direct_actor(struct file *in, loff_t *ppos, - struct file *out, loff_t *opos, - size_t len, unsigned int flags, - splice_direct_actor *actor) +static ssize_t do_splice_direct_actor(struct file *in, loff_t *ppos, + struct file *out, loff_t *opos, + size_t len, unsigned int flags, + splice_direct_actor *actor) { struct splice_desc sd = { .len = len, @@ -1195,7 +1196,7 @@ static long do_splice_direct_actor(struct file *in, loff_t *ppos, .splice_eof = direct_file_splice_eof, .opos = opos, }; - long ret; + ssize_t ret; if (unlikely(!(out->f_mode & FMODE_WRITE))) return -EBADF; @@ -1226,8 +1227,8 @@ static long do_splice_direct_actor(struct file *in, loff_t *ppos, * * Callers already called rw_verify_area() on the entire range. */ -long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - loff_t *opos, size_t len, unsigned int flags) +ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags) { return do_splice_direct_actor(in, ppos, out, opos, len, flags, direct_splice_actor); @@ -1249,8 +1250,8 @@ EXPORT_SYMBOL(do_splice_direct); * * Callers already called rw_verify_area() on the entire range. */ -long splice_file_range(struct file *in, loff_t *ppos, struct file *out, - loff_t *opos, size_t len) +ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len) { lockdep_assert(file_write_started(out)); @@ -1280,12 +1281,12 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags); -long splice_file_to_pipe(struct file *in, - struct pipe_inode_info *opipe, - loff_t *offset, - size_t len, unsigned int flags) +ssize_t splice_file_to_pipe(struct file *in, + struct pipe_inode_info *opipe, + loff_t *offset, + size_t len, unsigned int flags) { - long ret; + ssize_t ret; pipe_lock(opipe); ret = wait_for_space(opipe, flags); @@ -1300,13 +1301,13 @@ long splice_file_to_pipe(struct file *in, /* * Determine where to splice to/from. */ -long do_splice(struct file *in, loff_t *off_in, struct file *out, - loff_t *off_out, size_t len, unsigned int flags) +ssize_t do_splice(struct file *in, loff_t *off_in, struct file *out, + loff_t *off_out, size_t len, unsigned int flags) { struct pipe_inode_info *ipipe; struct pipe_inode_info *opipe; loff_t offset; - long ret; + ssize_t ret; if (unlikely(!(in->f_mode & FMODE_READ) || !(out->f_mode & FMODE_WRITE))) @@ -1397,14 +1398,14 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out, return ret; } -static long __do_splice(struct file *in, loff_t __user *off_in, - struct file *out, loff_t __user *off_out, - size_t len, unsigned int flags) +static ssize_t __do_splice(struct file *in, loff_t __user *off_in, + struct file *out, loff_t __user *off_out, + size_t len, unsigned int flags) { struct pipe_inode_info *ipipe; struct pipe_inode_info *opipe; loff_t offset, *__off_in = NULL, *__off_out = NULL; - long ret; + ssize_t ret; ipipe = get_pipe_info(in, true); opipe = get_pipe_info(out, true); @@ -1443,16 +1444,16 @@ static long __do_splice(struct file *in, loff_t __user *off_in, return ret; } -static int iter_to_pipe(struct iov_iter *from, - struct pipe_inode_info *pipe, - unsigned flags) +static ssize_t iter_to_pipe(struct iov_iter *from, + struct pipe_inode_info *pipe, + unsigned int flags) { struct pipe_buffer buf = { .ops = &user_page_pipe_buf_ops, .flags = flags }; size_t total = 0; - int ret = 0; + ssize_t ret = 0; while (iov_iter_count(from)) { struct page *pages[16]; @@ -1501,8 +1502,8 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, * For lack of a better implementation, implement vmsplice() to userspace * as a simple copy of the pipes pages to the user iov. */ -static long vmsplice_to_user(struct file *file, struct iov_iter *iter, - unsigned int flags) +static ssize_t vmsplice_to_user(struct file *file, struct iov_iter *iter, + unsigned int flags) { struct pipe_inode_info *pipe = get_pipe_info(file, true); struct splice_desc sd = { @@ -1510,7 +1511,7 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter, .flags = flags, .u.data = iter }; - long ret = 0; + ssize_t ret = 0; if (!pipe) return -EBADF; @@ -1534,11 +1535,11 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter, * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. */ -static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, - unsigned int flags) +static ssize_t vmsplice_to_pipe(struct file *file, struct iov_iter *iter, + unsigned int flags) { struct pipe_inode_info *pipe; - long ret = 0; + ssize_t ret = 0; unsigned buf_flag = 0; if (flags & SPLICE_F_GIFT) @@ -1634,7 +1635,7 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, size_t, len, unsigned int, flags) { struct fd in, out; - long error; + ssize_t error; if (unlikely(!len)) return 0; @@ -1648,7 +1649,7 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, out = fdget(fd_out); if (out.file) { error = __do_splice(in.file, off_in, out.file, off_out, - len, flags); + len, flags); fdput(out); } fdput(in); @@ -1871,15 +1872,15 @@ retry: /* * Link contents of ipipe to opipe. */ -static int link_pipe(struct pipe_inode_info *ipipe, - struct pipe_inode_info *opipe, - size_t len, unsigned int flags) +static ssize_t link_pipe(struct pipe_inode_info *ipipe, + struct pipe_inode_info *opipe, + size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; unsigned int i_head, o_head; unsigned int i_tail, o_tail; unsigned int i_mask, o_mask; - int ret = 0; + ssize_t ret = 0; /* * Potential ABBA deadlock, work around it by ordering lock @@ -1962,11 +1963,12 @@ static int link_pipe(struct pipe_inode_info *ipipe, * The 'flags' used are the SPLICE_F_* variants, currently the only * applicable one is SPLICE_F_NONBLOCK. */ -long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) +ssize_t do_tee(struct file *in, struct file *out, size_t len, + unsigned int flags) { struct pipe_inode_info *ipipe = get_pipe_info(in, true); struct pipe_inode_info *opipe = get_pipe_info(out, true); - int ret = -EINVAL; + ssize_t ret = -EINVAL; if (unlikely(!(in->f_mode & FMODE_READ) || !(out->f_mode & FMODE_WRITE))) @@ -2003,7 +2005,7 @@ long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { struct fd in, out; - int error; + ssize_t error; if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; diff --git a/include/linux/splice.h b/include/linux/splice.h index 49532d5dda52..068a8e8ffd73 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -68,31 +68,30 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, typedef int (splice_direct_actor)(struct pipe_inode_info *, struct splice_desc *); -extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, - loff_t *, size_t, unsigned int, - splice_actor *); -extern ssize_t __splice_from_pipe(struct pipe_inode_info *, - struct splice_desc *, splice_actor *); -extern ssize_t splice_to_pipe(struct pipe_inode_info *, - struct splice_pipe_desc *); -extern ssize_t add_to_pipe(struct pipe_inode_info *, - struct pipe_buffer *); -long vfs_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags); +ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags, + splice_actor *actor); +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, + struct splice_desc *sd, splice_actor *actor); +ssize_t splice_to_pipe(struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd); +ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf); +ssize_t vfs_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); ssize_t splice_direct_to_actor(struct file *file, struct splice_desc *sd, splice_direct_actor *actor); -long do_splice(struct file *in, loff_t *off_in, struct file *out, - loff_t *off_out, size_t len, unsigned int flags); -long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - loff_t *opos, size_t len, unsigned int flags); -long splice_file_range(struct file *in, loff_t *ppos, struct file *out, - loff_t *opos, size_t len); +ssize_t do_splice(struct file *in, loff_t *off_in, struct file *out, + loff_t *off_out, size_t len, unsigned int flags); +ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags); +ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len); -extern long do_tee(struct file *in, struct file *out, size_t len, - unsigned int flags); -extern ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags); +ssize_t do_tee(struct file *in, struct file *out, size_t len, + unsigned int flags); +ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags); /* * for dynamic pipe sizing diff --git a/io_uring/splice.c b/io_uring/splice.c index 7c4469e9540e..3b659cd23e9d 100644 --- a/io_uring/splice.c +++ b/io_uring/splice.c @@ -51,7 +51,7 @@ int io_tee(struct io_kiocb *req, unsigned int issue_flags) struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; struct file *in; - long ret = 0; + ssize_t ret = 0; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); @@ -92,7 +92,7 @@ int io_splice(struct io_kiocb *req, unsigned int issue_flags) unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; loff_t *poff_in, *poff_out; struct file *in; - long ret = 0; + ssize_t ret = 0; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); -- cgit v1.2.3 From 705bcfcbde38b9dd4db00cd3deb0b98bddb0dd4a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 12 Dec 2023 11:44:37 +0200 Subject: fs: use splice_copy_file_range() inline helper generic_copy_file_range() is just a wrapper around splice_file_range(), which caps the maximum copy length. The only caller of splice_file_range(), namely __ceph_copy_file_range() is already ready to cope with short copy. Move the length capping into splice_file_range() and replace the exported symbol generic_copy_file_range() with a simple inline helper. Suggested-by: Christoph Hellwig Link: https://lore.kernel.org/linux-fsdevel/20231204083849.GC32438@lst.de/ Reviewed-by: Jan Kara Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20231212094440.250945-3-amir73il@gmail.com Signed-off-by: Christian Brauner --- fs/ceph/file.c | 4 ++-- fs/fuse/file.c | 5 +++-- fs/nfs/nfs4file.c | 5 +++-- fs/read_write.c | 34 ---------------------------------- fs/smb/client/cifsfs.c | 5 +++-- fs/splice.c | 7 ++++--- include/linux/fs.h | 3 --- include/linux/splice.h | 7 +++++++ 8 files changed, 22 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index f11de6e1f1c1..d380d9dad0e0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -3090,8 +3090,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, len, flags); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_file, src_off, dst_file, - dst_off, len, flags); + ret = splice_copy_file_range(src_file, src_off, dst_file, + dst_off, len); return ret; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1cdb6327511e..7d0860ef63d0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -19,6 +19,7 @@ #include #include #include +#include static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, unsigned int open_flags, int opcode, @@ -3193,8 +3194,8 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, len, flags); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_file, src_off, dst_file, - dst_off, len, flags); + ret = splice_copy_file_range(src_file, src_off, dst_file, + dst_off, len); return ret; } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 02788c3c85e5..e238abc78a13 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "delegation.h" #include "internal.h" #include "iostat.h" @@ -195,8 +196,8 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count, flags); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(file_in, pos_in, file_out, - pos_out, count, flags); + ret = splice_copy_file_range(file_in, pos_in, file_out, + pos_out, count); return ret; } diff --git a/fs/read_write.c b/fs/read_write.c index 7783b8522693..e3abf603eaaf 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1396,40 +1396,6 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, } #endif -/** - * generic_copy_file_range - copy data between two files - * @file_in: file structure to read from - * @pos_in: file offset to read from - * @file_out: file structure to write data to - * @pos_out: file offset to write data to - * @len: amount of data to copy - * @flags: copy flags - * - * This is a generic filesystem helper to copy data from one file to another. - * It has no constraints on the source or destination file owners - the files - * can belong to different superblocks and different filesystem types. Short - * copies are allowed. - * - * This should be called from the @file_out filesystem, as per the - * ->copy_file_range() method. - * - * Returns the number of bytes copied or a negative error indicating the - * failure. - */ - -ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags) -{ - /* May only be called from within ->copy_file_range() methods */ - if (WARN_ON_ONCE(flags)) - return -EINVAL; - - return splice_file_range(file_in, &pos_in, file_out, &pos_out, - min_t(size_t, len, MAX_RW_COUNT)); -} -EXPORT_SYMBOL(generic_copy_file_range); - /* * Performs necessary checks before doing a file copy * diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index ea3a7a668b45..ee461bf0ef63 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1362,8 +1363,8 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, free_xid(xid); if (rc == -EOPNOTSUPP || rc == -EXDEV) - rc = generic_copy_file_range(src_file, off, dst_file, - destoff, len, flags); + rc = splice_copy_file_range(src_file, off, dst_file, + destoff, len); return rc; } diff --git a/fs/splice.c b/fs/splice.c index c39d1abf23c8..218e24b1ac40 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1244,7 +1244,7 @@ EXPORT_SYMBOL(do_splice_direct); * @len: number of bytes to splice * * Description: - * For use by generic_copy_file_range() and ->copy_file_range() methods. + * For use by ->copy_file_range() methods. * Like do_splice_direct(), but vfs_copy_file_range() already holds * start_file_write() on @out file. * @@ -1255,8 +1255,9 @@ ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out, { lockdep_assert(file_write_started(out)); - return do_splice_direct_actor(in, ppos, out, opos, len, 0, - splice_file_range_actor); + return do_splice_direct_actor(in, ppos, out, opos, + min_t(size_t, len, MAX_RW_COUNT), + 0, splice_file_range_actor); } EXPORT_SYMBOL(splice_file_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index 04422a0eccdd..900d0cd55b50 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2090,9 +2090,6 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); -extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags); int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags, diff --git a/include/linux/splice.h b/include/linux/splice.h index 068a8e8ffd73..9dec4861d09f 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -88,6 +88,13 @@ ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out, loff_t *opos, size_t len); +static inline long splice_copy_file_range(struct file *in, loff_t pos_in, + struct file *out, loff_t pos_out, + size_t len) +{ + return splice_file_range(in, &pos_in, out, &pos_out, len); +} + ssize_t do_tee(struct file *in, struct file *out, size_t len, unsigned int flags); ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, -- cgit v1.2.3 From 36e28c42187c95eb148873ffb059bfdcb8cdb75b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 12 Dec 2023 11:44:38 +0200 Subject: fsnotify: split fsnotify_perm() into two hooks We would like to make changes to the fsnotify access permission hook - add file range arguments and add the pre modify event. In preparation for these changes, split the fsnotify_perm() hook into fsnotify_open_perm() and fsnotify_file_perm(). This is needed for fanotify "pre content" events. Reviewed-by: Josef Bacik Reviewed-by: Jan Kara Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20231212094440.250945-4-amir73il@gmail.com Signed-off-by: Christian Brauner --- include/linux/fsnotify.h | 34 +++++++++++++++++++--------------- security/security.c | 4 ++-- 2 files changed, 21 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index bcb6609b54b3..926bb4461b9e 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -100,29 +100,33 @@ static inline int fsnotify_file(struct file *file, __u32 mask) return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } -/* Simple call site for access decisions */ -static inline int fsnotify_perm(struct file *file, int mask) +/* + * fsnotify_file_perm - permission hook before file access + */ +static inline int fsnotify_file_perm(struct file *file, int perm_mask) { - int ret; - __u32 fsnotify_mask = 0; + __u32 fsnotify_mask = FS_ACCESS_PERM; - if (!(mask & (MAY_READ | MAY_OPEN))) + if (!(perm_mask & MAY_READ)) return 0; - if (mask & MAY_OPEN) { - fsnotify_mask = FS_OPEN_PERM; + return fsnotify_file(file, fsnotify_mask); +} - if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); +/* + * fsnotify_open_perm - permission hook before file open + */ +static inline int fsnotify_open_perm(struct file *file) +{ + int ret; - if (ret) - return ret; - } - } else if (mask & MAY_READ) { - fsnotify_mask = FS_ACCESS_PERM; + if (file->f_flags & __FMODE_EXEC) { + ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); + if (ret) + return ret; } - return fsnotify_file(file, fsnotify_mask); + return fsnotify_file(file, FS_OPEN_PERM); } /* diff --git a/security/security.c b/security/security.c index dcb3e7014f9b..d7f3703c5905 100644 --- a/security/security.c +++ b/security/security.c @@ -2586,7 +2586,7 @@ int security_file_permission(struct file *file, int mask) if (ret) return ret; - return fsnotify_perm(file, mask); + return fsnotify_file_perm(file, mask); } /** @@ -2837,7 +2837,7 @@ int security_file_open(struct file *file) if (ret) return ret; - return fsnotify_perm(file, MAY_OPEN); + return fsnotify_open_perm(file); } /** -- cgit v1.2.3 From cb383f06686734ef04daf63a4369566800717b7b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 12 Dec 2023 11:44:39 +0200 Subject: fsnotify: assert that file_start_write() is not held in permission hooks filesystem may be modified in the context of fanotify permission events (e.g. by HSM service), so assert that sb freeze protection is not held. If the assertion fails, then the following deadlock would be possible: CPU0 CPU1 CPU2 ------------------------------------------------------------------------- file_start_write()#0 ... fsnotify_perm() fanotify_get_response() => (read event and fill file) ... ... freeze_super() ... sb_wait_write() ... vfs_write() file_start_write()#1 This example demonstrates a use case of an hierarchical storage management (HSM) service that uses fanotify permission events to fill the content of a file before access, while a 3rd process starts fsfreeze. This creates a circular dependeny: file_start_write()#0 => fanotify_get_response => file_start_write()#1 => sb_wait_write() => file_end_write()#0 Where file_end_write()#0 can never be called and none of the threads can make progress. The assertion is checked for both MAY_READ and MAY_WRITE permission hooks in preparation for a pre-modify permission event. The assertion is not checked for an open permission event, because do_open() takes mnt_want_write() in O_TRUNC case, meaning that it is not safe to write to filesystem in the content of an open permission event. Reviewed-by: Josef Bacik Reviewed-by: Jan Kara Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20231212094440.250945-5-amir73il@gmail.com Signed-off-by: Christian Brauner --- include/linux/fsnotify.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 926bb4461b9e..0a9d6a8a747a 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -107,6 +107,13 @@ static inline int fsnotify_file_perm(struct file *file, int perm_mask) { __u32 fsnotify_mask = FS_ACCESS_PERM; + /* + * filesystem may be modified in the context of permission events + * (e.g. by HSM filling a file on access), so sb freeze protection + * must not be held. + */ + lockdep_assert_once(file_write_not_started(file)); + if (!(perm_mask & MAY_READ)) return 0; -- cgit v1.2.3 From d9e5d31084b024734e64307521414ef0ae1d5333 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 12 Dec 2023 11:44:40 +0200 Subject: fsnotify: optionally pass access range in file permission hooks In preparation for pre-content permission events with file access range, move fsnotify_file_perm() hook out of security_file_permission() and into the callers. Callers that have the access range information call the new hook fsnotify_file_area_perm() with the access range. Reviewed-by: Jan Kara Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20231212094440.250945-6-amir73il@gmail.com Signed-off-by: Christian Brauner --- fs/open.c | 4 ++++ fs/read_write.c | 10 ++++++++-- fs/readdir.c | 4 ++++ fs/remap_range.c | 8 +++++++- include/linux/fsnotify.h | 13 +++++++++++-- security/security.c | 8 +------- 6 files changed, 35 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/open.c b/fs/open.c index 02dc608d40d8..d877228d5939 100644 --- a/fs/open.c +++ b/fs/open.c @@ -304,6 +304,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (ret) return ret; + ret = fsnotify_file_area_perm(file, MAY_WRITE, &offset, len); + if (ret) + return ret; + if (S_ISFIFO(inode->i_mode)) return -ESPIPE; diff --git a/fs/read_write.c b/fs/read_write.c index e3abf603eaaf..d4c036e82b6c 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -354,6 +354,9 @@ out_putf: int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) { + int mask = read_write == READ ? MAY_READ : MAY_WRITE; + int ret; + if (unlikely((ssize_t) count < 0)) return -EINVAL; @@ -371,8 +374,11 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t } } - return security_file_permission(file, - read_write == READ ? MAY_READ : MAY_WRITE); + ret = security_file_permission(file, mask); + if (ret) + return ret; + + return fsnotify_file_area_perm(file, mask, ppos, count); } EXPORT_SYMBOL(rw_verify_area); diff --git a/fs/readdir.c b/fs/readdir.c index c8c46e294431..278bc0254732 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -96,6 +96,10 @@ int iterate_dir(struct file *file, struct dir_context *ctx) if (res) goto out; + res = fsnotify_file_perm(file, MAY_READ); + if (res) + goto out; + res = down_read_killable(&inode->i_rwsem); if (res) goto out; diff --git a/fs/remap_range.c b/fs/remap_range.c index 12131f2a6c9e..f8c1120b8311 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -102,7 +102,9 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, static int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write) { + int mask = write ? MAY_WRITE : MAY_READ; loff_t tmp; + int ret; if (unlikely(pos < 0 || len < 0)) return -EINVAL; @@ -110,7 +112,11 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len, if (unlikely(check_add_overflow(pos, len, &tmp))) return -EINVAL; - return security_file_permission(file, write ? MAY_WRITE : MAY_READ); + ret = security_file_permission(file, mask); + if (ret) + return ret; + + return fsnotify_file_area_perm(file, mask, &pos, len); } /* diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 0a9d6a8a747a..11e6434b8e71 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -101,9 +101,10 @@ static inline int fsnotify_file(struct file *file, __u32 mask) } /* - * fsnotify_file_perm - permission hook before file access + * fsnotify_file_area_perm - permission hook before access to file range */ -static inline int fsnotify_file_perm(struct file *file, int perm_mask) +static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, + const loff_t *ppos, size_t count) { __u32 fsnotify_mask = FS_ACCESS_PERM; @@ -120,6 +121,14 @@ static inline int fsnotify_file_perm(struct file *file, int perm_mask) return fsnotify_file(file, fsnotify_mask); } +/* + * fsnotify_file_perm - permission hook before file access + */ +static inline int fsnotify_file_perm(struct file *file, int perm_mask) +{ + return fsnotify_file_area_perm(file, perm_mask, NULL, 0); +} + /* * fsnotify_open_perm - permission hook before file open */ diff --git a/security/security.c b/security/security.c index d7f3703c5905..2a7fc7881cbc 100644 --- a/security/security.c +++ b/security/security.c @@ -2580,13 +2580,7 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir, */ int security_file_permission(struct file *file, int mask) { - int ret; - - ret = call_int_hook(file_permission, 0, file, mask); - if (ret) - return ret; - - return fsnotify_file_perm(file, mask); + return call_int_hook(file_permission, 0, file, mask); } /** -- cgit v1.2.3 From f91a704f7161c2cf0fcd41fa9fbec4355b813fff Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 2 Oct 2023 17:19:46 +0300 Subject: fs: prepare for stackable filesystems backing file helpers In preparation for factoring out some backing file io helpers from overlayfs, move backing_file_open() into a new file fs/backing-file.c and header. Add a MAINTAINERS entry for stackable filesystems and add a Kconfig FS_STACK which stackable filesystems need to select. For now, the backing_file struct, the backing_file alloc/free functions and the backing_file_real_path() accessor remain internal to file_table.c. We may change that in the future. Signed-off-by: Amir Goldstein --- MAINTAINERS | 9 +++++++++ fs/Kconfig | 4 ++++ fs/Makefile | 1 + fs/backing-file.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ fs/open.c | 38 ----------------------------------- fs/overlayfs/Kconfig | 1 + fs/overlayfs/file.c | 1 + include/linux/backing-file.h | 17 ++++++++++++++++ include/linux/fs.h | 3 --- 9 files changed, 81 insertions(+), 41 deletions(-) create mode 100644 fs/backing-file.c create mode 100644 include/linux/backing-file.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 97f51d5ec1cf..d9ae2688f5c8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8186,6 +8186,15 @@ S: Supported F: fs/iomap/ F: include/linux/iomap.h +FILESYSTEMS [STACKABLE] +M: Miklos Szeredi +M: Amir Goldstein +L: linux-fsdevel@vger.kernel.org +L: linux-unionfs@vger.kernel.org +S: Maintained +F: fs/backing-file.c +F: include/linux/backing-file.h + FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: Riku Voipio L: linux-hwmon@vger.kernel.org diff --git a/fs/Kconfig b/fs/Kconfig index fd1f655b4f1f..c47fa4eb9282 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -18,6 +18,10 @@ config VALIDATE_FS_PARSER config FS_IOMAP bool +# Stackable filesystems +config FS_STACK + bool + config BUFFER_HEAD bool diff --git a/fs/Makefile b/fs/Makefile index 75522f88e763..a6962c588962 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o +obj-$(CONFIG_FS_STACK) += backing-file.o obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ diff --git a/fs/backing-file.c b/fs/backing-file.c new file mode 100644 index 000000000000..04b33036f709 --- /dev/null +++ b/fs/backing-file.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Common helpers for stackable filesystems and backing files. + * + * Copyright (C) 2023 CTERA Networks. + */ + +#include +#include + +#include "internal.h" + +/** + * backing_file_open - open a backing file for kernel internal use + * @user_path: path that the user reuqested to open + * @flags: open flags + * @real_path: path of the backing file + * @cred: credentials for open + * + * Open a backing file for a stackable filesystem (e.g., overlayfs). + * @user_path may be on the stackable filesystem and @real_path on the + * underlying filesystem. In this case, we want to be able to return the + * @user_path of the stackable filesystem. This is done by embedding the + * returned file into a container structure that also stores the stacked + * file's path, which can be retrieved using backing_file_user_path(). + */ +struct file *backing_file_open(const struct path *user_path, int flags, + const struct path *real_path, + const struct cred *cred) +{ + struct file *f; + int error; + + f = alloc_empty_backing_file(flags, cred); + if (IS_ERR(f)) + return f; + + path_get(user_path); + *backing_file_user_path(f) = *user_path; + error = vfs_open(real_path, f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + + return f; +} +EXPORT_SYMBOL_GPL(backing_file_open); diff --git a/fs/open.c b/fs/open.c index d877228d5939..a75054237437 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1184,44 +1184,6 @@ struct file *kernel_file_open(const struct path *path, int flags, } EXPORT_SYMBOL_GPL(kernel_file_open); -/** - * backing_file_open - open a backing file for kernel internal use - * @user_path: path that the user reuqested to open - * @flags: open flags - * @real_path: path of the backing file - * @cred: credentials for open - * - * Open a backing file for a stackable filesystem (e.g., overlayfs). - * @user_path may be on the stackable filesystem and @real_path on the - * underlying filesystem. In this case, we want to be able to return the - * @user_path of the stackable filesystem. This is done by embedding the - * returned file into a container structure that also stores the stacked - * file's path, which can be retrieved using backing_file_user_path(). - */ -struct file *backing_file_open(const struct path *user_path, int flags, - const struct path *real_path, - const struct cred *cred) -{ - struct file *f; - int error; - - f = alloc_empty_backing_file(flags, cred); - if (IS_ERR(f)) - return f; - - path_get(user_path); - *backing_file_user_path(f) = *user_path; - f->f_path = *real_path; - error = do_dentry_open(f, d_inode(real_path->dentry), NULL); - if (error) { - fput(f); - f = ERR_PTR(error); - } - - return f; -} -EXPORT_SYMBOL_GPL(backing_file_open); - #define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE)) #define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC) diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index fec5020c3495..2ac67e04a6fb 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config OVERLAY_FS tristate "Overlay filesystem support" + select FS_STACK select EXPORTFS help An overlay filesystem combines two filesystems - an 'upper' filesystem diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 4e46420c8fdd..a6da3eaf6d4f 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "overlayfs.h" #include "../internal.h" /* for sb_init_dio_done_wq */ diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h new file mode 100644 index 000000000000..55c9e804f780 --- /dev/null +++ b/include/linux/backing-file.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Common helpers for stackable filesystems and backing files. + * + * Copyright (C) 2023 CTERA Networks. + */ + +#ifndef _LINUX_BACKING_FILE_H +#define _LINUX_BACKING_FILE_H + +#include + +struct file *backing_file_open(const struct path *user_path, int flags, + const struct path *real_path, + const struct cred *cred); + +#endif /* _LINUX_BACKING_FILE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 900d0cd55b50..db5d07e6e02e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2575,9 +2575,6 @@ struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); -struct file *backing_file_open(const struct path *user_path, int flags, - const struct path *real_path, - const struct cred *cred); struct path *backing_file_user_path(struct file *f); /* -- cgit v1.2.3 From a6293b3e285cd0d7692141d7981a5f144f0e2f0b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 22 Nov 2023 17:48:52 +0200 Subject: fs: factor out backing_file_{read,write}_iter() helpers Overlayfs submits files io to backing files on other filesystems. Factor out some common helpers to perform io to backing files, into fs/backing-file.c. Suggested-by: Miklos Szeredi Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com Signed-off-by: Amir Goldstein --- fs/backing-file.c | 210 +++++++++++++++++++++++++++++++++++++++++++ fs/overlayfs/file.c | 188 +++----------------------------------- fs/overlayfs/overlayfs.h | 8 +- fs/overlayfs/super.c | 11 +-- include/linux/backing-file.h | 15 ++++ 5 files changed, 247 insertions(+), 185 deletions(-) (limited to 'include/linux') diff --git a/fs/backing-file.c b/fs/backing-file.c index 04b33036f709..323187a49da3 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -2,6 +2,9 @@ /* * Common helpers for stackable filesystems and backing files. * + * Forked from fs/overlayfs/file.c. + * + * Copyright (C) 2017 Red Hat, Inc. * Copyright (C) 2023 CTERA Networks. */ @@ -46,3 +49,210 @@ struct file *backing_file_open(const struct path *user_path, int flags, return f; } EXPORT_SYMBOL_GPL(backing_file_open); + +struct backing_aio { + struct kiocb iocb; + refcount_t ref; + struct kiocb *orig_iocb; + /* used for aio completion */ + void (*end_write)(struct file *); + struct work_struct work; + long res; +}; + +static struct kmem_cache *backing_aio_cachep; + +#define BACKING_IOCB_MASK \ + (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND) + +static rwf_t iocb_to_rw_flags(int flags) +{ + return (__force rwf_t)(flags & BACKING_IOCB_MASK); +} + +static void backing_aio_put(struct backing_aio *aio) +{ + if (refcount_dec_and_test(&aio->ref)) { + fput(aio->iocb.ki_filp); + kmem_cache_free(backing_aio_cachep, aio); + } +} + +static void backing_aio_cleanup(struct backing_aio *aio, long res) +{ + struct kiocb *iocb = &aio->iocb; + struct kiocb *orig_iocb = aio->orig_iocb; + + if (aio->end_write) + aio->end_write(orig_iocb->ki_filp); + + orig_iocb->ki_pos = iocb->ki_pos; + backing_aio_put(aio); +} + +static void backing_aio_rw_complete(struct kiocb *iocb, long res) +{ + struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb); + struct kiocb *orig_iocb = aio->orig_iocb; + + if (iocb->ki_flags & IOCB_WRITE) + kiocb_end_write(iocb); + + backing_aio_cleanup(aio, res); + orig_iocb->ki_complete(orig_iocb, res); +} + +static void backing_aio_complete_work(struct work_struct *work) +{ + struct backing_aio *aio = container_of(work, struct backing_aio, work); + + backing_aio_rw_complete(&aio->iocb, aio->res); +} + +static void backing_aio_queue_completion(struct kiocb *iocb, long res) +{ + struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb); + + /* + * Punt to a work queue to serialize updates of mtime/size. + */ + aio->res = res; + INIT_WORK(&aio->work, backing_aio_complete_work); + queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq, + &aio->work); +} + +static int backing_aio_init_wq(struct kiocb *iocb) +{ + struct super_block *sb = file_inode(iocb->ki_filp)->i_sb; + + if (sb->s_dio_done_wq) + return 0; + + return sb_init_dio_done_wq(sb); +} + + +ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + struct backing_file_ctx *ctx) +{ + struct backing_aio *aio = NULL; + const struct cred *old_cred; + ssize_t ret; + + if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) + return -EIO; + + if (!iov_iter_count(iter)) + return 0; + + if (iocb->ki_flags & IOCB_DIRECT && + !(file->f_mode & FMODE_CAN_ODIRECT)) + return -EINVAL; + + old_cred = override_creds(ctx->cred); + if (is_sync_kiocb(iocb)) { + rwf_t rwf = iocb_to_rw_flags(flags); + + ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf); + } else { + ret = -ENOMEM; + aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); + if (!aio) + goto out; + + aio->orig_iocb = iocb; + kiocb_clone(&aio->iocb, iocb, get_file(file)); + aio->iocb.ki_complete = backing_aio_rw_complete; + refcount_set(&aio->ref, 2); + ret = vfs_iocb_iter_read(file, &aio->iocb, iter); + backing_aio_put(aio); + if (ret != -EIOCBQUEUED) + backing_aio_cleanup(aio, ret); + } +out: + revert_creds(old_cred); + + if (ctx->accessed) + ctx->accessed(ctx->user_file); + + return ret; +} +EXPORT_SYMBOL_GPL(backing_file_read_iter); + +ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + struct backing_file_ctx *ctx) +{ + const struct cred *old_cred; + ssize_t ret; + + if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) + return -EIO; + + if (!iov_iter_count(iter)) + return 0; + + ret = file_remove_privs(ctx->user_file); + if (ret) + return ret; + + if (iocb->ki_flags & IOCB_DIRECT && + !(file->f_mode & FMODE_CAN_ODIRECT)) + return -EINVAL; + + /* + * Stacked filesystems don't support deferred completions, don't copy + * this property in case it is set by the issuer. + */ + flags &= ~IOCB_DIO_CALLER_COMP; + + old_cred = override_creds(ctx->cred); + if (is_sync_kiocb(iocb)) { + rwf_t rwf = iocb_to_rw_flags(flags); + + ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf); + if (ctx->end_write) + ctx->end_write(ctx->user_file); + } else { + struct backing_aio *aio; + + ret = backing_aio_init_wq(iocb); + if (ret) + goto out; + + ret = -ENOMEM; + aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); + if (!aio) + goto out; + + aio->orig_iocb = iocb; + aio->end_write = ctx->end_write; + kiocb_clone(&aio->iocb, iocb, get_file(file)); + aio->iocb.ki_flags = flags; + aio->iocb.ki_complete = backing_aio_queue_completion; + refcount_set(&aio->ref, 2); + ret = vfs_iocb_iter_write(file, &aio->iocb, iter); + backing_aio_put(aio); + if (ret != -EIOCBQUEUED) + backing_aio_cleanup(aio, ret); + } +out: + revert_creds(old_cred); + + return ret; +} +EXPORT_SYMBOL_GPL(backing_file_write_iter); + +static int __init backing_aio_init(void) +{ + backing_aio_cachep = kmem_cache_create("backing_aio", + sizeof(struct backing_aio), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!backing_aio_cachep) + return -ENOMEM; + + return 0; +} +fs_initcall(backing_aio_init); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index a6da3eaf6d4f..1b578cb27a26 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -16,19 +16,6 @@ #include #include "overlayfs.h" -#include "../internal.h" /* for sb_init_dio_done_wq */ - -struct ovl_aio_req { - struct kiocb iocb; - refcount_t ref; - struct kiocb *orig_iocb; - /* used for aio completion */ - struct work_struct work; - long res; -}; - -static struct kmem_cache *ovl_aio_request_cachep; - static char ovl_whatisit(struct inode *inode, struct inode *realinode) { if (realinode != ovl_inode_upper(inode)) @@ -275,84 +262,16 @@ static void ovl_file_accessed(struct file *file) touch_atime(&file->f_path); } -#define OVL_IOCB_MASK \ - (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND) - -static rwf_t iocb_to_rw_flags(int flags) -{ - return (__force rwf_t)(flags & OVL_IOCB_MASK); -} - -static inline void ovl_aio_put(struct ovl_aio_req *aio_req) -{ - if (refcount_dec_and_test(&aio_req->ref)) { - fput(aio_req->iocb.ki_filp); - kmem_cache_free(ovl_aio_request_cachep, aio_req); - } -} - -static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) -{ - struct kiocb *iocb = &aio_req->iocb; - struct kiocb *orig_iocb = aio_req->orig_iocb; - - if (iocb->ki_flags & IOCB_WRITE) - ovl_file_modified(orig_iocb->ki_filp); - - orig_iocb->ki_pos = iocb->ki_pos; - ovl_aio_put(aio_req); -} - -static void ovl_aio_rw_complete(struct kiocb *iocb, long res) -{ - struct ovl_aio_req *aio_req = container_of(iocb, - struct ovl_aio_req, iocb); - struct kiocb *orig_iocb = aio_req->orig_iocb; - - if (iocb->ki_flags & IOCB_WRITE) - kiocb_end_write(iocb); - - ovl_aio_cleanup_handler(aio_req); - orig_iocb->ki_complete(orig_iocb, res); -} - -static void ovl_aio_complete_work(struct work_struct *work) -{ - struct ovl_aio_req *aio_req = container_of(work, - struct ovl_aio_req, work); - - ovl_aio_rw_complete(&aio_req->iocb, aio_req->res); -} - -static void ovl_aio_queue_completion(struct kiocb *iocb, long res) -{ - struct ovl_aio_req *aio_req = container_of(iocb, - struct ovl_aio_req, iocb); - struct kiocb *orig_iocb = aio_req->orig_iocb; - - /* - * Punt to a work queue to serialize updates of mtime/size. - */ - aio_req->res = res; - INIT_WORK(&aio_req->work, ovl_aio_complete_work); - queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq, - &aio_req->work); -} - -static int ovl_init_aio_done_wq(struct super_block *sb) -{ - if (sb->s_dio_done_wq) - return 0; - - return sb_init_dio_done_wq(sb); -} - static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct fd real; - const struct cred *old_cred; ssize_t ret; + struct backing_file_ctx ctx = { + .cred = ovl_creds(file_inode(file)->i_sb), + .user_file = file, + .accessed = ovl_file_accessed, + }; if (!iov_iter_count(iter)) return 0; @@ -361,37 +280,8 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (ret) return ret; - ret = -EINVAL; - if (iocb->ki_flags & IOCB_DIRECT && - !(real.file->f_mode & FMODE_CAN_ODIRECT)) - goto out_fdput; - - old_cred = ovl_override_creds(file_inode(file)->i_sb); - if (is_sync_kiocb(iocb)) { - rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags); - - ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf); - } else { - struct ovl_aio_req *aio_req; - - ret = -ENOMEM; - aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); - if (!aio_req) - goto out; - - aio_req->orig_iocb = iocb; - kiocb_clone(&aio_req->iocb, iocb, get_file(real.file)); - aio_req->iocb.ki_complete = ovl_aio_rw_complete; - refcount_set(&aio_req->ref, 2); - ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); - ovl_aio_put(aio_req); - if (ret != -EIOCBQUEUED) - ovl_aio_cleanup_handler(aio_req); - } -out: - revert_creds(old_cred); - ovl_file_accessed(file); -out_fdput: + ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags, + &ctx); fdput(real); return ret; @@ -402,9 +292,13 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct fd real; - const struct cred *old_cred; ssize_t ret; int ifl = iocb->ki_flags; + struct backing_file_ctx ctx = { + .cred = ovl_creds(inode->i_sb), + .user_file = file, + .end_write = ovl_file_modified, + }; if (!iov_iter_count(iter)) return 0; @@ -412,19 +306,11 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) inode_lock(inode); /* Update mode */ ovl_copyattr(inode); - ret = file_remove_privs(file); - if (ret) - goto out_unlock; ret = ovl_real_fdget(file, &real); if (ret) goto out_unlock; - ret = -EINVAL; - if (iocb->ki_flags & IOCB_DIRECT && - !(real.file->f_mode & FMODE_CAN_ODIRECT)) - goto out_fdput; - if (!ovl_should_sync(OVL_FS(inode->i_sb))) ifl &= ~(IOCB_DSYNC | IOCB_SYNC); @@ -433,39 +319,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) * this property in case it is set by the issuer. */ ifl &= ~IOCB_DIO_CALLER_COMP; - - old_cred = ovl_override_creds(file_inode(file)->i_sb); - if (is_sync_kiocb(iocb)) { - rwf_t rwf = iocb_to_rw_flags(ifl); - - ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf); - /* Update size */ - ovl_file_modified(file); - } else { - struct ovl_aio_req *aio_req; - - ret = ovl_init_aio_done_wq(inode->i_sb); - if (ret) - goto out; - - ret = -ENOMEM; - aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); - if (!aio_req) - goto out; - - aio_req->orig_iocb = iocb; - kiocb_clone(&aio_req->iocb, iocb, get_file(real.file)); - aio_req->iocb.ki_flags = ifl; - aio_req->iocb.ki_complete = ovl_aio_queue_completion; - refcount_set(&aio_req->ref, 2); - ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); - ovl_aio_put(aio_req); - if (ret != -EIOCBQUEUED) - ovl_aio_cleanup_handler(aio_req); - } -out: - revert_creds(old_cred); -out_fdput: + ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx); fdput(real); out_unlock: @@ -777,19 +631,3 @@ const struct file_operations ovl_file_operations = { .copy_file_range = ovl_copy_file_range, .remap_file_range = ovl_remap_file_range, }; - -int __init ovl_aio_request_cache_init(void) -{ - ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req", - sizeof(struct ovl_aio_req), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ovl_aio_request_cachep) - return -ENOMEM; - - return 0; -} - -void ovl_aio_request_cache_destroy(void) -{ - kmem_cache_destroy(ovl_aio_request_cachep); -} diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ca88b2636a57..509a57c85fae 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -417,6 +417,12 @@ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); + +static inline const struct cred *ovl_creds(struct super_block *sb) +{ + return OVL_FS(sb)->creator_cred; +} + int ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); bool ovl_index_all(struct super_block *sb); @@ -829,8 +835,6 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, /* file.c */ extern const struct file_operations ovl_file_operations; -int __init ovl_aio_request_cache_init(void); -void ovl_aio_request_cache_destroy(void); int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa); int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa); int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index a0967bb25003..bcd4c314a7eb 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1501,14 +1501,10 @@ static int __init ovl_init(void) if (ovl_inode_cachep == NULL) return -ENOMEM; - err = ovl_aio_request_cache_init(); - if (!err) { - err = register_filesystem(&ovl_fs_type); - if (!err) - return 0; + err = register_filesystem(&ovl_fs_type); + if (!err) + return 0; - ovl_aio_request_cache_destroy(); - } kmem_cache_destroy(ovl_inode_cachep); return err; @@ -1524,7 +1520,6 @@ static void __exit ovl_exit(void) */ rcu_barrier(); kmem_cache_destroy(ovl_inode_cachep); - ovl_aio_request_cache_destroy(); } module_init(ovl_init); diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h index 55c9e804f780..0648d548a418 100644 --- a/include/linux/backing-file.h +++ b/include/linux/backing-file.h @@ -9,9 +9,24 @@ #define _LINUX_BACKING_FILE_H #include +#include +#include + +struct backing_file_ctx { + const struct cred *cred; + struct file *user_file; + void (*accessed)(struct file *); + void (*end_write)(struct file *); +}; struct file *backing_file_open(const struct path *user_path, int flags, const struct path *real_path, const struct cred *cred); +ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + struct backing_file_ctx *ctx); +ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + struct backing_file_ctx *ctx); #endif /* _LINUX_BACKING_FILE_H */ -- cgit v1.2.3 From 9b7e9e2f5d5c3d079ec46bc71b114012e362ea6e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 13 Oct 2023 12:13:12 +0300 Subject: fs: factor out backing_file_splice_{read,write}() helpers There is not much in those helpers, but it makes sense to have them logically next to the backing_file_{read,write}_iter() helpers as they may grow more common logic in the future. Signed-off-by: Amir Goldstein --- fs/backing-file.c | 51 ++++++++++++++++++++++++++++++++++++++++++++ fs/overlayfs/file.c | 33 +++++++++++----------------- include/linux/backing-file.h | 8 +++++++ 3 files changed, 72 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/backing-file.c b/fs/backing-file.c index 323187a49da3..ddd35c1d6c71 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -10,6 +10,7 @@ #include #include +#include #include "internal.h" @@ -245,6 +246,56 @@ out: } EXPORT_SYMBOL_GPL(backing_file_write_iter); +ssize_t backing_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags, + struct backing_file_ctx *ctx) +{ + const struct cred *old_cred; + ssize_t ret; + + if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING))) + return -EIO; + + old_cred = override_creds(ctx->cred); + ret = vfs_splice_read(in, ppos, pipe, len, flags); + revert_creds(old_cred); + + if (ctx->accessed) + ctx->accessed(ctx->user_file); + + return ret; +} +EXPORT_SYMBOL_GPL(backing_file_splice_read); + +ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, + struct backing_file_ctx *ctx) +{ + const struct cred *old_cred; + ssize_t ret; + + if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING))) + return -EIO; + + ret = file_remove_privs(ctx->user_file); + if (ret) + return ret; + + old_cred = override_creds(ctx->cred); + file_start_write(out); + ret = iter_file_splice_write(pipe, out, ppos, len, flags); + file_end_write(out); + revert_creds(old_cred); + + if (ctx->end_write) + ctx->end_write(ctx->user_file); + + return ret; +} +EXPORT_SYMBOL_GPL(backing_file_splice_write); + static int __init backing_aio_init(void) { backing_aio_cachep = kmem_cache_create("backing_aio", diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 1b578cb27a26..69b52d2f9c74 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -332,20 +331,21 @@ static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - const struct cred *old_cred; struct fd real; ssize_t ret; + struct backing_file_ctx ctx = { + .cred = ovl_creds(file_inode(in)->i_sb), + .user_file = in, + .accessed = ovl_file_accessed, + }; ret = ovl_real_fdget(in, &real); if (ret) return ret; - old_cred = ovl_override_creds(file_inode(in)->i_sb); - ret = vfs_splice_read(real.file, ppos, pipe, len, flags); - revert_creds(old_cred); - ovl_file_accessed(in); - + ret = backing_file_splice_read(real.file, ppos, pipe, len, flags, &ctx); fdput(real); + return ret; } @@ -361,30 +361,23 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct fd real; - const struct cred *old_cred; struct inode *inode = file_inode(out); ssize_t ret; + struct backing_file_ctx ctx = { + .cred = ovl_creds(inode->i_sb), + .user_file = out, + .end_write = ovl_file_modified, + }; inode_lock(inode); /* Update mode */ ovl_copyattr(inode); - ret = file_remove_privs(out); - if (ret) - goto out_unlock; ret = ovl_real_fdget(out, &real); if (ret) goto out_unlock; - old_cred = ovl_override_creds(inode->i_sb); - file_start_write(real.file); - - ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); - - file_end_write(real.file); - /* Update size */ - ovl_file_modified(out); - revert_creds(old_cred); + ret = backing_file_splice_write(pipe, real.file, ppos, len, flags, &ctx); fdput(real); out_unlock: diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h index 0648d548a418..0546d5b1c9f5 100644 --- a/include/linux/backing-file.h +++ b/include/linux/backing-file.h @@ -28,5 +28,13 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, struct kiocb *iocb, int flags, struct backing_file_ctx *ctx); +ssize_t backing_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags, + struct backing_file_ctx *ctx); +ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, + struct backing_file_ctx *ctx); #endif /* _LINUX_BACKING_FILE_H */ -- cgit v1.2.3 From f567377e406c032fff0799bde4fdf4a977529b84 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 13 Oct 2023 12:49:37 +0300 Subject: fs: factor out backing_file_mmap() helper Assert that the file object is allocated in a backing_file container so that file_user_path() could be used to display the user path and not the backing file's path in /proc//maps. Signed-off-by: Amir Goldstein --- fs/backing-file.c | 27 +++++++++++++++++++++++++++ fs/overlayfs/file.c | 23 ++++++----------------- include/linux/backing-file.h | 2 ++ 3 files changed, 35 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/backing-file.c b/fs/backing-file.c index ddd35c1d6c71..a681f38d84d8 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "internal.h" @@ -296,6 +297,32 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, } EXPORT_SYMBOL_GPL(backing_file_splice_write); +int backing_file_mmap(struct file *file, struct vm_area_struct *vma, + struct backing_file_ctx *ctx) +{ + const struct cred *old_cred; + int ret; + + if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) || + WARN_ON_ONCE(ctx->user_file != vma->vm_file)) + return -EIO; + + if (!file->f_op->mmap) + return -ENODEV; + + vma_set_file(vma, file); + + old_cred = override_creds(ctx->cred); + ret = call_mmap(vma->vm_file, vma); + revert_creds(old_cred); + + if (ctx->accessed) + ctx->accessed(ctx->user_file); + + return ret; +} +EXPORT_SYMBOL_GPL(backing_file_mmap); + static int __init backing_aio_init(void) { backing_aio_cachep = kmem_cache_create("backing_aio", diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 69b52d2f9c74..05536964d37f 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include "overlayfs.h" @@ -415,23 +414,13 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) static int ovl_mmap(struct file *file, struct vm_area_struct *vma) { struct file *realfile = file->private_data; - const struct cred *old_cred; - int ret; - - if (!realfile->f_op->mmap) - return -ENODEV; - - if (WARN_ON(file != vma->vm_file)) - return -EIO; - - vma_set_file(vma, realfile); - - old_cred = ovl_override_creds(file_inode(file)->i_sb); - ret = call_mmap(vma->vm_file, vma); - revert_creds(old_cred); - ovl_file_accessed(file); + struct backing_file_ctx ctx = { + .cred = ovl_creds(file_inode(file)->i_sb), + .user_file = file, + .accessed = ovl_file_accessed, + }; - return ret; + return backing_file_mmap(realfile, vma, &ctx); } static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len) diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h index 0546d5b1c9f5..3f1fe1774f1b 100644 --- a/include/linux/backing-file.h +++ b/include/linux/backing-file.h @@ -36,5 +36,7 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags, struct backing_file_ctx *ctx); +int backing_file_mmap(struct file *file, struct vm_area_struct *vma, + struct backing_file_ctx *ctx); #endif /* _LINUX_BACKING_FILE_H */ -- cgit v1.2.3 From c39e2ae3943d4ee278af4e1b1dcfd5946da1089b Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 28 Dec 2023 11:06:08 +0100 Subject: fs: fix __sb_write_started() kerneldoc formatting When running 'make htmldocs', I see the following warning: Documentation/filesystems/api-summary:14: ./include/linux/fs.h:1659: WARNING: Definition list ends without a blank line; unexpected unindent. The official guidance [1] seems to be to use lists, which will prevent both the "unexpected unindent" warning as well as ensure that each line is formatted on a separate line in the HTML output instead of being all considered a single paragraph. [1]: https://docs.kernel.org/doc-guide/kernel-doc.html#return-values Fixes: 8802e580ee64 ("fs: create __sb_write_started() helper") Cc: Amir Goldstein Cc: Josef Bacik Cc: Jan Kara Signed-off-by: Vegard Nossum Link: https://lore.kernel.org/r/20231228100608.3123987-1-vegard.nossum@oracle.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index db5d07e6e02e..473063f385e5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1650,9 +1650,9 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level) * @sb: the super we write to * @level: the freeze level * - * > 0 sb freeze level is held - * 0 sb freeze level is not held - * < 0 !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN + * * > 0 - sb freeze level is held + * * 0 - sb freeze level is not held + * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN */ static inline int __sb_write_started(const struct super_block *sb, int level) { -- cgit v1.2.3