summaryrefslogtreecommitdiff
path: root/include/linux/fs.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r--include/linux/fs.h177
1 files changed, 148 insertions, 29 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f7efc6866ebc..ef41a8213a6f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_FS_H
#define _LINUX_FS_H
+#include <linux/vfsdebug.h>
#include <linux/linkage.h>
#include <linux/wait_bit.h>
#include <linux/kdev_t.h>
@@ -173,13 +174,20 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_NOREUSE ((__force fmode_t)(1 << 23))
-/* FMODE_* bit 24 */
-
/* File is embedded in backing_file object */
-#define FMODE_BACKING ((__force fmode_t)(1 << 25))
+#define FMODE_BACKING ((__force fmode_t)(1 << 24))
-/* File was opened by fanotify and shouldn't generate fanotify events */
-#define FMODE_NONOTIFY ((__force fmode_t)(1 << 26))
+/*
+ * Together with FMODE_NONOTIFY_PERM defines which fsnotify events shouldn't be
+ * generated (see below)
+ */
+#define FMODE_NONOTIFY ((__force fmode_t)(1 << 25))
+
+/*
+ * Together with FMODE_NONOTIFY defines which fsnotify events shouldn't be
+ * generated (see below)
+ */
+#define FMODE_NONOTIFY_PERM ((__force fmode_t)(1 << 26))
/* File is capable of returning -EAGAIN if I/O will block */
#define FMODE_NOWAIT ((__force fmode_t)(1 << 27))
@@ -191,6 +199,31 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29))
/*
+ * The two FMODE_NONOTIFY* define which fsnotify events should not be generated
+ * for a file. These are the possible values of (f->f_mode &
+ * FMODE_FSNOTIFY_MASK) and their meaning:
+ *
+ * FMODE_NONOTIFY - suppress all (incl. non-permission) events.
+ * FMODE_NONOTIFY_PERM - suppress permission (incl. pre-content) events.
+ * FMODE_NONOTIFY | FMODE_NONOTIFY_PERM - suppress only pre-content events.
+ */
+#define FMODE_FSNOTIFY_MASK \
+ (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM)
+
+#define FMODE_FSNOTIFY_NONE(mode) \
+ ((mode & FMODE_FSNOTIFY_MASK) == FMODE_NONOTIFY)
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+#define FMODE_FSNOTIFY_PERM(mode) \
+ ((mode & FMODE_FSNOTIFY_MASK) == 0 || \
+ (mode & FMODE_FSNOTIFY_MASK) == (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM))
+#define FMODE_FSNOTIFY_HSM(mode) \
+ ((mode & FMODE_FSNOTIFY_MASK) == 0)
+#else
+#define FMODE_FSNOTIFY_PERM(mode) 0
+#define FMODE_FSNOTIFY_HSM(mode) 0
+#endif
+
+/*
* Attribute flags. These should be or-ed together to figure out what
* has been changed!
*/
@@ -322,6 +355,7 @@ struct readahead_control;
#define IOCB_NOWAIT (__force int) RWF_NOWAIT
#define IOCB_APPEND (__force int) RWF_APPEND
#define IOCB_ATOMIC (__force int) RWF_ATOMIC
+#define IOCB_DONTCACHE (__force int) RWF_DONTCACHE
/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD (1 << 16)
@@ -348,6 +382,7 @@ struct readahead_control;
#define IOCB_DIO_CALLER_COMP (1 << 22)
/* kiocb is a read or write operation submitted by fs/aio.c. */
#define IOCB_AIO_RW (1 << 23)
+#define IOCB_HAS_METADATA (1 << 24)
/* for use in trace events */
#define TRACE_IOCB_STRINGS \
@@ -356,7 +391,8 @@ struct readahead_control;
{ IOCB_SYNC, "SYNC" }, \
{ IOCB_NOWAIT, "NOWAIT" }, \
{ IOCB_APPEND, "APPEND" }, \
- { IOCB_ATOMIC, "ATOMIC"}, \
+ { IOCB_ATOMIC, "ATOMIC" }, \
+ { IOCB_DONTCACHE, "DONTCACHE" }, \
{ IOCB_EVENTFD, "EVENTFD"}, \
{ IOCB_DIRECT, "DIRECT" }, \
{ IOCB_WRITE, "WRITE" }, \
@@ -626,6 +662,7 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_XATTR 0x0008
#define IOP_DEFAULT_READLINK 0x0010
#define IOP_MGTIME 0x0020
+#define IOP_CACHED_LINK 0x0040
/*
* Keep mostly read-only and often accessed (especially for
@@ -723,7 +760,10 @@ struct inode {
};
struct file_lock_context *i_flctx;
struct address_space i_data;
- struct list_head i_devices;
+ union {
+ struct list_head i_devices;
+ int i_linklen;
+ };
union {
struct pipe_inode_info *i_pipe;
struct cdev *i_cdev;
@@ -749,6 +789,15 @@ struct inode {
void *i_private; /* fs or device private pointer */
} __randomize_layout;
+static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
+{
+ VFS_WARN_ON_INODE(strlen(link) != linklen, inode);
+ VFS_WARN_ON_INODE(inode->i_opflags & IOP_CACHED_LINK, inode);
+ inode->i_link = link;
+ inode->i_linklen = linklen;
+ inode->i_opflags |= IOP_CACHED_LINK;
+}
+
/*
* Get bit address from inode->i_state to use with wait_var_event()
* infrastructre.
@@ -1008,7 +1057,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
/**
* struct file - Represents a file
- * @f_ref: reference count
* @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
* @f_mode: FMODE_* flags often used in hotpaths
* @f_op: file operations
@@ -1018,12 +1066,12 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_flags: file flags
* @f_iocb_flags: iocb flags
* @f_cred: stashed credentials of creator/opener
+ * @f_owner: file owner
* @f_path: path of the file
* @f_pos_lock: lock protecting file position
* @f_pipe: specific to pipes
* @f_pos: file position
* @f_security: LSM security context of this file
- * @f_owner: file owner
* @f_wb_err: writeback error
* @f_sb_err: per sb writeback errors
* @f_ep: link of all epoll hooks for this file
@@ -1031,9 +1079,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_llist: work queue entrypoint
* @f_ra: file's readahead state
* @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
+ * @f_ref: reference count
*/
struct file {
- file_ref_t f_ref;
spinlock_t f_lock;
fmode_t f_mode;
const struct file_operations *f_op;
@@ -1043,6 +1091,7 @@ struct file {
unsigned int f_flags;
unsigned int f_iocb_flags;
const struct cred *f_cred;
+ struct fown_struct *f_owner;
/* --- cacheline 1 boundary (64 bytes) --- */
struct path f_path;
union {
@@ -1056,7 +1105,6 @@ struct file {
void *f_security;
#endif
/* --- cacheline 2 boundary (128 bytes) --- */
- struct fown_struct *f_owner;
errseq_t f_wb_err;
errseq_t f_sb_err;
#ifdef CONFIG_EPOLL
@@ -1068,6 +1116,7 @@ struct file {
struct file_ra_state f_ra;
freeptr_t f_freeptr;
};
+ file_ref_t f_ref;
/* --- cacheline 3 boundary (192 bytes) --- */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
@@ -1199,11 +1248,19 @@ extern int send_sigurg(struct file *file);
#define SB_NOUSER BIT(31)
/* These flags relate to encoding and casefolding */
-#define SB_ENC_STRICT_MODE_FL (1 << 0)
+#define SB_ENC_STRICT_MODE_FL (1 << 0)
+#define SB_ENC_NO_COMPAT_FALLBACK_FL (1 << 1)
#define sb_has_strict_encoding(sb) \
(sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
+#if IS_ENABLED(CONFIG_UNICODE)
+#define sb_no_casefold_compat_fallback(sb) \
+ (sb->s_encoding_flags & SB_ENC_NO_COMPAT_FALLBACK_FL)
+#else
+#define sb_no_casefold_compat_fallback(sb) (1)
+#endif
+
/*
* Umount options
*/
@@ -1232,6 +1289,7 @@ extern int send_sigurg(struct file *file);
#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */
#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */
#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */
+#define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */
/* Possible states of 'frozen' field */
enum {
@@ -1921,8 +1979,8 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap,
*/
int vfs_create(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t, bool);
-int vfs_mkdir(struct mnt_idmap *, struct inode *,
- struct dentry *, umode_t);
+struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
+ struct dentry *, umode_t);
int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, dev_t);
int vfs_symlink(struct mnt_idmap *, struct inode *,
@@ -1979,7 +2037,7 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group);
int vfs_fchmod(struct file *file, umode_t mode);
int vfs_utimes(const struct path *path, struct timespec64 *times);
-extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
@@ -2127,6 +2185,8 @@ struct file_operations {
#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5))
/* Supports asynchronous lock callbacks */
#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
+/* File system supports uncached read/write buffered IO */
+#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
@@ -2149,8 +2209,8 @@ struct inode_operations {
int (*unlink) (struct inode *,struct dentry *);
int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,
const char *);
- int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,
- umode_t);
+ struct dentry *(*mkdir) (struct mnt_idmap *, struct inode *,
+ struct dentry *, umode_t);
int (*rmdir) (struct inode *,struct dentry *);
int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,
umode_t,dev_t);
@@ -2284,6 +2344,7 @@ struct super_operations {
#define S_CASEFOLD (1 << 15) /* Casefolded file */
#define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */
#define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
+#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2340,6 +2401,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
(inode)->i_rdev == WHITEOUT_DEV)
+#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE)
static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap,
struct inode *inode)
@@ -2554,6 +2616,7 @@ struct file_system_type {
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
#define FS_MGTIME 64 /* FS uses multigrain timestamps */
+#define FS_LBS 128 /* FS supports LBS */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
@@ -2591,9 +2654,6 @@ static inline bool is_mgtime(const struct inode *inode)
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
-extern struct dentry *mount_single(struct file_system_type *fs_type,
- int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_nodev(struct file_system_type *fs_type,
int flags, void *data,
int (*fill_super)(struct super_block *, void *, int));
@@ -2732,13 +2792,13 @@ static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
return mnt_idmap(mnt) != &nop_mnt_idmap;
}
-extern long vfs_truncate(const struct path *, loff_t);
+int vfs_truncate(const struct path *, loff_t);
int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start,
unsigned int time_attrs, struct file *filp);
extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
-extern long do_sys_open(int dfd, const char __user *filename, int flags,
- umode_t mode);
+int do_sys_open(int dfd, const char __user *filename, int flags,
+ umode_t mode);
extern struct file *file_open_name(struct filename *, int, umode_t);
extern struct file *filp_open(const char *, int, umode_t);
extern struct file *file_open_root(const struct path *,
@@ -2751,6 +2811,8 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
}
struct file *dentry_open(const struct path *path, int flags,
const struct cred *creds);
+struct file *dentry_open_nonotify(const struct path *path, int flags,
+ const struct cred *cred);
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
const struct cred *cred);
struct path *backing_file_user_path(struct file *f);
@@ -2787,7 +2849,10 @@ extern int filp_close(struct file *, fl_owner_t id);
extern struct filename *getname_flags(const char __user *, int);
extern struct filename *getname_uflags(const char __user *, int);
-extern struct filename *getname(const char __user *);
+static inline struct filename *getname(const char __user *name)
+{
+ return getname_flags(name, 0);
+}
extern struct filename *getname_kernel(const char *);
extern struct filename *__getname_maybe_null(const char __user *);
static inline struct filename *getname_maybe_null(const char __user *name, int flags)
@@ -2800,6 +2865,13 @@ static inline struct filename *getname_maybe_null(const char __user *name, int f
return __getname_maybe_null(name);
}
extern void putname(struct filename *name);
+DEFINE_FREE(putname, struct filename *, if (!IS_ERR_OR_NULL(_T)) putname(_T))
+
+static inline struct filename *refname(struct filename *name)
+{
+ atomic_inc(&name->refcnt);
+ return name;
+}
extern int finish_open(struct file *file, struct dentry *dentry,
int (*open)(struct inode *, struct file *));
@@ -2874,6 +2946,8 @@ extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
extern int __must_check file_check_and_advance_wb_err(struct file *file);
extern int __must_check file_write_and_wait_range(struct file *file,
loff_t start, loff_t end);
+int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start,
+ loff_t end);
static inline int file_write_and_wait(struct file *file)
{
@@ -2906,6 +2980,11 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count)
(iocb->ki_flags & IOCB_SYNC) ? 0 : 1);
if (ret)
return ret;
+ } else if (iocb->ki_flags & IOCB_DONTCACHE) {
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+
+ filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count,
+ iocb->ki_pos - 1);
}
return count;
@@ -3059,6 +3138,34 @@ static inline void allow_write_access(struct file *file)
if (file)
atomic_inc(&file_inode(file)->i_writecount);
}
+
+/*
+ * Do not prevent write to executable file when watched by pre-content events.
+ *
+ * Note that FMODE_FSNOTIFY_HSM mode is set depending on pre-content watches at
+ * the time of file open and remains constant for entire lifetime of the file,
+ * so if pre-content watches are added post execution or removed before the end
+ * of the execution, it will not cause i_writecount reference leak.
+ */
+static inline int exe_file_deny_write_access(struct file *exe_file)
+{
+ if (unlikely(FMODE_FSNOTIFY_HSM(exe_file->f_mode)))
+ return 0;
+ return deny_write_access(exe_file);
+}
+static inline void exe_file_allow_write_access(struct file *exe_file)
+{
+ if (unlikely(!exe_file || FMODE_FSNOTIFY_HSM(exe_file->f_mode)))
+ return;
+ allow_write_access(exe_file);
+}
+
+static inline void file_set_fsnotify_mode(struct file *file, fmode_t mode)
+{
+ file->f_mode &= ~FMODE_FSNOTIFY_MASK;
+ file->f_mode |= mode;
+}
+
static inline bool inode_is_open_for_write(const struct inode *inode)
{
return atomic_read(&inode->i_writecount) > 0;
@@ -3198,7 +3305,11 @@ static inline void __iget(struct inode *inode)
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
extern void __destroy_inode(struct inode *);
-extern struct inode *new_inode_pseudo(struct super_block *sb);
+struct inode *alloc_inode(struct super_block *sb);
+static inline struct inode *new_inode_pseudo(struct super_block *sb)
+{
+ return alloc_inode(sb);
+}
extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
@@ -3351,8 +3462,10 @@ extern const struct file_operations generic_ro_fops;
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
-extern int readlink_copy(char __user *, int, const char *);
+extern int readlink_copy(char __user *, int, const char *, int);
extern int page_readlink(struct dentry *, char __user *, int);
+extern const char *page_get_link_raw(struct dentry *, struct inode *,
+ struct delayed_call *);
extern const char *page_get_link(struct dentry *, struct inode *,
struct delayed_call *);
extern void page_put_link(void *);
@@ -3613,6 +3726,14 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags,
if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE))
return -EOPNOTSUPP;
}
+ if (flags & RWF_DONTCACHE) {
+ /* file system must support it */
+ if (!(ki->ki_filp->f_op->fop_flags & FOP_DONTCACHE))
+ return -EOPNOTSUPP;
+ /* DAX mappings not supported */
+ if (IS_DAX(ki->ki_filp->f_mapping->host))
+ return -EOPNOTSUPP;
+ }
kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
if (flags & RWF_SYNC)
kiocb_flags |= IOCB_DSYNC;
@@ -3706,11 +3827,9 @@ struct ctl_table;
int __init list_bdev_fs_names(char *buf, size_t size);
#define __FMODE_EXEC ((__force int) FMODE_EXEC)
-#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY)
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
-#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
- (flag & __FMODE_NONOTIFY)))
+#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE))
static inline bool is_sxid(umode_t mode)
{