diff options
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r-- | include/linux/fs.h | 202 |
1 files changed, 154 insertions, 48 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..6e1fd5d21248 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2,7 +2,7 @@ #define _LINUX_FS_H #include <linux/linkage.h> -#include <linux/wait.h> +#include <linux/wait_bit.h> #include <linux/kdev_t.h> #include <linux/dcache.h> #include <linux/path.h> @@ -18,8 +18,10 @@ #include <linux/bug.h> #include <linux/mutex.h> #include <linux/rwsem.h> +#include <linux/mm_types.h> #include <linux/capability.h> #include <linux/semaphore.h> +#include <linux/fcntl.h> #include <linux/fiemap.h> #include <linux/rculist_bl.h> #include <linux/atomic.h> @@ -30,6 +32,8 @@ #include <linux/percpu-rwsem.h> #include <linux/workqueue.h> #include <linux/delayed_call.h> +#include <linux/uuid.h> +#include <linux/errseq.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -142,6 +146,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) +/* File is capable of returning -EAGAIN if AIO will block */ +#define FMODE_AIO_NOWAIT ((__force fmode_t)0x8000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are @@ -261,6 +268,18 @@ struct page; struct address_space; struct writeback_control; +/* + * Write life time hint values. + */ +enum rw_hint { + WRITE_LIFE_NOT_SET = 0, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +}; + #define IOCB_EVENTFD (1 << 0) #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) @@ -268,6 +287,7 @@ struct writeback_control; #define IOCB_DSYNC (1 << 4) #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) +#define IOCB_NOWAIT (1 << 7) struct kiocb { struct file *ki_filp; @@ -275,23 +295,14 @@ struct kiocb { void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); void *private; int ki_flags; -}; + enum rw_hint ki_hint; +} __randomize_layout; static inline bool is_sync_kiocb(struct kiocb *kiocb) { return kiocb->ki_complete == NULL; } -static inline int iocb_flags(struct file *file); - -static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) -{ - *kiocb = (struct kiocb) { - .ki_filp = filp, - .ki_flags = iocb_flags(filp), - }; -} - /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet @@ -392,7 +403,8 @@ struct address_space { gfp_t gfp_mask; /* implicit gfp mask for allocations */ struct list_head private_list; /* ditto */ void *private_data; /* ditto */ -} __attribute__((aligned(sizeof(long)))); + errseq_t wb_err; +} __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but * must be enforced here for CRIS, to let the least significant bit @@ -435,7 +447,7 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; -}; +} __randomize_layout; /* * Radix-tree tags, for tagging dirty and writeback pages within the pagecache @@ -592,6 +604,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; unsigned int i_blkbits; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED @@ -653,7 +666,7 @@ struct inode { #endif void *i_private; /* fs or device private pointer */ -}; +} __randomize_layout; static inline unsigned int i_blocksize(const struct inode *node) { @@ -846,6 +859,7 @@ struct file { * Must not be taken from IRQ context. */ spinlock_t f_lock; + enum rw_hint f_write_hint; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -868,7 +882,9 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ + errseq_t f_wb_err; +} __randomize_layout + __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { __u32 handle_bytes; @@ -1005,7 +1021,7 @@ struct file_lock { int state; /* state of grant or error if -ve */ } afs; } fl_u; -}; +} __randomize_layout; struct file_lock_context { spinlock_t flc_lock; @@ -1021,8 +1037,6 @@ struct file_lock_context { #define OFFT_OFFSET_MAX INT_LIMIT(off_t) #endif -#include <linux/fcntl.h> - extern void send_sigio(struct fown_struct *fown, int fd, int band); /* @@ -1038,14 +1052,14 @@ static inline struct inode *locks_inode(const struct file *f) } #ifdef CONFIG_FILE_LOCKING -extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); +extern int fcntl_getlk(struct file *, unsigned int, struct flock *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, - struct flock __user *); + struct flock *); #if BITS_PER_LONG == 32 -extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *); +extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 *); extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, - struct flock64 __user *); + struct flock64 *); #endif extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); @@ -1249,7 +1263,7 @@ extern void fasync_free(struct fasync_struct *); extern void kill_fasync(struct fasync_struct **, int, int); extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); -extern void f_setown(struct file *filp, unsigned long arg, int force); +extern int f_setown(struct file *filp, unsigned long arg, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); @@ -1328,8 +1342,8 @@ struct super_block { struct sb_writers s_writers; - char s_id[32]; /* Informational name */ - u8 s_uuid[16]; /* UUID */ + char s_id[32]; /* Informational name */ + uuid_t s_uuid; /* UUID */ void *s_fs_info; /* Filesystem private info */ unsigned int s_max_links; @@ -1351,11 +1365,6 @@ struct super_block { */ char *s_subtype; - /* - * Saved mount options for lazy filesystems using - * generic_show_options() - */ - char __rcu *s_options; const struct dentry_operations *s_d_op; /* default d_op for dentries */ /* @@ -1404,7 +1413,7 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ -}; +} __randomize_layout; /* Helper functions so that in most cases filesystems will * not need to deal directly with kuid_t and kgid_t and can @@ -1690,7 +1699,7 @@ struct file_operations { u64); ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *, u64); -}; +} __randomize_layout; struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); @@ -1739,12 +1748,6 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma) return file->f_op->mmap(file, vma); } -static inline int call_fsync(struct file *file, loff_t start, loff_t end, - int datasync) -{ - return file->f_op->fsync(file, start, end, datasync); -} - ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, @@ -1873,6 +1876,25 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode) return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid); } +static inline enum rw_hint file_write_hint(struct file *file) +{ + if (file->f_write_hint != WRITE_LIFE_NOT_SET) + return file->f_write_hint; + + return file_inode(file)->i_write_hint; +} + +static inline int iocb_flags(struct file *file); + +static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) +{ + *kiocb = (struct kiocb) { + .ki_filp = filp, + .ki_flags = iocb_flags(filp), + .ki_hint = file_write_hint(filp), + }; +} + /* * Inode state bits. Protected by inode->i_lock * @@ -1929,6 +1951,9 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode) * wb stat updates to grab mapping->tree_lock. See * inode_switch_wb_work_fn() for details. * + * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper + * and work dirs among overlayfs mounts. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -1949,6 +1974,7 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode) #define __I_DIRTY_TIME_EXPIRED 12 #define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) #define I_WB_SWITCH (1 << 13) +#define I_OVL_INUSE (1 << 14) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) @@ -2514,9 +2540,11 @@ extern int write_inode_now(struct inode *, int); extern int filemap_fdatawrite(struct address_space *); extern int filemap_flush(struct address_space *); extern int filemap_fdatawait(struct address_space *); -extern void filemap_fdatawait_keep_errors(struct address_space *); +extern int filemap_fdatawait_keep_errors(struct address_space *mapping); extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); +extern bool filemap_range_has_page(struct address_space *, loff_t lstart, + loff_t lend); extern int filemap_write_and_wait(struct address_space *mapping); extern int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); @@ -2526,6 +2554,62 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); extern int filemap_check_errors(struct address_space *mapping); +extern void __filemap_set_wb_err(struct address_space *mapping, int err); +extern int __must_check file_check_and_advance_wb_err(struct file *file); +extern int __must_check file_write_and_wait_range(struct file *file, + loff_t start, loff_t end); + +/** + * filemap_set_wb_err - set a writeback error on an address_space + * @mapping: mapping in which to set writeback error + * @err: error to be set in mapping + * + * When writeback fails in some way, we must record that error so that + * userspace can be informed when fsync and the like are called. We endeavor + * to report errors on any file that was open at the time of the error. Some + * internal callers also need to know when writeback errors have occurred. + * + * When a writeback error occurs, most filesystems will want to call + * filemap_set_wb_err to record the error in the mapping so that it will be + * automatically reported whenever fsync is called on the file. + * + * FIXME: mention FS_* flag here? + */ +static inline void filemap_set_wb_err(struct address_space *mapping, int err) +{ + /* Fastpath for common case of no error */ + if (unlikely(err)) + __filemap_set_wb_err(mapping, err); +} + +/** + * filemap_check_wb_error - has an error occurred since the mark was sampled? + * @mapping: mapping to check for writeback errors + * @since: previously-sampled errseq_t + * + * Grab the errseq_t value from the mapping, and see if it has changed "since" + * the given value was sampled. + * + * If it has then report the latest error set, otherwise return 0. + */ +static inline int filemap_check_wb_err(struct address_space *mapping, + errseq_t since) +{ + return errseq_check(&mapping->wb_err, since); +} + +/** + * filemap_sample_wb_err - sample the current errseq_t to test for later errors + * @mapping: mapping to be sampled + * + * Writeback errors are always reported relative to a particular sample point + * in the past. This function provides those sample points. + */ +static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) +{ + return errseq_sample(&mapping->wb_err); +} + extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); @@ -2789,8 +2873,10 @@ extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); -ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos); -ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos); +ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, + int flags); +ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, + int flags); /* fs/block_dev.c */ extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to); @@ -2843,7 +2929,7 @@ enum { DIO_SKIP_DIO_COUNT = 0x08, }; -void dio_end_io(struct bio *bio, int error); +void dio_end_io(struct bio *bio); ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, @@ -2956,7 +3042,7 @@ extern int generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, get_block_t *get_block); -extern void get_filesystem(struct file_system_type *fs); +extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); @@ -3033,15 +3119,16 @@ extern void setattr_copy(struct inode *inode, const struct iattr *attr); extern int file_update_time(struct file *file); -extern int generic_show_options(struct seq_file *m, struct dentry *root); -extern void save_mount_options(struct super_block *sb, char *options); -extern void replace_mount_options(struct super_block *sb, char *options); - static inline bool io_is_direct(struct file *filp) { return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host); } +static inline bool vma_is_dax(struct vm_area_struct *vma) +{ + return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); +} + static inline int iocb_flags(struct file *file) { int res = 0; @@ -3056,6 +3143,25 @@ static inline int iocb_flags(struct file *file) return res; } +static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags) +{ + if (unlikely(flags & ~RWF_SUPPORTED)) + return -EOPNOTSUPP; + + if (flags & RWF_NOWAIT) { + if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT)) + return -EOPNOTSUPP; + ki->ki_flags |= IOCB_NOWAIT; + } + if (flags & RWF_HIPRI) + ki->ki_flags |= IOCB_HIPRI; + if (flags & RWF_DSYNC) + ki->ki_flags |= IOCB_DSYNC; + if (flags & RWF_SYNC) + ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC); + return 0; +} + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; |