diff options
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r-- | include/linux/fs.h | 274 |
1 files changed, 78 insertions, 196 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index f5abba86107d..7519ae003a08 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) +/* File supports async buffered reads */ +#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are @@ -315,6 +318,8 @@ enum rw_hint { #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) #define IOCB_NOWAIT (1 << 7) +/* iocb->ki_waitq is valid */ +#define IOCB_WAITQ (1 << 8) #define IOCB_NOIO (1 << 9) struct kiocb { @@ -329,7 +334,10 @@ struct kiocb { int ki_flags; u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ - unsigned int ki_cookie; /* for ->iopoll */ + union { + unsigned int ki_cookie; /* for ->iopoll */ + struct wait_page_queue *ki_waitq; /* for async buffered IO */ + }; randomized_struct_fields_end }; @@ -471,45 +479,6 @@ struct address_space { * must be enforced here for CRIS, to let the least significant bit * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. */ -struct request_queue; - -struct block_device { - dev_t bd_dev; /* not a kdev_t - it's a search key */ - int bd_openers; - struct inode * bd_inode; /* will die */ - struct super_block * bd_super; - struct mutex bd_mutex; /* open/close mutex */ - void * bd_claiming; - void * bd_holder; - int bd_holders; - bool bd_write_holder; -#ifdef CONFIG_SYSFS - struct list_head bd_holder_disks; -#endif - struct block_device * bd_contains; - unsigned bd_block_size; - u8 bd_partno; - struct hd_struct * bd_part; - /* number of times partitions within this device have been opened. */ - unsigned bd_part_count; - int bd_invalidated; - struct gendisk * bd_disk; - struct request_queue * bd_queue; - struct backing_dev_info *bd_bdi; - struct list_head bd_list; - /* - * Private data. You must have bd_claim'ed the block_device - * to use this. NOTE: bd_claim allows an owner to claim - * the same device multiple times, the owner must take special - * care to not mess up bd_private for that case. - */ - unsigned long bd_private; - - /* The counter of freeze processes */ - int bd_fsfreeze_count; - /* Mutex for freeze */ - struct mutex bd_fsfreeze_mutex; -} __randomize_layout; /* XArray tags, for tagging dirty and writeback pages in the pagecache. */ #define PAGECACHE_TAG_DIRTY XA_MARK_0 @@ -549,6 +518,16 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) up_read(&mapping->i_mmap_rwsem); } +static inline void i_mmap_assert_locked(struct address_space *mapping) +{ + lockdep_assert_held(&mapping->i_mmap_rwsem); +} + +static inline void i_mmap_assert_write_locked(struct address_space *mapping) +{ + lockdep_assert_held_write(&mapping->i_mmap_rwsem); +} + /* * Might pages of this file be mapped into userspace? */ @@ -559,7 +538,7 @@ static inline int mapping_mapped(struct address_space *mapping) /* * Might pages of this file have been modified in userspace? - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff + * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. * @@ -908,8 +887,6 @@ static inline unsigned imajor(const struct inode *inode) return MAJOR(inode->i_rdev); } -extern struct block_device *I_BDEV(struct inode *inode); - struct fown_struct { rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ @@ -1381,6 +1358,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_NODIRATIME 2048 /* Do not update directory access times */ #define SB_SILENT 32768 #define SB_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define SB_I_VERSION (1<<23) /* Update inode I_version field */ #define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ @@ -1744,6 +1722,10 @@ int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), void *); +int vfs_fchown(struct file *file, uid_t user, gid_t group); +int vfs_fchmod(struct file *file, umode_t mode); +int vfs_utimes(const struct path *path, struct timespec64 *times); + extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT @@ -1775,14 +1757,6 @@ struct dir_context { loff_t pos; }; -struct block_device_operations; - -/* These macros are for out of kernel modules to test that - * the kernel supports the unlocked_ioctl and compat_ioctl - * fields in struct file_operations. */ -#define HAVE_COMPAT_IOCTL 1 -#define HAVE_UNLOCKED_IOCTL 1 - /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. @@ -1982,27 +1956,27 @@ struct super_operations { /* * Inode flags - they have no relation to superblock flags now */ -#define S_SYNC 1 /* Writes are synced at once */ -#define S_NOATIME 2 /* Do not update access times */ -#define S_APPEND 4 /* Append-only file */ -#define S_IMMUTABLE 8 /* Immutable file */ -#define S_DEAD 16 /* removed, but still open directory */ -#define S_NOQUOTA 32 /* Inode is not counted to quota */ -#define S_DIRSYNC 64 /* Directory modifications are synchronous */ -#define S_NOCMTIME 128 /* Do not update file c/mtime */ -#define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ -#define S_PRIVATE 512 /* Inode is fs-internal */ -#define S_IMA 1024 /* Inode has an associated IMA struct */ -#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ -#define S_NOSEC 4096 /* no suid or xattr security attributes */ +#define S_SYNC (1 << 0) /* Writes are synced at once */ +#define S_NOATIME (1 << 1) /* Do not update access times */ +#define S_APPEND (1 << 2) /* Append-only file */ +#define S_IMMUTABLE (1 << 3) /* Immutable file */ +#define S_DEAD (1 << 4) /* removed, but still open directory */ +#define S_NOQUOTA (1 << 5) /* Inode is not counted to quota */ +#define S_DIRSYNC (1 << 6) /* Directory modifications are synchronous */ +#define S_NOCMTIME (1 << 7) /* Do not update file c/mtime */ +#define S_SWAPFILE (1 << 8) /* Do not truncate: swapon got its bmaps */ +#define S_PRIVATE (1 << 9) /* Inode is fs-internal */ +#define S_IMA (1 << 10) /* Inode has an associated IMA struct */ +#define S_AUTOMOUNT (1 << 11) /* Automount/referral quasi-directory */ +#define S_NOSEC (1 << 12) /* no suid or xattr security attributes */ #ifdef CONFIG_FS_DAX -#define S_DAX 8192 /* Direct Access, avoiding the page cache */ +#define S_DAX (1 << 13) /* Direct Access, avoiding the page cache */ #else -#define S_DAX 0 /* Make all the DAX code disappear */ +#define S_DAX 0 /* Make all the DAX code disappear */ #endif -#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */ -#define S_CASEFOLD 32768 /* Casefolded file */ -#define S_VERITY 65536 /* Verity file (using fs/verity/) */ +#define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */ +#define S_CASEFOLD (1 << 15) /* Casefolded file */ +#define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -2158,6 +2132,10 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_DONTCACHE Evict inode as soon as it is not used anymore. * + * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. + * Used to detect that mark_inode_dirty() should not move + * inode between dirty lists. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2175,12 +2153,11 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define __I_DIRTY_TIME_EXPIRED 12 -#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) #define I_WB_SWITCH (1 << 13) #define I_OVL_INUSE (1 << 14) #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) +#define I_SYNC_QUEUED (1 << 17) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) @@ -2264,18 +2241,9 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -#ifdef CONFIG_BLOCK extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); -#else -static inline struct dentry *mount_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)) -{ - return ERR_PTR(-ENODEV); -} -#endif extern struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2284,14 +2252,7 @@ extern struct dentry *mount_nodev(struct file_system_type *fs_type, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void generic_shutdown_super(struct super_block *sb); -#ifdef CONFIG_BLOCK void kill_block_super(struct super_block *sb); -#else -static inline void kill_block_super(struct super_block *sb) -{ - BUG(); -} -#endif void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); @@ -2581,93 +2542,16 @@ extern struct kmem_cache *names_cachep; #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) -#ifdef CONFIG_BLOCK -extern int register_blkdev(unsigned int, const char *); -extern void unregister_blkdev(unsigned int, const char *); -extern struct block_device *bdget(dev_t); -extern struct block_device *bdgrab(struct block_device *bdev); -extern void bd_set_size(struct block_device *, loff_t size); -extern void bd_forget(struct inode *inode); -extern void bdput(struct block_device *); -extern void invalidate_bdev(struct block_device *); -extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); -extern int sync_blockdev(struct block_device *bdev); -extern struct super_block *freeze_bdev(struct block_device *); -extern void emergency_thaw_all(void); -extern void emergency_thaw_bdev(struct super_block *sb); -extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); -extern int fsync_bdev(struct block_device *); - extern struct super_block *blockdev_superblock; - static inline bool sb_is_blkdev_sb(struct super_block *sb) { - return sb == blockdev_superblock; -} -#else -static inline void bd_forget(struct inode *inode) {} -static inline int sync_blockdev(struct block_device *bdev) { return 0; } -static inline void invalidate_bdev(struct block_device *bdev) {} - -static inline struct super_block *freeze_bdev(struct block_device *sb) -{ - return NULL; -} - -static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) -{ - return 0; -} - -static inline int emergency_thaw_bdev(struct super_block *sb) -{ - return 0; + return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock; } -static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) -{ -} - -static inline bool sb_is_blkdev_sb(struct super_block *sb) -{ - return false; -} -#endif +void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; -#ifdef CONFIG_BLOCK -extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); -extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); -extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, - void *holder); -extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, - void *holder); -extern struct block_device *bd_start_claiming(struct block_device *bdev, - void *holder); -extern void bd_finish_claiming(struct block_device *bdev, - struct block_device *whole, void *holder); -extern void bd_abort_claiming(struct block_device *bdev, - struct block_device *whole, void *holder); -extern void blkdev_put(struct block_device *bdev, fmode_t mode); - -#ifdef CONFIG_SYSFS -extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -extern void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk); -#else -static inline int bd_link_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ - return 0; -} -static inline void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ -} -#endif -#endif /* fs/char_dev.c */ #define CHRDEV_MAJOR_MAX 512 @@ -2698,31 +2582,12 @@ static inline void unregister_chrdev(unsigned int major, const char *name) __unregister_chrdev(major, 0, 256, name); } -/* fs/block_dev.c */ -#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ -#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ - -#ifdef CONFIG_BLOCK -#define BLKDEV_MAJOR_MAX 512 -extern const char *bdevname(struct block_device *bdev, char *buffer); -extern struct block_device *lookup_bdev(const char *); -extern void blkdev_show(struct seq_file *,off_t); - -#else -#define BLKDEV_MAJOR_MAX 0 -#endif - extern void init_special_inode(struct inode *, umode_t, dev_t); /* Invalid inode operations -- fs/bad_inode.c */ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); -#ifdef CONFIG_BLOCK -extern int revalidate_disk(struct gendisk *); -extern int check_disk_change(struct block_device *); -extern int __invalidate_device(struct block_device *, bool); -#endif unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); @@ -2798,7 +2663,7 @@ static inline void filemap_set_wb_err(struct address_space *mapping, int err) } /** - * filemap_check_wb_error - has an error occurred since the mark was sampled? + * filemap_check_wb_err - has an error occurred since the mark was sampled? * @mapping: mapping to check for writeback errors * @since: previously-sampled errseq_t * @@ -2827,7 +2692,7 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) /** * file_sample_sb_err - sample the current errseq_t to test for later errors - * @mapping: mapping to be sampled + * @file: file pointer to be sampled * * Grab the most current superblock-level errseq_t value for the given * struct file. @@ -3098,6 +2963,21 @@ extern void discard_new_inode(struct inode *); extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); +/* + * Userspace may rely on the the inode number being non-zero. For example, glibc + * simply ignores files with zero i_ino in unlink() and other places. + * + * As an additional complication, if userspace was compiled with + * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the + * lower 32 bits, so we need to check that those aren't zero explicitly. With + * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but + * better safe than sorry. + */ +static inline bool is_zero_ino(ino_t ino) +{ + return (u32)ino == 0; +} + extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); @@ -3123,10 +3003,6 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); -#ifdef CONFIG_BLOCK -extern int bdev_read_only(struct block_device *); -#endif -extern int set_blocksize(struct block_device *, int); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); @@ -3439,22 +3315,28 @@ static inline int iocb_flags(struct file *file) static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) { + int kiocb_flags = 0; + + if (!flags) + return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - ki->ki_flags |= IOCB_NOWAIT; + kiocb_flags |= IOCB_NOWAIT | IOCB_NOIO; } if (flags & RWF_HIPRI) - ki->ki_flags |= IOCB_HIPRI; + kiocb_flags |= IOCB_HIPRI; if (flags & RWF_DSYNC) - ki->ki_flags |= IOCB_DSYNC; + kiocb_flags |= IOCB_DSYNC; if (flags & RWF_SYNC) - ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC); + kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC); if (flags & RWF_APPEND) - ki->ki_flags |= IOCB_APPEND; + kiocb_flags |= IOCB_APPEND; + + ki->ki_flags |= kiocb_flags; return 0; } |