diff options
40 files changed, 318 insertions, 290 deletions
diff --git a/Documentation/filesystems/files.rst b/Documentation/filesystems/files.rst index 9e38e4c221ca..eb770f891b27 100644 --- a/Documentation/filesystems/files.rst +++ b/Documentation/filesystems/files.rst @@ -116,7 +116,7 @@ before and after the reference count increment. This pattern can be seen in get_file_rcu() and __files_get_rcu(). In addition, it isn't possible to access or check fields in struct file -without first aqcuiring a reference on it under rcu lookup. Not doing +without first acquiring a reference on it under rcu lookup. Not doing that was always very dodgy and it was only usable for non-pointer data in struct file. With SLAB_TYPESAFE_BY_RCU it is necessary that callers either first acquire a reference or they must hold the files_lock of the diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index d5bf4b6b7509..e664061ed55d 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -29,7 +29,7 @@ prototypes:: char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); struct vfsmount *(*d_automount)(struct path *path); int (*d_manage)(const struct path *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *); + struct dentry *(*d_real)(struct dentry *, enum d_real_type type); locking rules: diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index eebcc0f9e2bc..6e903a903f8f 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -1264,7 +1264,7 @@ defined: char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *); + struct dentry *(*d_real)(struct dentry *, enum d_real_type type); }; ``d_revalidate`` @@ -1419,16 +1419,14 @@ defined: the dentry being transited from. ``d_real`` - overlay/union type filesystems implement this method to return - one of the underlying dentries hidden by the overlay. It is - used in two different modes: + overlay/union type filesystems implement this method to return one + of the underlying dentries of a regular file hidden by the overlay. - Called from file_dentry() it returns the real dentry matching - the inode argument. The real dentry may be from a lower layer - already copied up, but still referenced from the file. This - mode is selected with a non-NULL inode argument. + The 'type' argument takes the values D_REAL_DATA or D_REAL_METADATA + for returning the real underlying dentry that refers to the inode + hosting the file's data or metadata respectively. - With NULL inode the topmost real underlying dentry is returned. + For non-regular files, the 'dentry' argument is returned. Each dentry has a pointer to its parent dentry, as well as a hash list of child dentries. Child dentries are basically like files in a diff --git a/fs/attr.c b/fs/attr.c index 5a13f0c8495f..49d23b5dbab4 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -352,7 +352,7 @@ int may_setattr(struct mnt_idmap *idmap, struct inode *inode, EXPORT_SYMBOL(may_setattr); /** - * notify_change - modify attributes of a filesytem object + * notify_change - modify attributes of a filesystem object * @idmap: idmap of the mount the inode was found from * @dentry: object affected * @attr: new attributes diff --git a/fs/backing-file.c b/fs/backing-file.c index a681f38d84d8..740185198db3 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -325,9 +325,7 @@ EXPORT_SYMBOL_GPL(backing_file_mmap); static int __init backing_aio_init(void) { - backing_aio_cachep = kmem_cache_create("backing_aio", - sizeof(struct backing_aio), - 0, SLAB_HWCACHE_ALIGN, NULL); + backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN); if (!backing_aio_cachep) return -ENOMEM; diff --git a/fs/buffer.c b/fs/buffer.c index d3bcf601d3e5..b55dea034a5d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -464,7 +464,7 @@ EXPORT_SYMBOL(mark_buffer_async_write); * a successful fsync(). For example, ext2 indirect blocks need to be * written back and waited upon before fsync() returns. * - * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(), + * The functions mark_buffer_dirty_inode(), fsync_inode_buffers(), * inode_has_buffers() and invalidate_inode_buffers() are provided for the * management of a list of dependent buffers at ->i_mapping->i_private_list. * @@ -3121,12 +3121,8 @@ void __init buffer_init(void) unsigned long nrpages; int ret; - bh_cachep = kmem_cache_create("buffer_head", - sizeof(struct buffer_head), 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| - SLAB_MEM_SPREAD), - NULL); - + bh_cachep = KMEM_CACHE(buffer_head, + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); /* * Limit the bh occupancy to 10% of ZONE_NORMAL */ diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 7b3fc189593a..0ad52fbe51c9 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -74,13 +74,7 @@ struct fscrypt_nokey_name { static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; + return is_dot_dotdot(str->name, str->len); } /** diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 03bd55069d86..2fe0f3af1a08 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1949,16 +1949,6 @@ out: return rc; } -static bool is_dot_dotdot(const char *name, size_t name_size) -{ - if (name_size == 1 && name[0] == '.') - return true; - else if (name_size == 2 && name[0] == '.' && name[1] == '.') - return true; - - return false; -} - /** * ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext * @plaintext_name: The plaintext name diff --git a/fs/efs/super.c b/fs/efs/super.c index f17fdac76b2e..c837ac89b384 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -14,19 +14,14 @@ #include <linux/buffer_head.h> #include <linux/vfs.h> #include <linux/blkdev.h> - +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include "efs.h" #include <linux/efs_vh.h> #include <linux/efs_fs_sb.h> static int efs_statfs(struct dentry *dentry, struct kstatfs *buf); -static int efs_fill_super(struct super_block *s, void *d, int silent); - -static struct dentry *efs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super); -} +static int efs_init_fs_context(struct fs_context *fc); static void efs_kill_sb(struct super_block *s) { @@ -35,15 +30,6 @@ static void efs_kill_sb(struct super_block *s) kfree(sbi); } -static struct file_system_type efs_fs_type = { - .owner = THIS_MODULE, - .name = "efs", - .mount = efs_mount, - .kill_sb = efs_kill_sb, - .fs_flags = FS_REQUIRES_DEV, -}; -MODULE_ALIAS_FS("efs"); - static struct pt_types sgi_pt_types[] = { {0x00, "SGI vh"}, {0x01, "SGI trkrepl"}, @@ -63,6 +49,27 @@ static struct pt_types sgi_pt_types[] = { {0, NULL} }; +enum { + Opt_explicit_open, +}; + +static const struct fs_parameter_spec efs_param_spec[] = { + fsparam_flag ("explicit-open", Opt_explicit_open), + {} +}; + +/* + * File system definition and registration. + */ +static struct file_system_type efs_fs_type = { + .owner = THIS_MODULE, + .name = "efs", + .kill_sb = efs_kill_sb, + .fs_flags = FS_REQUIRES_DEV, + .init_fs_context = efs_init_fs_context, + .parameters = efs_param_spec, +}; +MODULE_ALIAS_FS("efs"); static struct kmem_cache * efs_inode_cachep; @@ -108,18 +115,10 @@ static void destroy_inodecache(void) kmem_cache_destroy(efs_inode_cachep); } -static int efs_remount(struct super_block *sb, int *flags, char *data) -{ - sync_filesystem(sb); - *flags |= SB_RDONLY; - return 0; -} - static const struct super_operations efs_superblock_operations = { .alloc_inode = efs_alloc_inode, .free_inode = efs_free_inode, .statfs = efs_statfs, - .remount_fs = efs_remount, }; static const struct export_operations efs_export_ops = { @@ -249,26 +248,26 @@ static int efs_validate_super(struct efs_sb_info *sb, struct efs_super *super) { return 0; } -static int efs_fill_super(struct super_block *s, void *d, int silent) +static int efs_fill_super(struct super_block *s, struct fs_context *fc) { struct efs_sb_info *sb; struct buffer_head *bh; struct inode *root; - sb = kzalloc(sizeof(struct efs_sb_info), GFP_KERNEL); + sb = kzalloc(sizeof(struct efs_sb_info), GFP_KERNEL); if (!sb) return -ENOMEM; s->s_fs_info = sb; s->s_time_min = 0; s->s_time_max = U32_MAX; - + s->s_magic = EFS_SUPER_MAGIC; if (!sb_set_blocksize(s, EFS_BLOCKSIZE)) { pr_err("device does not support %d byte blocks\n", EFS_BLOCKSIZE); return -EINVAL; } - + /* read the vh (volume header) block */ bh = sb_bread(s, 0); @@ -294,7 +293,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) pr_err("cannot read superblock\n"); return -EIO; } - + if (efs_validate_super(sb, (struct efs_super *) bh->b_data)) { #ifdef DEBUG pr_warn("invalid superblock at block %u\n", @@ -328,6 +327,61 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) return 0; } +static void efs_free_fc(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +static int efs_get_tree(struct fs_context *fc) +{ + return get_tree_bdev(fc, efs_fill_super); +} + +static int efs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + int token; + struct fs_parse_result result; + + token = fs_parse(fc, efs_param_spec, param, &result); + if (token < 0) + return token; + return 0; +} + +static int efs_reconfigure(struct fs_context *fc) +{ + sync_filesystem(fc->root->d_sb); + + return 0; +} + +struct efs_context { + unsigned long s_mount_opts; +}; + +static const struct fs_context_operations efs_context_opts = { + .parse_param = efs_parse_param, + .get_tree = efs_get_tree, + .reconfigure = efs_reconfigure, + .free = efs_free_fc, +}; + +/* + * Set up the filesystem mount context. + */ +static int efs_init_fs_context(struct fs_context *fc) +{ + struct efs_context *ctx; + + ctx = kzalloc(sizeof(struct efs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + fc->fs_private = ctx; + fc->ops = &efs_context_opts; + + return 0; +} + static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct efs_sb_info *sbi = SUPER_INFO(sb); diff --git a/fs/eventfd.c b/fs/eventfd.c index ad8186d47ba7..9afdb722fa92 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -251,7 +251,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c ssize_t res; __u64 ucnt; - if (count < sizeof(ucnt)) + if (count != sizeof(ucnt)) return -EINVAL; if (copy_from_user(&ucnt, buf, sizeof(ucnt))) return -EFAULT; @@ -283,13 +283,18 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c static void eventfd_show_fdinfo(struct seq_file *m, struct file *f) { struct eventfd_ctx *ctx = f->private_data; + __u64 cnt; spin_lock_irq(&ctx->wqh.lock); - seq_printf(m, "eventfd-count: %16llx\n", - (unsigned long long)ctx->count); + cnt = ctx->count; spin_unlock_irq(&ctx->wqh.lock); - seq_printf(m, "eventfd-id: %d\n", ctx->id); - seq_printf(m, "eventfd-semaphore: %d\n", + + seq_printf(m, + "eventfd-count: %16llx\n" + "eventfd-id: %d\n" + "eventfd-semaphore: %d\n", + cnt, + ctx->id, !!(ctx->flags & EFD_SEMAPHORE)); } #endif @@ -383,6 +388,7 @@ static int do_eventfd(unsigned int count, int flags) /* Check the EFD_* constants for consistency. */ BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); + BUILD_BUG_ON(EFD_SEMAPHORE != (1 << 0)); if (flags & ~EFD_FLAGS_SET) return -EINVAL; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 3534d36a1474..786e023a48b2 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -206,7 +206,7 @@ struct eventpoll { */ struct epitem *ovflist; - /* wakeup_source used when ep_scan_ready_list is running */ + /* wakeup_source used when ep_send_events or __ep_eventpoll_poll is running */ struct wakeup_source *ws; /* The user that created the eventpoll descriptor */ @@ -1153,7 +1153,7 @@ static inline bool chain_epi_lockless(struct epitem *epi) * This callback takes a read lock in order not to contend with concurrent * events from another file descriptor, thus all modifications to ->rdllist * or ->ovflist are lockless. Read lock is paired with the write lock from - * ep_scan_ready_list(), which stops all list modifications and guarantees + * ep_start/done_scan(), which stops all list modifications and guarantees * that lists state is seen correctly. * * Another thing worth to mention is that ep_poll_callback() can be called @@ -1751,7 +1751,7 @@ static int ep_send_events(struct eventpoll *ep, * availability. At this point, no one can insert * into ep->rdllist besides us. The epoll_ctl() * callers are locked out by - * ep_scan_ready_list() holding "mtx" and the + * ep_send_events() holding "mtx" and the * poll callback will queue them in ep->ovflist. */ list_add_tail(&epi->rdllink, &ep->rdllist); @@ -1904,7 +1904,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, __set_current_state(TASK_INTERRUPTIBLE); /* - * Do the final check under the lock. ep_scan_ready_list() + * Do the final check under the lock. ep_start/done_scan() * plays with two lists (->rdllist and ->ovflist) and there * is always a race when both lists are empty for short * period of time although events are pending, so lock is diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 3ae0154c5680..07ea3d62b298 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -255,7 +255,7 @@ static bool filldir_one(struct dir_context *ctx, const char *name, int len, container_of(ctx, struct getdents_callback, ctx); buf->sequence++; - if (buf->ino == ino && len <= NAME_MAX) { + if (buf->ino == ino && len <= NAME_MAX && !is_dot_dotdot(name, len)) { memcpy(buf->name, name, len); buf->name[len] = '\0'; buf->found = 1; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 65294e3b0bef..dda0aed95464 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3364,17 +3364,6 @@ static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) return is_set_ckpt_flags(sbi, CP_ERROR_FLAG); } -static inline bool is_dot_dotdot(const u8 *name, size_t len) -{ - if (len == 1 && name[0] == '.') - return true; - - if (len == 2 && name[0] == '.' && name[1] == '.') - return true; - - return false; -} - static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { diff --git a/fs/fcntl.c b/fs/fcntl.c index c80a6acad742..c3e342eb74af 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -846,12 +846,6 @@ int send_sigurg(struct fown_struct *fown) static DEFINE_SPINLOCK(fasync_lock); static struct kmem_cache *fasync_cache __ro_after_init; -static void fasync_free_rcu(struct rcu_head *head) -{ - kmem_cache_free(fasync_cache, - container_of(head, struct fasync_struct, fa_rcu)); -} - /* * Remove a fasync entry. If successfully removed, return * positive and clear the FASYNC flag. If no entry exists, @@ -877,7 +871,7 @@ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) write_unlock_irq(&fa->fa_lock); *fp = fa->fa_next; - call_rcu(&fa->fa_rcu, fasync_free_rcu); + kfree_rcu(fa, fa_rcu); filp->f_flags &= ~FASYNC; result = 1; break; diff --git a/fs/fhandle.c b/fs/fhandle.c index 18b3ba8dc8ea..57a12614addf 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -36,7 +36,7 @@ static long do_sys_name_to_handle(const struct path *path, if (f_handle.handle_bytes > MAX_HANDLE_SZ) return -EINVAL; - handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes, GFP_KERNEL); if (!handle) return -ENOMEM; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3d84fcc471c6..e4f17c53ddfc 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -141,6 +141,31 @@ static void wb_wakeup(struct bdi_writeback *wb) spin_unlock_irq(&wb->work_lock); } +/* + * This function is used when the first inode for this wb is marked dirty. It + * wakes-up the corresponding bdi thread which should then take care of the + * periodic background write-out of dirty inodes. Since the write-out would + * starts only 'dirty_writeback_interval' centisecs from now anyway, we just + * set up a timer which wakes the bdi thread up later. + * + * Note, we wouldn't bother setting up the timer, but this function is on the + * fast-path (used by '__mark_inode_dirty()'), so we save few context switches + * by delaying the wake-up. + * + * We have to be careful not to postpone flush work if it is scheduled for + * earlier. Thus we use queue_delayed_work(). + */ +static void wb_wakeup_delayed(struct bdi_writeback *wb) +{ + unsigned long timeout; + + timeout = msecs_to_jiffies(dirty_writeback_interval * 10); + spin_lock_irq(&wb->work_lock); + if (test_bit(WB_registered, &wb->state)) + queue_delayed_work(bdi_wq, &wb->dwork, timeout); + spin_unlock_irq(&wb->work_lock); +} + static void finish_writeback_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { diff --git a/fs/fs_parser.c b/fs/fs_parser.c index edb3712dcfa5..a4d6ca0b8971 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -83,8 +83,8 @@ static const struct fs_parameter_spec *fs_lookup_key( } /* - * fs_parse - Parse a filesystem configuration parameter - * @fc: The filesystem context to log errors through. + * __fs_parse - Parse a filesystem configuration parameter + * @log: The filesystem context to log errors through. * @desc: The parameter description to use. * @param: The parameter. * @result: Where to place the result of the parse diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index b0cb70400996..ce9346099c72 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -30,7 +30,7 @@ struct hfsplus_wd { * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes * @buf: buffer for I/O * @data: output pointer for location of requested data - * @opf: request op flags + * @opf: I/O operation type and flags * * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads diff --git a/fs/inode.c b/fs/inode.c index 91048c4c9c9e..6d0d54230363 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -588,7 +588,8 @@ void dump_mapping(const struct address_space *mapping) } dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); - if (get_kernel_nofault(dentry, dentry_ptr)) { + if (get_kernel_nofault(dentry, dentry_ptr) || + !dentry.d_parent || !dentry.d_name.name) { pr_warn("aops:%ps ino:%lx invalid dentry:%px\n", a_ops, ino, dentry_ptr); return; diff --git a/fs/libfs.c b/fs/libfs.c index d3d31197c8e4..6182e2b03094 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1786,7 +1786,7 @@ static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) const struct inode *dir = READ_ONCE(dentry->d_inode); struct super_block *sb = dentry->d_sb; const struct unicode_map *um = sb->s_encoding; - int ret = 0; + int ret; if (!dir || !IS_CASEFOLDED(dir)) return 0; diff --git a/fs/mbcache.c b/fs/mbcache.c index 82aa7a35db26..fe2624e17253 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -426,9 +426,8 @@ EXPORT_SYMBOL(mb_cache_destroy); static int __init mbcache_init(void) { - mb_entry_cache = kmem_cache_create("mbcache", - sizeof(struct mb_cache_entry), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); + mb_entry_cache = KMEM_CACHE(mb_cache_entry, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD); if (!mb_entry_cache) return -ENOMEM; return 0; diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index 64c5205e2b5e..3c60f1eaca61 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -214,7 +214,7 @@ static int copy_mnt_idmap(struct uid_gid_map *map_from, * anything at all. */ if (nr_extents == 0) - return 0; + return -EINVAL; /* * Here we know that nr_extents is greater than zero which means diff --git a/fs/namei.c b/fs/namei.c index 4e0de939fea1..4c961e07157c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2676,10 +2676,8 @@ static int lookup_one_common(struct mnt_idmap *idmap, if (!len) return -EACCES; - if (unlikely(name[0] == '.')) { - if (len < 2 || (len == 2 && name[1] == '.')) - return -EACCES; - } + if (is_dot_dotdot(name, len)) + return -EACCES; while (len--) { unsigned int c = *(const unsigned char *)name++; diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index ee3093be5170..144aa80cca43 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -419,7 +419,7 @@ static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry, * fnd contains tree's path to insert to. * If fnd is not NULL then dir is locked. */ - inode = ntfs_create_inode(mnt_idmap(file->f_path.mnt), dir, dentry, uni, + inode = ntfs_create_inode(file_mnt_idmap(file), dir, dentry, uni, mode, 0, NULL, 0, fnd); err = IS_ERR(inode) ? PTR_ERR(inode) : finish_open(file, dentry, ntfs_file_open); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 4ab66e3d4cff..df2ad2f60798 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -28,41 +28,38 @@ MODULE_LICENSE("GPL"); struct ovl_dir_cache; -static struct dentry *ovl_d_real(struct dentry *dentry, - const struct inode *inode) +static struct dentry *ovl_d_real(struct dentry *dentry, enum d_real_type type) { - struct dentry *real = NULL, *lower; + struct dentry *upper, *lower; int err; - /* - * vfs is only expected to call d_real() with NULL from d_real_inode() - * and with overlay inode from file_dentry() on an overlay file. - * - * TODO: remove @inode argument from d_real() API, remove code in this - * function that deals with non-NULL @inode and remove d_real() call - * from file_dentry(). - */ - if (inode && d_inode(dentry) == inode) - return dentry; - else if (inode) + switch (type) { + case D_REAL_DATA: + case D_REAL_METADATA: + break; + default: goto bug; + } if (!d_is_reg(dentry)) { /* d_real_inode() is only relevant for regular files */ return dentry; } - real = ovl_dentry_upper(dentry); - if (real && (inode == d_inode(real))) - return real; + upper = ovl_dentry_upper(dentry); + if (upper && (type == D_REAL_METADATA || + ovl_has_upperdata(d_inode(dentry)))) + return upper; - if (real && !inode && ovl_has_upperdata(d_inode(dentry))) - return real; + if (type == D_REAL_METADATA) { + lower = ovl_dentry_lower(dentry); + goto real_lower; + } /* - * Best effort lazy lookup of lowerdata for !inode case to return + * Best effort lazy lookup of lowerdata for D_REAL_DATA case to return * the real lowerdata dentry. The only current caller of d_real() with - * NULL inode is d_real_inode() from trace_uprobe and this caller is + * D_REAL_DATA is d_real_inode() from trace_uprobe and this caller is * likely going to be followed reading from the file, before placing * uprobes on offset within the file, so lowerdata should be available * when setting the uprobe. @@ -73,18 +70,13 @@ static struct dentry *ovl_d_real(struct dentry *dentry, lower = ovl_dentry_lowerdata(dentry); if (!lower) goto bug; - real = lower; - /* Handle recursion */ - real = d_real(real, inode); +real_lower: + /* Handle recursion into stacked lower fs */ + return d_real(lower, type); - if (!inode || inode == d_inode(real)) - return real; bug: - WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n", - __func__, dentry, inode ? inode->i_sb->s_id : "NULL", - inode ? inode->i_ino : 0, real, - real && d_inode(real) ? d_inode(real)->i_ino : 0); + WARN(1, "%s(%pd4, %d): real dentry not found\n", __func__, dentry, type); return dentry; } diff --git a/fs/pipe.c b/fs/pipe.c index f1adbfe743d4..50c8a8596b52 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 */ -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) +#define cmp_int(l, r) ((l > r) - (l < r)) + +#ifdef CONFIG_PROVE_LOCKING +static int pipe_lock_cmp_fn(const struct lockdep_map *a, + const struct lockdep_map *b) { - if (pipe->files) - mutex_lock_nested(&pipe->mutex, subclass); + return cmp_int((unsigned long) a, (unsigned long) b); } +#endif void pipe_lock(struct pipe_inode_info *pipe) { - /* - * pipe_lock() nests non-pipe inode locks (for writing to a file) - */ - pipe_lock_nested(pipe, I_MUTEX_PARENT); + if (pipe->files) + mutex_lock(&pipe->mutex); } EXPORT_SYMBOL(pipe_lock); @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe) } EXPORT_SYMBOL(pipe_unlock); -static inline void __pipe_lock(struct pipe_inode_info *pipe) -{ - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); -} - -static inline void __pipe_unlock(struct pipe_inode_info *pipe) -{ - mutex_unlock(&pipe->mutex); -} - void pipe_double_lock(struct pipe_inode_info *pipe1, struct pipe_inode_info *pipe2) { BUG_ON(pipe1 == pipe2); - if (pipe1 < pipe2) { - pipe_lock_nested(pipe1, I_MUTEX_PARENT); - pipe_lock_nested(pipe2, I_MUTEX_CHILD); - } else { - pipe_lock_nested(pipe2, I_MUTEX_PARENT); - pipe_lock_nested(pipe1, I_MUTEX_CHILD); - } + if (pipe1 > pipe2) + swap(pipe1, pipe2); + + pipe_lock(pipe1); + pipe_lock(pipe2); } static void anon_pipe_buf_release(struct pipe_inode_info *pipe, @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) return 0; ret = 0; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); /* * We only wake up writers if the pipe was full when we started @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) ret = -EAGAIN; break; } - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); /* * We only get here if we didn't actually read anything. @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) return -ERESTARTSYS; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); wake_next_reader = true; } if (pipe_empty(pipe->head, pipe->tail)) wake_next_reader = false; - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); if (was_full) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) if (unlikely(total_len == 0)) return 0; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); if (!pipe->readers) { send_sig(SIGPIPE, current, 0); @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * after waiting we need to re-check whether the pipe * become empty while we dropped the lock. */ - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); if (was_empty) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); was_empty = pipe_empty(pipe->head, pipe->tail); wake_next_writer = true; } out: if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) wake_next_writer = false; - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); /* * If we do do a wakeup event, we do a 'sync' wakeup, because we @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case FIONREAD: - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); count = 0; head = pipe->head; tail = pipe->tail; @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) count += pipe->bufs[tail & mask].len; tail++; } - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); return put_user(count, (int __user *)arg); #ifdef CONFIG_WATCH_QUEUE case IOC_WATCH_QUEUE_SET_SIZE: { int ret; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); ret = watch_queue_set_size(pipe, arg); - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); return ret; } @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file) { struct pipe_inode_info *pipe = file->private_data; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); if (file->f_mode & FMODE_READ) pipe->readers--; if (file->f_mode & FMODE_WRITE) @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); put_pipe_info(inode, pipe); return 0; @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on) struct pipe_inode_info *pipe = filp->private_data; int retval = 0; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); if (filp->f_mode & FMODE_READ) retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); if ((filp->f_mode & FMODE_WRITE) && retval >= 0) { @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on) /* this can happen only if on == T */ fasync_helper(-1, filp, 0, &pipe->fasync_readers); } - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); return retval; } @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void) pipe->nr_accounted = pipe_bufs; pipe->user = user; mutex_init(&pipe->mutex); + lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL); return pipe; } @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp) filp->private_data = pipe; /* OK, we have a pipe and it's pinned down */ - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); /* We can only do regular read/write on fifos */ stream_open(inode, filp); @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp) } /* Ok! */ - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); return 0; err_rd: @@ -1230,7 +1221,7 @@ err_wr: goto err; err: - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); put_pipe_info(inode, pipe); return ret; @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) if (!pipe) return -EBADF; - __pipe_lock(pipe); + mutex_lock(&pipe->mutex); switch (cmd) { case F_SETPIPE_SZ: @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) break; } - __pipe_unlock(pipe); + mutex_unlock(&pipe->mutex); return ret; } diff --git a/fs/select.c b/fs/select.c index 0ee55af1a55c..9515c3fa1a03 100644 --- a/fs/select.c +++ b/fs/select.c @@ -476,7 +476,7 @@ static inline void wait_key_set(poll_table *wait, unsigned long in, wait->_key |= POLLOUT_SET; } -static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) +static noinline_for_stack int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) { ktime_t expire, *to = NULL; struct poll_wqueues table; @@ -839,7 +839,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg) struct poll_list { struct poll_list *next; - int len; + unsigned int len; struct pollfd entries[]; }; @@ -975,14 +975,15 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, struct timespec64 *end_time) { struct poll_wqueues table; - int err = -EFAULT, fdcount, len; + int err = -EFAULT, fdcount; /* Allocate small arguments on the stack to save memory and be faster - use long to make sure the buffer is aligned properly on 64 bit archs to avoid unaligned access */ long stack_pps[POLL_STACK_ALLOC/sizeof(long)]; struct poll_list *const head = (struct poll_list *)stack_pps; struct poll_list *walk = head; - unsigned long todo = nfds; + unsigned int todo = nfds; + unsigned int len; if (nfds > rlimit(RLIMIT_NOFILE)) return -EINVAL; @@ -998,9 +999,9 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, sizeof(struct pollfd) * walk->len)) goto out_fds; - todo -= walk->len; - if (!todo) + if (walk->len >= todo) break; + todo -= walk->len; len = min(todo, POLLFD_PER_PAGE); walk = walk->next = kmalloc(struct_size(walk, entries, len), @@ -1020,7 +1021,7 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, for (walk = head; walk; walk = walk->next) { struct pollfd *fds = walk->entries; - int j; + unsigned int j; for (j = walk->len; j; fds++, ufds++, j--) unsafe_put_user(fds->revents, &ufds->revents, Efault); diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 410ab2a44d2f..19bcb51a2203 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -83,9 +83,6 @@ static inline sysv_zone_t *block_end(struct buffer_head *bh) return (sysv_zone_t*)((char*)bh->b_data + bh->b_size); } -/* - * Requires read_lock(&pointers_lock) or write_lock(&pointers_lock) - */ static Indirect *get_branch(struct inode *inode, int depth, int offsets[], @@ -105,15 +102,18 @@ static Indirect *get_branch(struct inode *inode, bh = sb_bread(sb, block); if (!bh) goto failure; + read_lock(&pointers_lock); if (!verify_chain(chain, p)) goto changed; add_chain(++p, bh, (sysv_zone_t*)bh->b_data + *++offsets); + read_unlock(&pointers_lock); if (!p->key) goto no_block; } return NULL; changed: + read_unlock(&pointers_lock); brelse(bh); *err = -EAGAIN; goto no_block; @@ -219,9 +219,7 @@ static int get_block(struct inode *inode, sector_t iblock, struct buffer_head *b goto out; reread: - read_lock(&pointers_lock); partial = get_branch(inode, depth, offsets, chain, &err); - read_unlock(&pointers_lock); /* Simplest case - block found, no allocation needed */ if (!partial) { @@ -291,9 +289,9 @@ static Indirect *find_shared(struct inode *inode, *top = 0; for (k = depth; k > 1 && !offsets[k-1]; k--) ; + partial = get_branch(inode, k, offsets, chain, &err); write_lock(&pointers_lock); - partial = get_branch(inode, k, offsets, chain, &err); if (!partial) partial = chain + k-1; /* diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 961f4d88f9ef..0c0695763bea 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -193,7 +193,6 @@ do { \ #ifndef smp_store_release #define smp_store_release(p, v) \ do { \ - compiletime_assert_atomic_type(*p); \ barrier(); \ WRITE_ONCE(*p, v); \ } while (0) @@ -203,7 +202,6 @@ do { \ #define smp_load_acquire(p) \ ({ \ __unqual_scalar_typeof(*p) ___p1 = READ_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ barrier(); \ (typeof(*p))___p1; \ }) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 1a97277f99b1..8e7af9a03b41 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -38,7 +38,6 @@ struct backing_dev_info *bdi_alloc(int node_id); void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); -void wb_wakeup_delayed(struct bdi_writeback *wb); void wb_wait_for_completion(struct wb_completion *done); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1666c387861f..d616a745a34c 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -125,6 +125,11 @@ enum dentry_d_lock_class DENTRY_D_LOCK_NESTED }; +enum d_real_type { + D_REAL_DATA, + D_REAL_METADATA, +}; + struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); @@ -139,7 +144,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *); + struct dentry *(*d_real)(struct dentry *, enum d_real_type type); } ____cacheline_aligned; /* @@ -546,24 +551,23 @@ static inline struct inode *d_backing_inode(const struct dentry *upper) /** * d_real - Return the real dentry * @dentry: the dentry to query - * @inode: inode to select the dentry from multiple layers (can be NULL) + * @type: the type of real dentry (data or metadata) * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * * See also: Documentation/filesystems/vfs.rst */ -static inline struct dentry *d_real(struct dentry *dentry, - const struct inode *inode) +static inline struct dentry *d_real(struct dentry *dentry, enum d_real_type type) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) - return dentry->d_op->d_real(dentry, inode); + return dentry->d_op->d_real(dentry, type); else return dentry; } /** - * d_real_inode - Return the real inode + * d_real_inode - Return the real inode hosting the data * @dentry: The dentry to query * * If dentry is on a union/overlay, then return the underlying, real inode. @@ -572,7 +576,7 @@ static inline struct dentry *d_real(struct dentry *dentry, static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ - return d_backing_inode(d_real((struct dentry *) dentry, NULL)); + return d_inode(d_real((struct dentry *) dentry, D_REAL_DATA)); } struct name_snapshot { diff --git a/include/linux/fs.h b/include/linux/fs.h index 55144c12ee0f..d15d808e8e31 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -483,10 +483,10 @@ struct address_space { pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; - struct rw_semaphore i_mmap_rwsem; errseq_t wb_err; spinlock_t i_private_lock; struct list_head i_private_list; + struct rw_semaphore i_mmap_rwsem; void * i_private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* @@ -908,7 +908,8 @@ static inline loff_t i_size_read(const struct inode *inode) preempt_enable(); return i_size; #else - return inode->i_size; + /* Pairs with smp_store_release() in i_size_write() */ + return smp_load_acquire(&inode->i_size); #endif } @@ -930,7 +931,12 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) inode->i_size = i_size; preempt_enable(); #else - inode->i_size = i_size; + /* + * Pairs with smp_load_acquire() in i_size_read() to ensure + * changes related to inode size (such as page contents) are + * visible before we see the changed inode size. + */ + smp_store_release(&inode->i_size, i_size); #endif } @@ -1079,9 +1085,20 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } +/* + * file_dentry() is a relic from the days that overlayfs was using files with a + * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs. + * In those days, file_dentry() was needed to get the underlying fs dentry that + * matches f_inode. + * Files with "fake" path should not exist nowadays, so use an assertion to make + * sure that file_dentry() was not papering over filesystem bugs. + */ static inline struct dentry *file_dentry(const struct file *file) { - return d_real(file->f_path.dentry, file_inode(file)); + struct dentry *dentry = file->f_path.dentry; + + WARN_ON_ONCE(d_inode(dentry) != file_inode(file)); + return dentry; } struct fasync_struct { @@ -2929,6 +2946,17 @@ extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); +/** + * is_dot_dotdot - returns true only if @name is "." or ".." + * @name: file name to check + * @len: length of file name, in bytes + */ +static inline bool is_dot_dotdot(const char *name, size_t len) +{ + return len && unlikely(name[0] == '.') && + (len == 1 || (len == 2 && name[1] == '.')); +} + #include <linux/err.h> /* needed for stackable file system support */ @@ -3337,6 +3365,8 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; + if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND))) + return -EINVAL; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) @@ -3347,6 +3377,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; + if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) { + if (IS_APPEND(file_inode(ki->ki_filp))) + return -EPERM; + ki->ki_flags &= ~IOCB_APPEND; + } + ki->ki_flags |= kiocb_flags; return 0; } diff --git a/include/linux/poll.h b/include/linux/poll.h index a9e0e1c2d1f2..d1ea4f3714a8 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -14,11 +14,7 @@ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. */ -#ifdef __clang__ -#define MAX_STACK_ALLOC 768 -#else #define MAX_STACK_ALLOC 832 -#endif #define FRONTEND_STACK_ALLOC 256 #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 48ad69f7722e..2203d3194b91 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -301,9 +301,12 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO O_APPEND */ #define RWF_APPEND ((__force __kernel_rwf_t)0x00000010) +/* per-IO negation of O_APPEND */ +#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND) + RWF_APPEND | RWF_NOAPPEND) /* Pagemap ioctl */ #define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) diff --git a/init/initramfs.c b/init/initramfs.c index 76deb48c38cb..d3c623dde01a 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -679,8 +679,6 @@ static void __init populate_initrd_image(char *err) struct file *file; loff_t pos = 0; - unpack_to_rootfs(__initramfs_start, __initramfs_size); - printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e0aa6b440ca5..73715d10c812 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -714,12 +714,11 @@ EXPORT_SYMBOL(iov_iter_discard); static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { + const struct iovec *iov = iter_iov(i); size_t size = i->count; size_t skip = i->iov_offset; - unsigned k; - for (k = 0; k < i->nr_segs; k++, skip = 0) { - const struct iovec *iov = iter_iov(i) + k; + do { size_t len = iov->iov_len - skip; if (len > size) @@ -729,34 +728,36 @@ static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask, if ((unsigned long)(iov->iov_base + skip) & addr_mask) return false; + iov++; size -= len; - if (!size) - break; - } + skip = 0; + } while (size); + return true; } static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { - size_t size = i->count; + const struct bio_vec *bvec = i->bvec; unsigned skip = i->iov_offset; - unsigned k; + size_t size = i->count; - for (k = 0; k < i->nr_segs; k++, skip = 0) { - size_t len = i->bvec[k].bv_len - skip; + do { + size_t len = bvec->bv_len; if (len > size) len = size; if (len & len_mask) return false; - if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask) + if ((unsigned long)(bvec->bv_offset + skip) & addr_mask) return false; + bvec++; size -= len; - if (!size) - break; - } + skip = 0; + } while (size); + return true; } @@ -800,13 +801,12 @@ EXPORT_SYMBOL_GPL(iov_iter_is_aligned); static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) { + const struct iovec *iov = iter_iov(i); unsigned long res = 0; size_t size = i->count; size_t skip = i->iov_offset; - unsigned k; - for (k = 0; k < i->nr_segs; k++, skip = 0) { - const struct iovec *iov = iter_iov(i) + k; + do { size_t len = iov->iov_len - skip; if (len) { res |= (unsigned long)iov->iov_base + skip; @@ -814,30 +814,31 @@ static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) len = size; res |= len; size -= len; - if (!size) - break; } - } + iov++; + skip = 0; + } while (size); return res; } static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i) { + const struct bio_vec *bvec = i->bvec; unsigned res = 0; size_t size = i->count; unsigned skip = i->iov_offset; - unsigned k; - for (k = 0; k < i->nr_segs; k++, skip = 0) { - size_t len = i->bvec[k].bv_len - skip; - res |= (unsigned long)i->bvec[k].bv_offset + skip; + do { + size_t len = bvec->bv_len - skip; + res |= (unsigned long)bvec->bv_offset + skip; if (len > size) len = size; res |= len; + bvec++; size -= len; - if (!size) - break; - } + skip = 0; + } while (size); + return res; } @@ -1166,11 +1167,12 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) EXPORT_SYMBOL(dup_iter); static __noclone int copy_compat_iovec_from_user(struct iovec *iov, - const struct iovec __user *uvec, unsigned long nr_segs) + const struct iovec __user *uvec, u32 nr_segs) { const struct compat_iovec __user *uiov = (const struct compat_iovec __user *)uvec; - int ret = -EFAULT, i; + int ret = -EFAULT; + u32 i; if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1e3447bccdb1..039dc74b505a 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -372,31 +372,6 @@ static int __init default_bdi_init(void) } subsys_initcall(default_bdi_init); -/* - * This function is used when the first inode for this wb is marked dirty. It - * wakes-up the corresponding bdi thread which should then take care of the - * periodic background write-out of dirty inodes. Since the write-out would - * starts only 'dirty_writeback_interval' centisecs from now anyway, we just - * set up a timer which wakes the bdi thread up later. - * - * Note, we wouldn't bother setting up the timer, but this function is on the - * fast-path (used by '__mark_inode_dirty()'), so we save few context switches - * by delaying the wake-up. - * - * We have to be careful not to postpone flush work if it is scheduled for - * earlier. Thus we use queue_delayed_work(). - */ -void wb_wakeup_delayed(struct bdi_writeback *wb) -{ - unsigned long timeout; - - timeout = msecs_to_jiffies(dirty_writeback_interval * 10); - spin_lock_irq(&wb->work_lock); - if (test_bit(WB_registered, &wb->state)) - queue_delayed_work(bdi_wq, &wb->dwork, timeout); - spin_unlock_irq(&wb->work_lock); -} - static void wb_update_bandwidth_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(to_delayed_work(work), diff --git a/mm/filemap.c b/mm/filemap.c index 750e779c23db..a72dd2eafd5a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2609,15 +2609,6 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count); /* - * Pairs with a barrier in - * block_write_end()->mark_buffer_dirty() or other page - * dirtying routines like iomap_write_end() to ensure - * changes to page contents are visible before we see - * increased inode size. - */ - smp_rmb(); - - /* * Once we start copying data, we don't want to be touching any * cachelines that might be contended: */ diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c index e19ab0e85709..759f86e7d263 100644 --- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c +++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c @@ -10,7 +10,6 @@ #include <linux/mount.h> #include <sys/syscall.h> #include <sys/stat.h> -#include <sys/mount.h> #include <sys/mman.h> #include <sched.h> #include <fcntl.h> @@ -32,7 +31,11 @@ static int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags) { return syscall(__NR_fsmount, fd, flags, attr_flags); } - +static int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return syscall(__NR_mount, src, tgt, fst, flags, data); +} static int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, unsigned int flags) @@ -166,8 +169,7 @@ int main(int argc, char **argv) ksft_test_result_skip("unable to create a new mount namespace\n"); return 1; } - - if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) { + if (sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) { pr_perror("mount"); return 1; } diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c index 50ed5d475dd1..bcf51d785a37 100644 --- a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c +++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c @@ -218,7 +218,7 @@ static bool move_mount_set_group_supported(void) if (mount(NULL, SET_GROUP_FROM, NULL, MS_SHARED, 0)) return -1; - ret = syscall(SYS_move_mount, AT_FDCWD, SET_GROUP_FROM, + ret = syscall(__NR_move_mount, AT_FDCWD, SET_GROUP_FROM, AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP); umount2("/tmp", MNT_DETACH); @@ -363,7 +363,7 @@ TEST_F(move_mount_set_group, complex_sharing_copying) CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0); ASSERT_EQ(wait_for_pid(pid), 0); - ASSERT_EQ(syscall(SYS_move_mount, ca_from.mntfd, "", + ASSERT_EQ(syscall(__NR_move_mount, ca_from.mntfd, "", ca_to.mntfd, "", MOVE_MOUNT_SET_GROUP | MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); |