diff options
author | Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> | 2021-10-01 21:38:40 +0300 |
---|---|---|
committer | Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> | 2021-10-01 21:40:21 +0300 |
commit | 9c881021a269af242594e2dfc79f1c4701404887 (patch) | |
tree | c8ec14f412d7ea35009b2dee08770082ddbb5c6e /fs | |
parent | e9479d98b87227b8b7502c4c1e778887b23799f1 (diff) | |
parent | cf06e1ab1c3ed354da5873e646f2164fea147c88 (diff) | |
download | linux-dev-5.10-intel.tar.xz |
Merge branch 'dev-5.10' into dev-5.10-inteldev-5.10-intel
Pull 5.10.67 stable from OpenBMC upstream.
Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
Diffstat (limited to 'fs')
65 files changed, 640 insertions, 421 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index ae0c38ad1fcb..0791480bf922 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -398,7 +398,7 @@ static int v9fs_test_inode(struct inode *inode, void *data) umode = p9mode2unixmode(v9ses, st, &rdev); /* don't match inode of different type */ - if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) + if (inode_wrong_type(inode, umode)) return 0; /* compare qid details */ @@ -1360,7 +1360,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) * Don't update inode if the file type is different */ umode = p9mode2unixmode(v9ses, st, &rdev); - if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) + if (inode_wrong_type(inode, umode)) goto out; /* diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 0028eccb665a..72b67d810b8c 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -59,7 +59,7 @@ static int v9fs_test_inode_dotl(struct inode *inode, void *data) struct p9_stat_dotl *st = (struct p9_stat_dotl *)data; /* don't match inode of different type */ - if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT)) + if (inode_wrong_type(inode, st->st_mode)) return 0; if (inode->i_generation != st->st_gen) @@ -933,7 +933,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) /* * Don't update inode if the file type is different */ - if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT)) + if (inode_wrong_type(inode, st->st_mode)) goto out; /* diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 8de4bf8edb9c..5a43f8e07122 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -308,6 +308,21 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode, mod); } +/* + * Called every time after doing a buffered, direct IO or memory mapped write. + * + * This is to ensure that if we write to a file that was previously fsynced in + * the current transaction, then try to fsync it again in the same transaction, + * we will know that there were changes in the file and that it needs to be + * logged. + */ +static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode) +{ + spin_lock(&inode->lock); + inode->last_sub_trans = inode->root->log_transid; + spin_unlock(&inode->lock); +} + static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) { int ret = 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ffa48ac98d1e..6ab91661cd26 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1862,7 +1862,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; u64 start_pos; u64 end_pos; ssize_t num_written = 0; @@ -2006,14 +2005,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, inode_unlock(inode); - /* - * We also have to set last_sub_trans to the current log transid, - * otherwise subsequent syncs to a file that's been synced in this - * transaction will appear to have already occurred. - */ - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->last_sub_trans = root->log_transid; - spin_unlock(&BTRFS_I(inode)->lock); + btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); + if (num_written > 0) num_written = generic_write_sync(iocb, num_written); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4f21b8fbfd4b..ff3f0638cdb9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -547,7 +547,7 @@ again: * inode has not been flagged as nocompress. This flag can * change at any time if we discover bad compression ratios. */ - if (nr_pages > 1 && inode_need_compress(BTRFS_I(inode), start, end)) { + if (inode_need_compress(BTRFS_I(inode), start, end)) { WARN_ON(pages); pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) { @@ -1202,11 +1202,6 @@ static noinline void async_cow_submit(struct btrfs_work *work) nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >> PAGE_SHIFT; - /* atomic_sub_return implies a barrier */ - if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) < - 5 * SZ_1M) - cond_wake_up_nomb(&fs_info->async_submit_wait); - /* * ->inode could be NULL if async_chunk_start has failed to compress, * in which case we don't have anything to submit, yet we need to @@ -1215,6 +1210,11 @@ static noinline void async_cow_submit(struct btrfs_work *work) */ if (async_chunk->inode) submit_compressed_extents(async_chunk); + + /* atomic_sub_return implies a barrier */ + if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) < + 5 * SZ_1M) + cond_wake_up_nomb(&fs_info->async_submit_wait); } static noinline void async_cow_free(struct btrfs_work *work) @@ -8449,9 +8449,7 @@ again: set_page_dirty(page); SetPageUptodate(page); - BTRFS_I(inode)->last_trans = fs_info->generation; - BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; - BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; + btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); unlock_extent_cached(io_tree, page_start, page_end, &cached_state); @@ -8904,8 +8902,14 @@ static int btrfs_rename_exchange(struct inode *old_dir, bool dest_log_pinned = false; bool need_abort = false; - /* we only allow rename subvolume link between subvolumes */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) + /* + * For non-subvolumes allow exchange only within one subvolume, in the + * same inode namespace. Two subvolumes (represented as directory) can + * be exchanged as they're a logical link and have a fixed inode number. + */ + if (root != dest && + (old_ino != BTRFS_FIRST_FREE_OBJECTID || + new_ino != BTRFS_FIRST_FREE_OBJECTID)) return -EXDEV; /* close the race window with snapshot create/destroy ioctl */ diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 858d9153a1cd..f73654d93fa0 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -171,7 +171,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, spin_lock(&inode->lock); inode->last_trans = trans->transaction->transid; inode->last_sub_trans = inode->root->log_transid; - inode->last_log_commit = inode->root->last_log_commit; + inode->last_log_commit = inode->last_sub_trans - 1; spin_unlock(&inode->lock); } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f36928efcf92..ec25e5eab349 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -708,7 +708,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, */ ret = btrfs_lookup_data_extent(fs_info, ins.objectid, ins.offset); - if (ret == 0) { + if (ret < 0) { + goto out; + } else if (ret == 0) { btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, ins.objectid, ins.offset, 0); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 920c84fae710..b4fcc48f255b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1129,6 +1129,9 @@ static void btrfs_close_one_device(struct btrfs_device *device) fs_devices->rw_devices--; } + if (device->devid == BTRFS_DEV_REPLACE_DEVID) + clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) fs_devices->missing_devices--; @@ -2059,7 +2062,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, if (IS_ERR(device)) { if (PTR_ERR(device) == -ENOENT && - strcmp(device_path, "missing") == 0) + device_path && strcmp(device_path, "missing") == 0) ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; else ret = PTR_ERR(device); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 45093a765a9b..678dac8365ed 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1752,7 +1752,14 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, struct ceph_cap_flush *ceph_alloc_cap_flush(void) { - return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL); + struct ceph_cap_flush *cf; + + cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL); + if (!cf) + return NULL; + + cf->is_capsnap = false; + return cf; } void ceph_free_cap_flush(struct ceph_cap_flush *cf) @@ -1787,7 +1794,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc, prev->wake = true; wake = false; } - list_del(&cf->g_list); + list_del_init(&cf->g_list); return wake; } @@ -1802,7 +1809,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci, prev->wake = true; wake = false; } - list_del(&cf->i_list); + list_del_init(&cf->i_list); return wake; } @@ -2422,7 +2429,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) { - if (!cf->caps) { + if (cf->is_capsnap) { last_snap_flush = cf->tid; break; } @@ -2441,7 +2448,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, first_tid = cf->tid + 1; - if (cf->caps) { + if (!cf->is_capsnap) { struct cap_msg_args arg; dout("kick_flushing_caps %p cap %p tid %llu %s\n", @@ -3564,7 +3571,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, cleaned = cf->caps; /* Is this a capsnap? */ - if (cf->caps == 0) + if (cf->is_capsnap) continue; if (cf->tid <= flush_tid) { @@ -3637,8 +3644,9 @@ out: while (!list_empty(&to_remove)) { cf = list_first_entry(&to_remove, struct ceph_cap_flush, i_list); - list_del(&cf->i_list); - ceph_free_cap_flush(cf); + list_del_init(&cf->i_list); + if (!cf->is_capsnap) + ceph_free_cap_flush(cf); } if (wake_ci) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1701902415c4..816cea497537 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1618,7 +1618,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, spin_lock(&mdsc->cap_dirty_lock); list_for_each_entry(cf, &to_remove, i_list) - list_del(&cf->g_list); + list_del_init(&cf->g_list); if (!list_empty(&ci->i_dirty_item)) { pr_warn_ratelimited( @@ -1670,8 +1670,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_cap_flush *cf; cf = list_first_entry(&to_remove, struct ceph_cap_flush, i_list); - list_del(&cf->i_list); - ceph_free_cap_flush(cf); + list_del_init(&cf->i_list); + if (!cf->is_capsnap) + ceph_free_cap_flush(cf); } wake_up_all(&ci->i_cap_wq); diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 1096d1d3a84c..47f2903bacb9 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -393,9 +393,11 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m) { int i; - for (i = 0; i < m->possible_max_rank; i++) - kfree(m->m_info[i].export_targets); - kfree(m->m_info); + if (m->m_info) { + for (i = 0; i < m->possible_max_rank; i++) + kfree(m->m_info[i].export_targets); + kfree(m->m_info); + } kfree(m->m_data_pg_pools); kfree(m); } diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 803b60a96702..0369f672a76f 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -487,6 +487,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode); return; } + capsnap->cap_flush.is_capsnap = true; + INIT_LIST_HEAD(&capsnap->cap_flush.i_list); + INIT_LIST_HEAD(&capsnap->cap_flush.g_list); spin_lock(&ci->i_ceph_lock); used = __ceph_caps_used(ci); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6712509ae1d6..a8c460393b01 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -181,8 +181,9 @@ struct ceph_cap { struct ceph_cap_flush { u64 tid; - int caps; /* 0 means capsnap */ + int caps; bool wake; /* wake up flush waiters when finish ? */ + bool is_capsnap; /* true means capsnap */ struct list_head g_list; // global struct list_head i_list; // per inode }; diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 9bd03a231032..171ad8b42107 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -358,14 +358,9 @@ cifs_strndup_from_utf16(const char *src, const int maxlen, if (!dst) return NULL; cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, - NO_MAP_UNI_RSVD); + NO_MAP_UNI_RSVD); } else { - len = strnlen(src, maxlen); - len++; - dst = kmalloc(len, GFP_KERNEL); - if (!dst) - return NULL; - strlcpy(dst, src, len); + dst = kstrndup(src, maxlen, GFP_KERNEL); } return dst; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b1f0c05d6eaf..b11a919b9cab 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -425,8 +425,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, } /* if filetype is different, return error */ - if (unlikely(((*pinode)->i_mode & S_IFMT) != - (fattr.cf_mode & S_IFMT))) { + if (unlikely(inode_wrong_type(*pinode, fattr.cf_mode))) { CIFS_I(*pinode)->time = 0; /* force reval */ rc = -ESTALE; goto cgiiu_exit; @@ -1243,7 +1242,7 @@ cifs_find_inode(struct inode *inode, void *opaque) return 0; /* don't match inode of different type */ - if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT)) + if (inode_wrong_type(inode, fattr->cf_mode)) return 0; /* if it's not a directory or has no dentries, then flag it */ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 1a0298d1e7cd..d58c5ffeca0d 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -888,7 +888,7 @@ sess_alloc_buffer(struct sess_data *sess_data, int wct) return 0; out_free_smb_buf: - kfree(smb_buf); + cifs_small_buf_release(smb_buf); sess_data->iov[0].iov_base = NULL; sess_data->iov[0].iov_len = 0; sess_data->buf0_type = CIFS_NO_BUFFER; diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 061418be4b08..4180371bf864 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -379,3 +379,47 @@ err_kfree: return ERR_PTR(err); } EXPORT_SYMBOL_GPL(fscrypt_get_symlink); + +/** + * fscrypt_symlink_getattr() - set the correct st_size for encrypted symlinks + * @path: the path for the encrypted symlink being queried + * @stat: the struct being filled with the symlink's attributes + * + * Override st_size of encrypted symlinks to be the length of the decrypted + * symlink target (or the no-key encoded symlink target, if the key is + * unavailable) rather than the length of the encrypted symlink target. This is + * necessary for st_size to match the symlink target that userspace actually + * sees. POSIX requires this, and some userspace programs depend on it. + * + * This requires reading the symlink target from disk if needed, setting up the + * inode's encryption key if possible, and then decrypting or encoding the + * symlink target. This makes lstat() more heavyweight than is normally the + * case. However, decrypted symlink targets will be cached in ->i_link, so + * usually the symlink won't have to be read and decrypted again later if/when + * it is actually followed, readlink() is called, or lstat() is called again. + * + * Return: 0 on success, -errno on failure + */ +int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat) +{ + struct dentry *dentry = path->dentry; + struct inode *inode = d_inode(dentry); + const char *link; + DEFINE_DELAYED_CALL(done); + + /* + * To get the symlink target that userspace will see (whether it's the + * decrypted target or the no-key encoded target), we can just get it in + * the same way the VFS does during path resolution and readlink(). + */ + link = READ_ONCE(inode->i_link); + if (!link) { + link = inode->i_op->get_link(dentry, inode, &done); + if (IS_ERR(link)) + return PTR_ERR(link); + } + stat->size = strlen(link); + do_delayed_call(&done); + return 0; +} +EXPORT_SYMBOL_GPL(fscrypt_symlink_getattr); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 686e0ad28788..3aa5eb9ce498 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -179,8 +179,10 @@ static int open_proxy_open(struct inode *inode, struct file *filp) if (!fops_get(real_fops)) { #ifdef CONFIG_MODULES if (real_fops->owner && - real_fops->owner->state == MODULE_STATE_GOING) + real_fops->owner->state == MODULE_STATE_GOING) { + r = -ENXIO; goto out; + } #endif /* Huh? Module did not clean up after itself at exit? */ @@ -314,8 +316,10 @@ static int full_proxy_open(struct inode *inode, struct file *filp) if (!fops_get(real_fops)) { #ifdef CONFIG_MODULES if (real_fops->owner && - real_fops->owner->state == MODULE_STATE_GOING) + real_fops->owner->state == MODULE_STATE_GOING) { + r = -ENXIO; goto out; + } #endif /* Huh? Module did not cleanup after itself at exit? */ diff --git a/fs/exec.c b/fs/exec.c index c7a4ef8df305..ca89e0e3ef10 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1347,10 +1347,6 @@ int begin_new_exec(struct linux_binprm * bprm) WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1); flush_signal_handlers(me, 0); - retval = set_cred_ucounts(bprm->cred); - if (retval < 0) - goto out_unlock; - /* * install the new credentials for this executable */ diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index b41512d1badc..0f7b53d5edea 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -750,6 +750,12 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, ext4_write_lock_xattr(inode, &no_expand); BUG_ON(!ext4_has_inline_data(inode)); + /* + * ei->i_inline_off may have changed since ext4_write_begin() + * called ext4_try_to_write_inline_data() + */ + (void) ext4_find_inline_data_nolock(inode); + kaddr = kmap_atomic(page); ext4_write_inline_data(inode, &iloc, kaddr, pos, len); kunmap_atomic(kaddr); diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index dd05af983092..a9457fed351e 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -52,10 +52,19 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, return paddr; } +static int ext4_encrypted_symlink_getattr(const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int query_flags) +{ + ext4_getattr(path, stat, request_mask, query_flags); + + return fscrypt_symlink_getattr(path, stat); +} + const struct inode_operations ext4_encrypted_symlink_inode_operations = { .get_link = ext4_encrypted_get_link, .setattr = ext4_setattr, - .getattr = ext4_getattr, + .getattr = ext4_encrypted_symlink_getattr, .listxattr = ext4_listxattr, }; diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index f94b13075ea4..30987ea011f1 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1308,12 +1308,6 @@ out_destroy_crypt: for (--i; i >= 0; i--) fscrypt_finalize_bounce_page(&cc->cpages[i]); - for (i = 0; i < cc->nr_cpages; i++) { - if (!cc->cpages[i]) - continue; - f2fs_compress_free_page(cc->cpages[i]); - cc->cpages[i] = NULL; - } out_put_cic: kmem_cache_free(cic_entry_slab, cic); out_put_dnode: @@ -1324,6 +1318,12 @@ out_unlock_op: else f2fs_unlock_op(sbi); out_free: + for (i = 0; i < cc->nr_cpages; i++) { + if (!cc->cpages[i]) + continue; + f2fs_compress_free_page(cc->cpages[i]); + cc->cpages[i] = NULL; + } page_array_free(cc->inode, cc->cpages, cc->nr_cpages); cc->cpages = NULL; return -EAGAIN; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cfae2dddb0ba..1b11a42847c4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1550,7 +1550,21 @@ next_dnode: if (err) { if (flag == F2FS_GET_BLOCK_BMAP) map->m_pblk = 0; + if (err == -ENOENT) { + /* + * There is one exceptional case that read_node_page() + * may return -ENOENT due to filesystem has been + * shutdown or cp_error, so force to convert error + * number to EIO for such case. + */ + if (map->m_may_create && + (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || + f2fs_cp_error(sbi))) { + err = -EIO; + goto unlock_out; + } + err = 0; if (map->m_next_pgofs) *map->m_next_pgofs = @@ -2205,6 +2219,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, continue; } unlock_page(page); + if (for_write) + put_page(page); cc->rpages[i] = NULL; cc->nr_rpages--; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4b9ef8bbfa4a..6694298b1660 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -938,6 +938,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); struct blk_plug plug; bool readdir_ra = sbi->readdir_ra == 1; + bool found_valid_dirent = false; int err = 0; bit_pos = ((unsigned long)ctx->pos % d->max); @@ -952,13 +953,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, de = &d->dentry[bit_pos]; if (de->name_len == 0) { + if (found_valid_dirent || !bit_pos) { + printk_ratelimited( + "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.", + KERN_WARNING, sbi->sb->s_id, + le32_to_cpu(de->ino)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } bit_pos++; ctx->pos = start_pos + bit_pos; - printk_ratelimited( - "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.", - KERN_WARNING, sbi->sb->s_id, - le32_to_cpu(de->ino)); - set_sbi_flag(sbi, SBI_NEED_FSCK); continue; } @@ -1001,6 +1004,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); ctx->pos = start_pos + bit_pos; + found_valid_dirent = true; } out: if (readdir_ra) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5c74b2997197..1fbaab1f7aba 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -259,8 +259,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, }; unsigned int seq_id = 0; - if (unlikely(f2fs_readonly(inode->i_sb) || - is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + if (unlikely(f2fs_readonly(inode->i_sb))) return 0; trace_f2fs_sync_file_enter(inode); @@ -274,7 +273,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, ret = file_write_and_wait_range(file, start, end); clear_inode_flag(inode, FI_NEED_IPU); - if (ret) { + if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); return ret; } @@ -1081,7 +1080,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) } if (pg_start < pg_end) { - struct address_space *mapping = inode->i_mapping; loff_t blk_start, blk_end; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1093,8 +1091,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_inode_pages_range(mapping, blk_start, - blk_end - 1); + truncate_pagecache_range(inode, blk_start, blk_end - 1); f2fs_lock_op(sbi); ret = f2fs_truncate_hole(inode, pg_start, pg_end); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e02affb5c0e7..72f227f6ebad 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1477,8 +1477,10 @@ next_step: int err; if (S_ISREG(inode->i_mode)) { - if (!down_write_trylock(&fi->i_gc_rwsem[READ])) + if (!down_write_trylock(&fi->i_gc_rwsem[READ])) { + sbi->skipped_gc_rwsem++; continue; + } if (!down_write_trylock( &fi->i_gc_rwsem[WRITE])) { sbi->skipped_gc_rwsem++; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 17d0e5f4efec..710a6f73a685 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1307,9 +1307,18 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, return target; } +static int f2fs_encrypted_symlink_getattr(const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int query_flags) +{ + f2fs_getattr(path, stat, request_mask, query_flags); + + return fscrypt_symlink_getattr(path, stat); +} + const struct inode_operations f2fs_encrypted_symlink_inode_operations = { .get_link = f2fs_encrypted_get_link, - .getattr = f2fs_getattr, + .getattr = f2fs_encrypted_symlink_getattr, .setattr = f2fs_setattr, .listxattr = f2fs_listxattr, }; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c52988067887..de543168b370 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1764,8 +1764,17 @@ restore_flag: static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) { + int retry = DEFAULT_RETRY_IO_COUNT; + /* we should flush all the data to keep data consistency */ - sync_inodes_sb(sbi->sb); + do { + sync_inodes_sb(sbi->sb); + cond_resched(); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); + } while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--); + + if (unlikely(retry < 0)) + f2fs_warn(sbi, "checkpoint=enable has some unwritten data."); down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); @@ -2197,6 +2206,33 @@ static int f2fs_enable_quotas(struct super_block *sb) return 0; } +static int f2fs_quota_sync_file(struct f2fs_sb_info *sbi, int type) +{ + struct quota_info *dqopt = sb_dqopt(sbi->sb); + struct address_space *mapping = dqopt->files[type]->i_mapping; + int ret = 0; + + ret = dquot_writeback_dquots(sbi->sb, type); + if (ret) + goto out; + + ret = filemap_fdatawrite(mapping); + if (ret) + goto out; + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + goto out; + + ret = filemap_fdatawait(mapping); + + truncate_inode_pages(&dqopt->files[type]->i_data, 0); +out: + if (ret) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + return ret; +} + int f2fs_quota_sync(struct super_block *sb, int type) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -2205,56 +2241,41 @@ int f2fs_quota_sync(struct super_block *sb, int type) int ret; /* - * do_quotactl - * f2fs_quota_sync - * down_read(quota_sem) - * dquot_writeback_dquots() - * f2fs_dquot_commit - * block_operation - * down_read(quota_sem) - */ - f2fs_lock_op(sbi); - - down_read(&sbi->quota_sem); - ret = dquot_writeback_dquots(sb, type); - if (ret) - goto out; - - /* * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - struct address_space *mapping; if (type != -1 && cnt != type) continue; - if (!sb_has_quota_active(sb, cnt)) - continue; - mapping = dqopt->files[cnt]->i_mapping; + if (!sb_has_quota_active(sb, type)) + return 0; - ret = filemap_fdatawrite(mapping); - if (ret) - goto out; + inode_lock(dqopt->files[cnt]); - /* if we are using journalled quota */ - if (is_journalled_quota(sbi)) - continue; + /* + * do_quotactl + * f2fs_quota_sync + * down_read(quota_sem) + * dquot_writeback_dquots() + * f2fs_dquot_commit + * block_operation + * down_read(quota_sem) + */ + f2fs_lock_op(sbi); + down_read(&sbi->quota_sem); - ret = filemap_fdatawait(mapping); - if (ret) - set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + ret = f2fs_quota_sync_file(sbi, cnt); + + up_read(&sbi->quota_sem); + f2fs_unlock_op(sbi); - inode_lock(dqopt->files[cnt]); - truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); + + if (ret) + break; } -out: - if (ret) - set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); - f2fs_unlock_op(sbi); return ret; } @@ -2889,11 +2910,13 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } - if (le32_to_cpu(raw_super->cp_payload) > - (blocks_per_seg - F2FS_CP_PACKS)) { - f2fs_info(sbi, "Insane cp_payload (%u > %u)", + if (le32_to_cpu(raw_super->cp_payload) >= + (blocks_per_seg - F2FS_CP_PACKS - + NR_CURSEG_PERSIST_TYPE)) { + f2fs_info(sbi, "Insane cp_payload (%u >= %u)", le32_to_cpu(raw_super->cp_payload), - blocks_per_seg - F2FS_CP_PACKS); + blocks_per_seg - F2FS_CP_PACKS - + NR_CURSEG_PERSIST_TYPE); return -EFSCORRUPTED; } @@ -2929,6 +2952,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int cp_pack_start_sum, cp_payload; block_t user_block_count, valid_user_blocks; block_t avail_node_count, valid_node_count; + unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks; int i, j; total = le32_to_cpu(raw_super->segment_count); @@ -3049,6 +3073,17 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + nat_blocks = nat_segs << log_blocks_per_seg; + nat_bits_bytes = nat_blocks / BITS_PER_BYTE; + nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); + if (__is_set_ckpt_flags(ckpt, CP_NAT_BITS_FLAG) && + (cp_payload + F2FS_CP_PACKS + + NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) { + f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)", + cp_payload, nat_bits_blocks); + return -EFSCORRUPTED; + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_err(sbi, "A bug case: need to run fsck"); return 1; diff --git a/fs/fcntl.c b/fs/fcntl.c index 05b36b28f2e8..71b43538fa44 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -995,13 +995,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; + unsigned long flags; if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } - read_lock(&fa->fa_lock); + read_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { fown = &fa->fa_file->f_owner; /* Don't send SIGURG to processes which have not set a @@ -1010,7 +1011,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } - read_unlock(&fa->fa_lock); + read_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } } diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 751bc5b1cddf..6104f627cc71 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -74,10 +74,8 @@ void fscache_free_cookie(struct fscache_cookie *cookie) static int fscache_set_key(struct fscache_cookie *cookie, const void *index_key, size_t index_key_len) { - unsigned long long h; u32 *buf; int bufs; - int i; bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf)); @@ -91,17 +89,7 @@ static int fscache_set_key(struct fscache_cookie *cookie, } memcpy(buf, index_key, index_key_len); - - /* Calculate a hash and combine this with the length in the first word - * or first half word - */ - h = (unsigned long)cookie->parent; - h += index_key_len + cookie->type; - - for (i = 0; i < bufs; i++) - h += buf[i]; - - cookie->key_hash = h ^ (h >> 32); + cookie->key_hash = fscache_hash(0, buf, bufs); return 0; } diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 08e91efbce53..64aa552b296d 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -97,6 +97,8 @@ extern struct workqueue_struct *fscache_object_wq; extern struct workqueue_struct *fscache_op_wq; DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait); +extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n); + static inline bool fscache_object_congested(void) { return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); diff --git a/fs/fscache/main.c b/fs/fscache/main.c index c1e6cc9091aa..4207f98e405f 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -94,6 +94,45 @@ static struct ctl_table fscache_sysctls_root[] = { #endif /* + * Mixing scores (in bits) for (7,20): + * Input delta: 1-bit 2-bit + * 1 round: 330.3 9201.6 + * 2 rounds: 1246.4 25475.4 + * 3 rounds: 1907.1 31295.1 + * 4 rounds: 2042.3 31718.6 + * Perfect: 2048 31744 + * (32*64) (32*31/2 * 64) + */ +#define HASH_MIX(x, y, a) \ + ( x ^= (a), \ + y ^= x, x = rol32(x, 7),\ + x += y, y = rol32(y,20),\ + y *= 9 ) + +static inline unsigned int fold_hash(unsigned long x, unsigned long y) +{ + /* Use arch-optimized multiply if one exists */ + return __hash_32(y ^ __hash_32(x)); +} + +/* + * Generate a hash. This is derived from full_name_hash(), but we want to be + * sure it is arch independent and that it doesn't change as bits of the + * computed hash value might appear on disk. The caller also guarantees that + * the hashed data will be a series of aligned 32-bit words. + */ +unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n) +{ + unsigned int a, x = 0, y = salt; + + for (; n; n--) { + a = *data++; + HASH_MIX(x, y, a); + } + return fold_hash(x, y); +} + +/* * initialise the fs caching module */ static int __init fscache_init(void) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 756bbdd563e0..2e300176cb88 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -252,7 +252,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) if (ret == -ENOMEM) goto out; if (ret || fuse_invalid_attr(&outarg.attr) || - (outarg.attr.mode ^ inode->i_mode) & S_IFMT) + fuse_stale_inode(inode, outarg.generation, &outarg.attr)) goto invalid; forget_all_cached_acls(inode); @@ -1062,7 +1062,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, err = fuse_simple_request(fm, &args); if (!err) { if (fuse_invalid_attr(&outarg.attr) || - (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { + inode_wrong_type(inode, outarg.attr.mode)) { fuse_make_bad(inode); err = -EIO; } else { @@ -1699,7 +1699,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, } if (fuse_invalid_attr(&outarg.attr) || - (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { + inode_wrong_type(inode, outarg.attr.mode)) { fuse_make_bad(inode); err = -EIO; goto error; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8de9c24ac4ac..c9606f2d2864 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -194,12 +194,11 @@ void fuse_finish_open(struct inode *inode, struct file *file) struct fuse_file *ff = file->private_data; struct fuse_conn *fc = get_fuse_conn(inode); - if (!(ff->open_flags & FOPEN_KEEP_CACHE)) - invalidate_inode_pages2(inode->i_mapping); if (ff->open_flags & FOPEN_STREAM) stream_open(inode, file); else if (ff->open_flags & FOPEN_NONSEEKABLE) nonseekable_open(inode, file); + if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -207,10 +206,14 @@ void fuse_finish_open(struct inode *inode, struct file *file) fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, 0); spin_unlock(&fi->lock); + truncate_pagecache(inode, 0); fuse_invalidate_attr(inode); if (fc->writeback_cache) file_update_time(file); + } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { + invalidate_inode_pages2(inode->i_mapping); } + if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) fuse_link_write_file(file); } @@ -3237,7 +3240,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end) { - int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + int err = filemap_write_and_wait_range(inode->i_mapping, start, -1); if (!err) fuse_sync_writes(inode); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 8150621101c6..ff94da684017 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -860,6 +860,13 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) return atomic64_read(&fc->attr_version); } +static inline bool fuse_stale_inode(const struct inode *inode, int generation, + struct fuse_attr *attr) +{ + return inode->i_generation != generation || + inode_wrong_type(inode, attr->mode); +} + static inline void fuse_make_bad(struct inode *inode) { remove_inode_hash(inode); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f94b0bb57619..053c56af3b6f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -340,8 +340,8 @@ retry: inode->i_generation = generation; fuse_init_inode(inode, attr); unlock_new_inode(inode); - } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { - /* Inode has changed type, any I/O on the old should fail */ + } else if (fuse_stale_inode(inode, generation, attr)) { + /* nodeid was reused, any I/O on the old inode should fail */ fuse_make_bad(inode); iput(inode); goto retry; diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 3441ffa740f3..bc267832310c 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -200,9 +200,12 @@ retry: if (!d_in_lookup(dentry)) { struct fuse_inode *fi; inode = d_inode(dentry); + if (inode && get_node_id(inode) != o->nodeid) + inode = NULL; if (!inode || - get_node_id(inode) != o->nodeid || - ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { + fuse_stale_inode(inode, o->generation, &o->attr)) { + if (inode) + fuse_make_bad(inode); d_invalidate(dentry); dput(dentry); goto retry; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 3faa421568b0..bf539eab92c6 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -623,16 +623,13 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head, false); - if (error) - gfs2_consist(sdp); - if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) - gfs2_consist(sdp); - - /* Initialize some head of the log stuff */ - if (!gfs2_withdrawn(sdp)) { - sdp->sd_log_sequence = head.lh_sequence + 1; - gfs2_log_pointers_init(sdp, head.lh_blkno); - } + if (gfs2_assert_withdraw_delayed(sdp, !error)) + return error; + if (gfs2_assert_withdraw_delayed(sdp, head.lh_flags & + GFS2_LOG_HEAD_UNMOUNT)) + return -EIO; + sdp->sd_log_sequence = head.lh_sequence + 1; + gfs2_log_pointers_init(sdp, head.lh_blkno); } return 0; } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 153272f82984..5564aa8b4592 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -296,6 +296,11 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_update_request_times(gl); + /* don't want to call dlm if we've unmounted the lock protocol */ + if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { + gfs2_glock_free(gl); + return; + } /* don't want to skip dlm_unlock writing the lvb when lock has one */ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ae9c5c1bdc50..b9ed6a6dbcf5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -660,6 +660,7 @@ static int init_statfs(struct gfs2_sbd *sdp) error = PTR_ERR(lsi->si_sc_inode); fs_err(sdp, "can't find local \"sc\" file#%u: %d\n", jd->jd_jid, error); + kfree(lsi); goto free_local; } lsi->si_jid = jd->jd_jid; @@ -1071,6 +1072,34 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp) kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp); } +static int init_threads(struct gfs2_sbd *sdp) +{ + struct task_struct *p; + int error = 0; + + p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); + if (IS_ERR(p)) { + error = PTR_ERR(p); + fs_err(sdp, "can't start logd thread: %d\n", error); + return error; + } + sdp->sd_logd_process = p; + + p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); + if (IS_ERR(p)) { + error = PTR_ERR(p); + fs_err(sdp, "can't start quotad thread: %d\n", error); + goto fail; + } + sdp->sd_quotad_process = p; + return 0; + +fail: + kthread_stop(sdp->sd_logd_process); + sdp->sd_logd_process = NULL; + return error; +} + /** * gfs2_fill_super - Read in superblock * @sb: The VFS superblock @@ -1197,6 +1226,14 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) goto fail_per_node; } + if (!sb_rdonly(sb)) { + error = init_threads(sdp); + if (error) { + gfs2_withdraw_delayed(sdp); + goto fail_per_node; + } + } + error = gfs2_freeze_lock(sdp, &freeze_gh, 0); if (error) goto fail_per_node; @@ -1206,6 +1243,12 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) gfs2_freeze_unlock(&freeze_gh); if (error) { + if (sdp->sd_quotad_process) + kthread_stop(sdp->sd_quotad_process); + sdp->sd_quotad_process = NULL; + if (sdp->sd_logd_process) + kthread_stop(sdp->sd_logd_process); + sdp->sd_logd_process = NULL; fs_err(sdp, "can't make FS RW: %d\n", error); goto fail_per_node; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 077dc8c035a8..6a355e1347d7 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -126,34 +126,6 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) return 0; } -static int init_threads(struct gfs2_sbd *sdp) -{ - struct task_struct *p; - int error = 0; - - p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); - if (IS_ERR(p)) { - error = PTR_ERR(p); - fs_err(sdp, "can't start logd thread: %d\n", error); - return error; - } - sdp->sd_logd_process = p; - - p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); - if (IS_ERR(p)) { - error = PTR_ERR(p); - fs_err(sdp, "can't start quotad thread: %d\n", error); - goto fail; - } - sdp->sd_quotad_process = p; - return 0; - -fail: - kthread_stop(sdp->sd_logd_process); - sdp->sd_logd_process = NULL; - return error; -} - /** * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one * @sdp: the filesystem @@ -168,26 +140,17 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) struct gfs2_log_header_host head; int error; - error = init_threads(sdp); - if (error) { - gfs2_withdraw_delayed(sdp); - return error; - } - j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); - if (gfs2_withdrawn(sdp)) { - error = -EIO; - goto fail; - } + if (gfs2_withdrawn(sdp)) + return -EIO; error = gfs2_find_jhead(sdp->sd_jdesc, &head, false); if (error || gfs2_withdrawn(sdp)) - goto fail; + return error; if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { gfs2_consist(sdp); - error = -EIO; - goto fail; + return -EIO; } /* Initialize some head of the log stuff */ @@ -195,20 +158,8 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) gfs2_log_pointers_init(sdp, head.lh_blkno); error = gfs2_quota_init(sdp); - if (error || gfs2_withdrawn(sdp)) - goto fail; - - set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - - return 0; - -fail: - if (sdp->sd_quotad_process) - kthread_stop(sdp->sd_quotad_process); - sdp->sd_quotad_process = NULL; - if (sdp->sd_logd_process) - kthread_stop(sdp->sd_logd_process); - sdp->sd_logd_process = NULL; + if (!error && !gfs2_withdrawn(sdp)) + set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); return error; } diff --git a/fs/io-wq.c b/fs/io-wq.c index 8bb17b6d4de3..3d5fc76b92d0 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -895,7 +895,7 @@ append: static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) { struct io_wqe_acct *acct = io_work_get_acct(wqe, work); - int work_flags; + bool do_wake; unsigned long flags; /* @@ -909,14 +909,14 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) return; } - work_flags = work->flags; raw_spin_lock_irqsave(&wqe->lock, flags); io_wqe_insert_work(wqe, work); wqe->flags &= ~IO_WQE_FLAG_STALLED; + do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) || + !atomic_read(&acct->nr_running); raw_spin_unlock_irqrestore(&wqe->lock, flags); - if ((work_flags & IO_WQ_WORK_CONCURRENT) || - !atomic_read(&acct->nr_running)) + if (do_wake) io_wqe_wake_worker(wqe, acct); } diff --git a/fs/io_uring.c b/fs/io_uring.c index ed641dca7957..d0089039fee7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -889,6 +889,7 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_WRITE] = { .needs_file = 1, + .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, .async_size = sizeof(struct io_async_rw), @@ -1497,6 +1498,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status) ret = hrtimer_try_to_cancel(&io->timer); if (ret != -1) { + if (status) + req_set_fail_links(req); atomic_set(&req->ctx->cq_timeouts, atomic_read(&req->ctx->cq_timeouts) + 1); list_del_init(&req->timeout.list); @@ -3125,7 +3128,7 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb *req, ret = import_single_range(rw, buf, sqe_len, *iovec, iter); *iovec = NULL; - return ret < 0 ? ret : sqe_len; + return ret; } if (req->flags & REQ_F_BUFFER_SELECT) { @@ -3151,7 +3154,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, if (!iorw) return __io_import_iovec(rw, req, iovec, iter, needs_lock); *iovec = NULL; - return iov_iter_count(&iorw->iter); + return 0; } static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb) @@ -3410,7 +3413,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, struct iov_iter __iter, *iter = &__iter; struct io_async_rw *rw = req->async_data; ssize_t io_size, ret, ret2; - size_t iov_count; bool no_async; if (rw) @@ -3419,8 +3421,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; - iov_count = iov_iter_count(iter); - io_size = ret; + io_size = iov_iter_count(iter); req->result = io_size; ret = 0; @@ -3436,7 +3437,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, if (no_async) goto copy_iov; - ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), iov_count); + ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size); if (unlikely(ret)) goto out_free; @@ -3455,7 +3456,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, if (req->file->f_flags & O_NONBLOCK) goto done; /* some cases will consume bytes even on error returns */ - iov_iter_revert(iter, iov_count - iov_iter_count(iter)); + iov_iter_revert(iter, io_size - iov_iter_count(iter)); ret = 0; goto copy_iov; } else if (ret < 0) { @@ -3539,7 +3540,6 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, *iter = &__iter; struct io_async_rw *rw = req->async_data; - size_t iov_count; ssize_t ret, ret2, io_size; if (rw) @@ -3548,8 +3548,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; - iov_count = iov_iter_count(iter); - io_size = ret; + io_size = iov_iter_count(iter); req->result = io_size; /* Ensure we clear previously set non-block flag */ @@ -3567,7 +3566,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, (req->flags & REQ_F_ISREG)) goto copy_iov; - ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), iov_count); + ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size); if (unlikely(ret)) goto out_free; @@ -3610,7 +3609,7 @@ done: } else { copy_iov: /* some cases will consume bytes even on error returns */ - iov_iter_revert(iter, iov_count - iov_iter_count(iter)); + iov_iter_revert(iter, io_size - iov_iter_count(iter)); ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); if (!ret) return -EAGAIN; @@ -3745,7 +3744,8 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) + if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || + sqe->splice_fd_in)) return -EINVAL; req->sync.flags = READ_ONCE(sqe->fsync_flags); @@ -3778,7 +3778,8 @@ static int io_fsync(struct io_kiocb *req, bool force_nonblock) static int io_fallocate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - if (sqe->ioprio || sqe->buf_index || sqe->rw_flags) + if (sqe->ioprio || sqe->buf_index || sqe->rw_flags || + sqe->splice_fd_in) return -EINVAL; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; @@ -3809,7 +3810,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe const char __user *fname; int ret; - if (unlikely(sqe->ioprio || sqe->buf_index)) + if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; @@ -3925,7 +3926,8 @@ static int io_remove_buffers_prep(struct io_kiocb *req, struct io_provide_buf *p = &req->pbuf; u64 tmp; - if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off) + if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off || + sqe->splice_fd_in) return -EINVAL; tmp = READ_ONCE(sqe->fd); @@ -4001,7 +4003,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req, struct io_provide_buf *p = &req->pbuf; u64 tmp; - if (sqe->ioprio || sqe->rw_flags) + if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in) return -EINVAL; tmp = READ_ONCE(sqe->fd); @@ -4094,7 +4096,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { #if defined(CONFIG_EPOLL) - if (sqe->ioprio || sqe->buf_index) + if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL))) return -EINVAL; @@ -4140,7 +4142,7 @@ static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock, static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) - if (sqe->ioprio || sqe->buf_index || sqe->off) + if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; @@ -4175,7 +4177,7 @@ static int io_madvise(struct io_kiocb *req, bool force_nonblock) static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - if (sqe->ioprio || sqe->buf_index || sqe->addr) + if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; @@ -4213,7 +4215,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL))) return -EINVAL; - if (sqe->ioprio || sqe->buf_index) + if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (req->flags & REQ_F_FIXED_FILE) return -EBADF; @@ -4260,7 +4262,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) return -EINVAL; if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || - sqe->rw_flags || sqe->buf_index) + sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (req->flags & REQ_F_FIXED_FILE) return -EBADF; @@ -4316,7 +4318,8 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) + if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || + sqe->splice_fd_in)) return -EINVAL; req->sync.off = READ_ONCE(sqe->off); @@ -4759,7 +4762,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) return -EINVAL; - if (sqe->ioprio || sqe->len || sqe->buf_index) + if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); @@ -4800,7 +4803,8 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) return -EINVAL; - if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags) + if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags || + sqe->splice_fd_in) return -EINVAL; conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); @@ -5552,7 +5556,8 @@ static int io_timeout_remove_prep(struct io_kiocb *req, return -EINVAL; if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; - if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags) + if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags | + sqe->splice_fd_in) return -EINVAL; req->timeout_rem.addr = READ_ONCE(sqe->addr); @@ -5589,7 +5594,8 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - if (sqe->ioprio || sqe->buf_index || sqe->len != 1) + if (sqe->ioprio || sqe->buf_index || sqe->len != 1 || + sqe->splice_fd_in) return -EINVAL; if (off && is_timeout_link) return -EINVAL; @@ -5733,7 +5739,8 @@ static int io_async_cancel_prep(struct io_kiocb *req, return -EINVAL; if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; - if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags) + if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags || + sqe->splice_fd_in) return -EINVAL; req->cancel.addr = READ_ONCE(sqe->addr); @@ -7382,7 +7389,7 @@ static int io_sqe_alloc_file_tables(struct fixed_file_data *file_data, this_files = min(nr_files, IORING_MAX_FILES_TABLE); table->files = kcalloc(this_files, sizeof(struct file *), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!table->files) break; nr_files -= this_files; @@ -7578,8 +7585,10 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, return -EINVAL; if (nr_args > IORING_MAX_FIXED_FILES) return -EMFILE; + if (nr_args > rlimit(RLIMIT_NOFILE)) + return -EMFILE; - file_data = kzalloc(sizeof(*ctx->file_data), GFP_KERNEL); + file_data = kzalloc(sizeof(*ctx->file_data), GFP_KERNEL_ACCOUNT); if (!file_data) return -ENOMEM; file_data->ctx = ctx; @@ -7589,7 +7598,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE); file_data->table = kcalloc(nr_tables, sizeof(*file_data->table), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!file_data->table) goto out_free; @@ -9078,9 +9087,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (ctx->flags & IORING_SETUP_SQPOLL) { io_cqring_overflow_flush(ctx, false, NULL, NULL); - ret = -EOWNERDEAD; - if (unlikely(ctx->sqo_dead)) + if (unlikely(ctx->sqo_dead)) { + ret = -EOWNERDEAD; goto out; + } if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) { @@ -9601,11 +9611,12 @@ static int io_register_personality(struct io_ring_ctx *ctx) ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)iod, XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); - if (!ret) - return id; - put_cred(iod->creds); - kfree(iod); - return ret; + if (ret < 0) { + put_cred(iod->creds); + kfree(iod); + return ret; + } + return id; } static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg, diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 10cc7979ce38..caed9d98c64a 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1045,7 +1045,7 @@ iomap_finish_page_writeback(struct inode *inode, struct page *page, if (error) { SetPageError(page); - mapping_set_error(inode->i_mapping, -EIO); + mapping_set_error(inode->i_mapping, error); } WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop); diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c index a5e478de1417..2ceea45aefd8 100644 --- a/fs/iomap/swapfile.c +++ b/fs/iomap/swapfile.c @@ -30,11 +30,16 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi) { struct iomap *iomap = &isi->iomap; unsigned long nr_pages; + unsigned long max_pages; uint64_t first_ppage; uint64_t first_ppage_reported; uint64_t next_ppage; int error; + if (unlikely(isi->nr_pages >= isi->sis->max)) + return 0; + max_pages = isi->sis->max - isi->nr_pages; + /* * Round the start up and the end down so that the physical * extent aligns to a page boundary. @@ -47,6 +52,7 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi) if (first_ppage >= next_ppage) return 0; nr_pages = next_ppage - first_ppage; + nr_pages = min(nr_pages, max_pages); /* * Calculate how much swap space we're adding; the first page contains diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index ec90773527ee..35675a1065be 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -155,7 +155,6 @@ struct iso9660_options{ unsigned int overriderockperm:1; unsigned int uid_set:1; unsigned int gid_set:1; - unsigned int utf8:1; unsigned char map; unsigned char check; unsigned int blocksize; @@ -355,7 +354,6 @@ static int parse_options(char *options, struct iso9660_options *popt) popt->gid = GLOBAL_ROOT_GID; popt->uid = GLOBAL_ROOT_UID; popt->iocharset = NULL; - popt->utf8 = 0; popt->overriderockperm = 0; popt->session=-1; popt->sbsector=-1; @@ -388,10 +386,13 @@ static int parse_options(char *options, struct iso9660_options *popt) case Opt_cruft: popt->cruft = 1; break; +#ifdef CONFIG_JOLIET case Opt_utf8: - popt->utf8 = 1; + kfree(popt->iocharset); + popt->iocharset = kstrdup("utf8", GFP_KERNEL); + if (!popt->iocharset) + return 0; break; -#ifdef CONFIG_JOLIET case Opt_iocharset: kfree(popt->iocharset); popt->iocharset = match_strdup(&args[0]); @@ -494,7 +495,6 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root) if (sbi->s_nocompress) seq_puts(m, ",nocompress"); if (sbi->s_overriderockperm) seq_puts(m, ",overriderockperm"); if (sbi->s_showassoc) seq_puts(m, ",showassoc"); - if (sbi->s_utf8) seq_puts(m, ",utf8"); if (sbi->s_check) seq_printf(m, ",check=%c", sbi->s_check); if (sbi->s_mapping) seq_printf(m, ",map=%c", sbi->s_mapping); @@ -517,9 +517,10 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",fmode=%o", sbi->s_fmode); #ifdef CONFIG_JOLIET - if (sbi->s_nls_iocharset && - strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0) + if (sbi->s_nls_iocharset) seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset); + else + seq_puts(m, ",iocharset=utf8"); #endif return 0; } @@ -862,14 +863,13 @@ root_found: sbi->s_nls_iocharset = NULL; #ifdef CONFIG_JOLIET - if (joliet_level && opt.utf8 == 0) { + if (joliet_level) { char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT; - sbi->s_nls_iocharset = load_nls(p); - if (! sbi->s_nls_iocharset) { - /* Fail only if explicit charset specified */ - if (opt.iocharset) + if (strcmp(p, "utf8") != 0) { + sbi->s_nls_iocharset = opt.iocharset ? + load_nls(opt.iocharset) : load_nls_default(); + if (!sbi->s_nls_iocharset) goto out_freesbi; - sbi->s_nls_iocharset = load_nls_default(); } } #endif @@ -885,7 +885,6 @@ root_found: sbi->s_gid = opt.gid; sbi->s_uid_set = opt.uid_set; sbi->s_gid_set = opt.gid_set; - sbi->s_utf8 = opt.utf8; sbi->s_nocompress = opt.nocompress; sbi->s_overriderockperm = opt.overriderockperm; /* diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 055ec6c586f7..dcdc191ed183 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -44,7 +44,6 @@ struct isofs_sb_info { unsigned char s_session; unsigned int s_high_sierra:1; unsigned int s_rock:2; - unsigned int s_utf8:1; unsigned int s_cruft:1; /* Broken disks with high byte of length * containing junk */ unsigned int s_nocompress:1; diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index be8b6a9d0b92..c0f04a1e7f69 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c @@ -41,14 +41,12 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls) int get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) { - unsigned char utf8; struct nls_table *nls; unsigned char len = 0; - utf8 = ISOFS_SB(inode->i_sb)->s_utf8; nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; - if (utf8) { + if (!nls) { len = utf16s_to_utf8s((const wchar_t *) de->name, de->name_len[0] >> 1, UTF16_BIG_ENDIAN, outname, PAGE_SIZE); diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 61d3cc2283dc..273a81971ed5 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -395,28 +395,10 @@ nlmsvc_release_lockowner(struct nlm_lock *lock) nlmsvc_put_lockowner(lock->fl.fl_owner); } -static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl) -{ - struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner; - new->fl_owner = nlmsvc_get_lockowner(nlm_lo); -} - -static void nlmsvc_locks_release_private(struct file_lock *fl) -{ - nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner); -} - -static const struct file_lock_operations nlmsvc_lock_ops = { - .fl_copy_lock = nlmsvc_locks_copy_lock, - .fl_release_private = nlmsvc_locks_release_private, -}; - void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host, pid_t pid) { fl->fl_owner = nlmsvc_find_lockowner(host, pid); - if (fl->fl_owner != NULL) - fl->fl_ops = &nlmsvc_lock_ops; } /* @@ -634,7 +616,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, conflock->caller = "somehost"; /* FIXME */ conflock->len = strlen(conflock->caller); conflock->oh.len = 0; /* don't return OH info */ - conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid; + conflock->svid = lock->fl.fl_pid; conflock->fl.fl_type = lock->fl.fl_type; conflock->fl.fl_start = lock->fl.fl_start; conflock->fl.fl_end = lock->fl.fl_end; @@ -788,9 +770,21 @@ nlmsvc_notify_blocked(struct file_lock *fl) printk(KERN_WARNING "lockd: notification for unknown block!\n"); } +static fl_owner_t nlmsvc_get_owner(fl_owner_t owner) +{ + return nlmsvc_get_lockowner(owner); +} + +static void nlmsvc_put_owner(fl_owner_t owner) +{ + nlmsvc_put_lockowner(owner); +} + const struct lock_manager_operations nlmsvc_lock_operations = { .lm_notify = nlmsvc_notify_blocked, .lm_grant = nlmsvc_grant_deferred, + .lm_get_owner = nlmsvc_get_owner, + .lm_put_owner = nlmsvc_put_owner, }; /* diff --git a/fs/namespace.c b/fs/namespace.c index 175312428cdf..046b084136c5 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1697,8 +1697,12 @@ static inline bool may_mount(void) } #ifdef CONFIG_MANDATORY_FILE_LOCKING -static inline bool may_mandlock(void) +static bool may_mandlock(void) { + pr_warn_once("======================================================\n" + "WARNING: the mand mount option is being deprecated and\n" + " will be removed in v5.15!\n" + "======================================================\n"); return capable(CAP_SYS_ADMIN); } #else diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9811880470a0..21addb78523d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -322,7 +322,7 @@ nfs_find_actor(struct inode *inode, void *opaque) if (NFS_FILEID(inode) != fattr->fileid) return 0; - if ((S_IFMT & inode->i_mode) != (S_IFMT & fattr->mode)) + if (inode_wrong_type(inode, fattr->mode)) return 0; if (nfs_compare_fh(NFS_FH(inode), fh)) return 0; @@ -1446,7 +1446,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return 0; return -ESTALE; } - if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && inode_wrong_type(inode, fattr->mode)) return -ESTALE; @@ -1861,7 +1861,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* * Make sure the inode's type hasn't changed. */ - if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && inode_wrong_type(inode, fattr->mode)) { /* * Big trouble! The inode has become a different object. */ diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 371665e0c154..5370e082aded 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -335,7 +335,7 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2) static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq) { - if (pnfs_seqid_is_newer(newseq, lo->plh_barrier)) + if (pnfs_seqid_is_newer(newseq, lo->plh_barrier) || !lo->plh_barrier) lo->plh_barrier = newseq; } @@ -347,11 +347,15 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, iomode = IOMODE_ANY; lo->plh_return_iomode = iomode; set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); - if (seq != 0) { - WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); + /* + * We must set lo->plh_return_seq to avoid livelocks with + * pnfs_layout_need_return() + */ + if (seq == 0) + seq = be32_to_cpu(lo->plh_stateid.seqid); + if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq)) lo->plh_return_seq = seq; - pnfs_barrier_update(lo, seq); - } + pnfs_barrier_update(lo, seq); } static void @@ -1000,7 +1004,7 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, { u32 seqid = be32_to_cpu(stateid->seqid); - return !pnfs_seqid_is_newer(seqid, lo->plh_barrier) && lo->plh_barrier; + return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid); } /* lget is set to 1 if called from inside send_layoutget call chain */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 80e394a2e3fd..0313390fa4b4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2646,9 +2646,9 @@ static void force_expire_client(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); bool already_expired; - spin_lock(&clp->cl_lock); + spin_lock(&nn->client_lock); clp->cl_time = 0; - spin_unlock(&clp->cl_lock); + spin_unlock(&nn->client_lock); wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0); spin_lock(&nn->client_lock); @@ -6855,8 +6855,7 @@ out: /* * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN, * so we do a temporary open here just to get an open file to pass to - * vfs_test_lock. (Arguably perhaps test_lock should be done with an - * inode operation.) + * vfs_test_lock. */ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) { @@ -6871,7 +6870,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct NFSD_MAY_READ)); if (err) goto out; + lock->fl_file = nf->nf_file; err = nfserrno(vfs_test_lock(nf->nf_file, lock)); + lock->fl_file = NULL; out: fh_unlock(fhp); nfsd_file_put(nf); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 0d71549f9d42..9c9de2b66e64 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -376,7 +376,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) /* Make sure the type and device matches */ resp->status = nfserr_exist; - if (inode && type != (inode->i_mode & S_IFMT)) + if (inode && inode_wrong_type(inode, type)) goto out_unlock; } diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 1192c9953620..c3af99e94f1d 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -129,11 +129,15 @@ static bool fanotify_should_merge(struct fsnotify_event *old_fsn, return false; } +/* Limit event merges to limit CPU overhead per event */ +#define FANOTIFY_MAX_MERGE_EVENTS 128 + /* and the list better be locked by something too! */ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) { struct fsnotify_event *test_event; struct fanotify_event *new; + int i = 0; pr_debug("%s: list=%p event=%p\n", __func__, list, event); new = FANOTIFY_E(event); @@ -147,6 +151,8 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) return 0; list_for_each_entry_reverse(test_event, list, list) { + if (++i > FANOTIFY_MAX_MERGE_EVENTS) + break; if (fanotify_should_merge(test_event, event)) { FANOTIFY_E(test_event)->mask |= new->mask; return 1; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index d1efa3a5a503..08b595c526d7 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -542,8 +542,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, goto out_cleanup; } err = ovl_instantiate(dentry, inode, newdentry, hardlink); - if (err) - goto out_cleanup; + if (err) { + ovl_cleanup(udir, newdentry); + dput(newdentry); + } out_dput: dput(upper); out_unlock: diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index ed35be3fafc6..f469982dcb36 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -390,6 +390,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, */ take_dentry_name_snapshot(&name, real); this = lookup_one_len(name.name.name, connected, name.name.len); + release_dentry_name_snapshot(&name); err = PTR_ERR(this); if (IS_ERR(this)) { goto fail; @@ -404,7 +405,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, } out: - release_dentry_name_snapshot(&name); dput(parent); inode_unlock(dir); return this; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index f3309e044f07..092812c2f118 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -366,7 +366,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, return PTR_ERR(origin); if (upperdentry && !ovl_is_whiteout(upperdentry) && - ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT)) + inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode)) goto invalid; if (!*stackp) @@ -724,7 +724,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, index = ERR_PTR(-ESTALE); goto out; } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || - ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { + inode_wrong_type(inode, d_inode(origin)->i_mode)) { /* * Index should always be of the same file type as origin * except for the case of a whiteout index. A whiteout diff --git a/fs/pipe.c b/fs/pipe.c index 28b2e973f10e..d6d4019ba32f 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -363,10 +363,9 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) * _very_ unlikely case that the pipe was full, but we got * no data. */ - if (unlikely(was_full)) { + if (unlikely(was_full)) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - } + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); /* * But because we didn't read anything, at this point we can @@ -385,12 +384,11 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) wake_next_reader = false; __pipe_unlock(pipe); - if (was_full) { + if (was_full) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - } if (wake_next_reader) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); if (ret > 0) file_accessed(filp); return ret; @@ -444,9 +442,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) #endif /* - * Epoll nonsensically wants a wakeup whether the pipe - * was already empty or not. - * * If it wasn't empty we try to merge new data into * the last buffer. * @@ -455,9 +450,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * spanning multiple pages. */ head = pipe->head; - was_empty = true; + was_empty = pipe_empty(head, pipe->tail); chars = total_len & (PAGE_SIZE-1); - if (chars && !pipe_empty(head, pipe->tail)) { + if (chars && !was_empty) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; int offset = buf->offset + buf->len; @@ -568,10 +563,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * become empty while we dropped the lock. */ __pipe_unlock(pipe); - if (was_empty) { + if (was_empty) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); __pipe_lock(pipe); was_empty = pipe_empty(pipe->head, pipe->tail); @@ -590,11 +584,13 @@ out: * This is particularly important for small writes, because of * how (for example) the GNU make jobserver uses small writes to * wake up pending jobs + * + * Epoll nonsensically wants a wakeup whether the pipe + * was already empty or not. */ - if (was_empty) { + if (was_empty || pipe->poll_usage) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); if (wake_next_writer) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { @@ -654,6 +650,9 @@ pipe_poll(struct file *filp, poll_table *wait) struct pipe_inode_info *pipe = filp->private_data; unsigned int head, tail; + /* Epoll has some historical nasty semantics, this enables them */ + pipe->poll_usage = 1; + /* * Reading pipe state only -- no need for acquiring the semaphore. * diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index b77d1637bbbc..f4826b6da682 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1629,6 +1629,16 @@ static const char *ubifs_get_link(struct dentry *dentry, return fscrypt_get_symlink(inode, ui->data, ui->data_len, done); } +static int ubifs_symlink_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + ubifs_getattr(path, stat, request_mask, query_flags); + + if (IS_ENCRYPTED(d_inode(path->dentry))) + return fscrypt_symlink_getattr(path, stat); + return 0; +} + const struct address_space_operations ubifs_file_address_operations = { .readpage = ubifs_readpage, .writepage = ubifs_writepage, @@ -1654,7 +1664,7 @@ const struct inode_operations ubifs_file_inode_operations = { const struct inode_operations ubifs_symlink_inode_operations = { .get_link = ubifs_get_link, .setattr = ubifs_setattr, - .getattr = ubifs_getattr, + .getattr = ubifs_symlink_getattr, #ifdef CONFIG_UBIFS_FS_XATTR .listxattr = ubifs_listxattr, #endif diff --git a/fs/udf/misc.c b/fs/udf/misc.c index eab94527340d..1614d308d0f0 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -173,13 +173,22 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type, else offset = le32_to_cpu(eahd->appAttrLocation); - while (offset < iinfo->i_lenEAttr) { + while (offset + sizeof(*gaf) < iinfo->i_lenEAttr) { + uint32_t attrLength; + gaf = (struct genericFormat *)&ea[offset]; + attrLength = le32_to_cpu(gaf->attrLength); + + /* Detect undersized elements and buffer overflows */ + if ((attrLength < sizeof(*gaf)) || + (attrLength > (iinfo->i_lenEAttr - offset))) + break; + if (le32_to_cpu(gaf->attrType) == type && gaf->attrSubtype == subtype) return gaf; else - offset += le32_to_cpu(gaf->attrLength); + offset += attrLength; } } diff --git a/fs/udf/super.c b/fs/udf/super.c index d0df217f4712..5d2b820ef303 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -108,16 +108,10 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb) return NULL; lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data; partnum = le32_to_cpu(lvid->numOfPartitions); - if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) - - offsetof(struct logicalVolIntegrityDesc, impUse)) / - (2 * sizeof(uint32_t)) < partnum) { - udf_err(sb, "Logical volume integrity descriptor corrupted " - "(numOfPartitions = %u)!\n", partnum); - return NULL; - } /* The offset is to skip freeSpaceTable and sizeTable arrays */ offset = partnum * 2 * sizeof(uint32_t); - return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]); + return (struct logicalVolIntegrityDescImpUse *) + (((uint8_t *)(lvid + 1)) + offset); } /* UDF filesystem type */ @@ -349,10 +343,10 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",lastblock=%u", sbi->s_last_block); if (sbi->s_anchor != 0) seq_printf(seq, ",anchor=%u", sbi->s_anchor); - if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) - seq_puts(seq, ",utf8"); - if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP) && sbi->s_nls_map) + if (sbi->s_nls_map) seq_printf(seq, ",iocharset=%s", sbi->s_nls_map->charset); + else + seq_puts(seq, ",iocharset=utf8"); return 0; } @@ -557,19 +551,24 @@ static int udf_parse_options(char *options, struct udf_options *uopt, /* Ignored (never implemented properly) */ break; case Opt_utf8: - uopt->flags |= (1 << UDF_FLAG_UTF8); + if (!remount) { + unload_nls(uopt->nls_map); + uopt->nls_map = NULL; + } break; case Opt_iocharset: if (!remount) { - if (uopt->nls_map) - unload_nls(uopt->nls_map); - /* - * load_nls() failure is handled later in - * udf_fill_super() after all options are - * parsed. - */ + unload_nls(uopt->nls_map); + uopt->nls_map = NULL; + } + /* When nls_map is not loaded then UTF-8 is used */ + if (!remount && strcmp(args[0].from, "utf8") != 0) { uopt->nls_map = load_nls(args[0].from); - uopt->flags |= (1 << UDF_FLAG_NLS_MAP); + if (!uopt->nls_map) { + pr_err("iocharset %s not found\n", + args[0].from); + return 0; + } } break; case Opt_uforget: @@ -1541,6 +1540,7 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ struct udf_sb_info *sbi = UDF_SB(sb); struct logicalVolIntegrityDesc *lvid; int indirections = 0; + u32 parts, impuselen; while (++indirections <= UDF_MAX_LVID_NESTING) { final_bh = NULL; @@ -1567,15 +1567,27 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data; if (lvid->nextIntegrityExt.extLength == 0) - return; + goto check; loc = leea_to_cpu(lvid->nextIntegrityExt); } udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n", UDF_MAX_LVID_NESTING); +out_err: brelse(sbi->s_lvid_bh); sbi->s_lvid_bh = NULL; + return; +check: + parts = le32_to_cpu(lvid->numOfPartitions); + impuselen = le32_to_cpu(lvid->lengthOfImpUse); + if (parts >= sb->s_blocksize || impuselen >= sb->s_blocksize || + sizeof(struct logicalVolIntegrityDesc) + impuselen + + 2 * parts * sizeof(u32) > sb->s_blocksize) { + udf_warn(sb, "Corrupted LVID (parts=%u, impuselen=%u), " + "ignoring.\n", parts, impuselen); + goto out_err; + } } /* @@ -2138,21 +2150,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (!udf_parse_options((char *)options, &uopt, false)) goto parse_options_failure; - if (uopt.flags & (1 << UDF_FLAG_UTF8) && - uopt.flags & (1 << UDF_FLAG_NLS_MAP)) { - udf_err(sb, "utf8 cannot be combined with iocharset\n"); - goto parse_options_failure; - } - if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) { - uopt.nls_map = load_nls_default(); - if (!uopt.nls_map) - uopt.flags &= ~(1 << UDF_FLAG_NLS_MAP); - else - udf_debug("Using default NLS map\n"); - } - if (!(uopt.flags & (1 << UDF_FLAG_NLS_MAP))) - uopt.flags |= (1 << UDF_FLAG_UTF8); - fileset.logicalBlockNum = 0xFFFFFFFF; fileset.partitionReferenceNum = 0xFFFF; @@ -2307,8 +2304,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) error_out: iput(sbi->s_vat_inode); parse_options_failure: - if (uopt.nls_map) - unload_nls(uopt.nls_map); + unload_nls(uopt.nls_map); if (lvid_open) udf_close_lvid(sb); brelse(sbi->s_lvid_bh); @@ -2358,8 +2354,7 @@ static void udf_put_super(struct super_block *sb) sbi = UDF_SB(sb); iput(sbi->s_vat_inode); - if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) - unload_nls(sbi->s_nls_map); + unload_nls(sbi->s_nls_map); if (!sb_rdonly(sb)) udf_close_lvid(sb); brelse(sbi->s_lvid_bh); diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 758efe557a19..4fa620543d30 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -20,8 +20,6 @@ #define UDF_FLAG_UNDELETE 6 #define UDF_FLAG_UNHIDE 7 #define UDF_FLAG_VARCONV 8 -#define UDF_FLAG_NLS_MAP 9 -#define UDF_FLAG_UTF8 10 #define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */ #define UDF_FLAG_GID_FORGET 12 #define UDF_FLAG_UID_SET 13 diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 5fcfa96463eb..622569007b53 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -177,7 +177,7 @@ static int udf_name_from_CS0(struct super_block *sb, return 0; } - if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) + if (UDF_SB(sb)->s_nls_map) conv_f = UDF_SB(sb)->s_nls_map->uni2char; else conv_f = NULL; @@ -285,7 +285,7 @@ static int udf_name_to_CS0(struct super_block *sb, if (ocu_max_len <= 0) return 0; - if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) + if (UDF_SB(sb)->s_nls_map) conv_f = UDF_SB(sb)->s_nls_map->char2uni; else conv_f = NULL; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3d181b1a6d56..17397c7532f1 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -32,11 +32,6 @@ int sysctl_unprivileged_userfaultfd __read_mostly = 1; static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; -enum userfaultfd_state { - UFFD_STATE_WAIT_API, - UFFD_STATE_RUNNING, -}; - /* * Start with fault_pending_wqh and fault_wqh so they're more likely * to be in the same cacheline. @@ -68,8 +63,6 @@ struct userfaultfd_ctx { unsigned int flags; /* features requested from the userspace */ unsigned int features; - /* state machine */ - enum userfaultfd_state state; /* released */ bool released; /* memory mappings are changing because of non-cooperative event */ @@ -103,6 +96,14 @@ struct userfaultfd_wake_range { unsigned long len; }; +/* internal indication that UFFD_API ioctl was successfully executed */ +#define UFFD_FEATURE_INITIALIZED (1u << 31) + +static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx) +{ + return ctx->features & UFFD_FEATURE_INITIALIZED; +} + static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, int wake_flags, void *key) { @@ -659,7 +660,6 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) refcount_set(&ctx->refcount, 1); ctx->flags = octx->flags; - ctx->state = UFFD_STATE_RUNNING; ctx->features = octx->features; ctx->released = false; ctx->mmap_changing = false; @@ -936,38 +936,33 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->fd_wqh, wait); - switch (ctx->state) { - case UFFD_STATE_WAIT_API: + if (!userfaultfd_is_initialized(ctx)) return EPOLLERR; - case UFFD_STATE_RUNNING: - /* - * poll() never guarantees that read won't block. - * userfaults can be waken before they're read(). - */ - if (unlikely(!(file->f_flags & O_NONBLOCK))) - return EPOLLERR; - /* - * lockless access to see if there are pending faults - * __pollwait last action is the add_wait_queue but - * the spin_unlock would allow the waitqueue_active to - * pass above the actual list_add inside - * add_wait_queue critical section. So use a full - * memory barrier to serialize the list_add write of - * add_wait_queue() with the waitqueue_active read - * below. - */ - ret = 0; - smp_mb(); - if (waitqueue_active(&ctx->fault_pending_wqh)) - ret = EPOLLIN; - else if (waitqueue_active(&ctx->event_wqh)) - ret = EPOLLIN; - return ret; - default: - WARN_ON_ONCE(1); + /* + * poll() never guarantees that read won't block. + * userfaults can be waken before they're read(). + */ + if (unlikely(!(file->f_flags & O_NONBLOCK))) return EPOLLERR; - } + /* + * lockless access to see if there are pending faults + * __pollwait last action is the add_wait_queue but + * the spin_unlock would allow the waitqueue_active to + * pass above the actual list_add inside + * add_wait_queue critical section. So use a full + * memory barrier to serialize the list_add write of + * add_wait_queue() with the waitqueue_active read + * below. + */ + ret = 0; + smp_mb(); + if (waitqueue_active(&ctx->fault_pending_wqh)) + ret = EPOLLIN; + else if (waitqueue_active(&ctx->event_wqh)) + ret = EPOLLIN; + + return ret; } static const struct file_operations userfaultfd_fops; @@ -1161,7 +1156,7 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf, struct uffd_msg msg; int no_wait = file->f_flags & O_NONBLOCK; - if (ctx->state == UFFD_STATE_WAIT_API) + if (!userfaultfd_is_initialized(ctx)) return -EINVAL; for (;;) { @@ -1816,9 +1811,10 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, static inline unsigned int uffd_ctx_features(__u64 user_features) { /* - * For the current set of features the bits just coincide + * For the current set of features the bits just coincide. Set + * UFFD_FEATURE_INITIALIZED to mark the features as enabled. */ - return (unsigned int)user_features; + return (unsigned int)user_features | UFFD_FEATURE_INITIALIZED; } /* @@ -1831,12 +1827,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, { struct uffdio_api uffdio_api; void __user *buf = (void __user *)arg; + unsigned int ctx_features; int ret; __u64 features; - ret = -EINVAL; - if (ctx->state != UFFD_STATE_WAIT_API) - goto out; ret = -EFAULT; if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api))) goto out; @@ -1853,9 +1847,13 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, ret = -EFAULT; if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) goto out; - ctx->state = UFFD_STATE_RUNNING; + /* only enable the requested features for this uffd context */ - ctx->features = uffd_ctx_features(features); + ctx_features = uffd_ctx_features(features); + ret = -EINVAL; + if (cmpxchg(&ctx->features, 0, ctx_features) != 0) + goto err_out; + ret = 0; out: return ret; @@ -1872,7 +1870,7 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd, int ret = -EINVAL; struct userfaultfd_ctx *ctx = file->private_data; - if (cmd != UFFDIO_API && ctx->state == UFFD_STATE_WAIT_API) + if (cmd != UFFDIO_API && !userfaultfd_is_initialized(ctx)) return -EINVAL; switch(cmd) { @@ -1976,7 +1974,6 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) refcount_set(&ctx->refcount, 1); ctx->flags = flags; ctx->features = 0; - ctx->state = UFFD_STATE_WAIT_API; ctx->released = false; ctx->mmap_changing = false; ctx->mm = current->mm; |