summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/9p/vfs_inode.c 4
-rw-r--r-- fs/9p/vfs_inode_dotl.c 4
-rw-r--r-- fs/btrfs/btrfs_inode.h 15
-rw-r--r-- fs/btrfs/file.c 11
-rw-r--r-- fs/btrfs/inode.c 26
-rw-r--r-- fs/btrfs/transaction.h 2
-rw-r--r-- fs/btrfs/tree-log.c 4
-rw-r--r-- fs/btrfs/volumes.c 5
-rw-r--r-- fs/ceph/caps.c 24
-rw-r--r-- fs/ceph/mds_client.c 7
-rw-r--r-- fs/ceph/mdsmap.c 8
-rw-r--r-- fs/ceph/snap.c 3
-rw-r--r-- fs/ceph/super.h 3
-rw-r--r-- fs/cifs/cifs_unicode.c 9
-rw-r--r-- fs/cifs/inode.c 5
-rw-r--r-- fs/cifs/sess.c 2
-rw-r--r-- fs/crypto/hooks.c 44
-rw-r--r-- fs/debugfs/file.c 8
-rw-r--r-- fs/exec.c 4
-rw-r--r-- fs/ext4/inline.c 6
-rw-r--r-- fs/ext4/symlink.c 11
-rw-r--r-- fs/f2fs/compress.c 12
-rw-r--r-- fs/f2fs/data.c 16
-rw-r--r-- fs/f2fs/dir.c 14
-rw-r--r-- fs/f2fs/file.c 9
-rw-r--r-- fs/f2fs/gc.c 4
-rw-r--r-- fs/f2fs/namei.c 11
-rw-r--r-- fs/f2fs/super.c 117
-rw-r--r-- fs/fcntl.c 5
-rw-r--r-- fs/fscache/cookie.c 14
-rw-r--r-- fs/fscache/internal.h 2
-rw-r--r-- fs/fscache/main.c 39
-rw-r--r-- fs/fuse/dir.c 6
-rw-r--r-- fs/fuse/file.c 9
-rw-r--r-- fs/fuse/fuse_i.h 7
-rw-r--r-- fs/fuse/inode.c 4
-rw-r--r-- fs/fuse/readdir.c 7
-rw-r--r-- fs/gfs2/glops.c 17
-rw-r--r-- fs/gfs2/lock_dlm.c 5
-rw-r--r-- fs/gfs2/ops_fstype.c 43
-rw-r--r-- fs/gfs2/super.c 61
-rw-r--r-- fs/io-wq.c 8
-rw-r--r-- fs/io_uring.c 87
-rw-r--r-- fs/iomap/buffered-io.c 2
-rw-r--r-- fs/iomap/swapfile.c 6
-rw-r--r-- fs/isofs/inode.c 27
-rw-r--r-- fs/isofs/isofs.h 1
-rw-r--r-- fs/isofs/joliet.c 4
-rw-r--r-- fs/lockd/svclock.c 32
-rw-r--r-- fs/namespace.c 6
-rw-r--r-- fs/nfs/inode.c 6
-rw-r--r-- fs/nfs/pnfs.c 16
-rw-r--r-- fs/nfsd/nfs4state.c 9
-rw-r--r-- fs/nfsd/nfsproc.c 2
-rw-r--r-- fs/notify/fanotify/fanotify.c 6
-rw-r--r-- fs/overlayfs/dir.c 6
-rw-r--r-- fs/overlayfs/export.c 2
-rw-r--r-- fs/overlayfs/namei.c 4
-rw-r--r-- fs/pipe.c 33
-rw-r--r-- fs/ubifs/file.c 12
-rw-r--r-- fs/udf/misc.c 13
-rw-r--r-- fs/udf/super.c 75
-rw-r--r-- fs/udf/udf_sb.h 2
-rw-r--r-- fs/udf/unicode.c 4
-rw-r--r-- fs/userfaultfd.c 91
65 files changed, 640 insertions, 421 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index ae0c38ad1fcb..0791480bf922 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -398,7 +398,7 @@ static int v9fs_test_inode(struct inode *inode, void *data)
umode = p9mode2unixmode(v9ses, st, &rdev);
/* don't match inode of different type */
- if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+ if (inode_wrong_type(inode, umode))
return 0;
/* compare qid details */
@@ -1360,7 +1360,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
* Don't update inode if the file type is different
*/
umode = p9mode2unixmode(v9ses, st, &rdev);
- if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+ if (inode_wrong_type(inode, umode))
goto out;
/*
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 0028eccb665a..72b67d810b8c 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -59,7 +59,7 @@ static int v9fs_test_inode_dotl(struct inode *inode, void *data)
struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
/* don't match inode of different type */
- if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+ if (inode_wrong_type(inode, st->st_mode))
return 0;
if (inode->i_generation != st->st_gen)
@@ -933,7 +933,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
/*
* Don't update inode if the file type is different
*/
- if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+ if (inode_wrong_type(inode, st->st_mode))
goto out;
/*
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 8de4bf8edb9c..5a43f8e07122 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -308,6 +308,21 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
mod);
}
+/*
+ * Called every time after doing a buffered, direct IO or memory mapped write.
+ *
+ * This is to ensure that if we write to a file that was previously fsynced in
+ * the current transaction, then try to fsync it again in the same transaction,
+ * we will know that there were changes in the file and that it needs to be
+ * logged.
+ */
+static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
+{
+ spin_lock(&inode->lock);
+ inode->last_sub_trans = inode->root->log_transid;
+ spin_unlock(&inode->lock);
+}
+
static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
int ret = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ffa48ac98d1e..6ab91661cd26 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1862,7 +1862,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start_pos;
u64 end_pos;
ssize_t num_written = 0;
@@ -2006,14 +2005,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
inode_unlock(inode);
- /*
- * We also have to set last_sub_trans to the current log transid,
- * otherwise subsequent syncs to a file that's been synced in this
- * transaction will appear to have already occurred.
- */
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->last_sub_trans = root->log_transid;
- spin_unlock(&BTRFS_I(inode)->lock);
+ btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
+
if (num_written > 0)
num_written = generic_write_sync(iocb, num_written);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4f21b8fbfd4b..ff3f0638cdb9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -547,7 +547,7 @@ again:
* inode has not been flagged as nocompress. This flag can
* change at any time if we discover bad compression ratios.
*/
- if (nr_pages > 1 && inode_need_compress(BTRFS_I(inode), start, end)) {
+ if (inode_need_compress(BTRFS_I(inode), start, end)) {
WARN_ON(pages);
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!pages) {
@@ -1202,11 +1202,6 @@ static noinline void async_cow_submit(struct btrfs_work *work)
nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
PAGE_SHIFT;
- /* atomic_sub_return implies a barrier */
- if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
- 5 * SZ_1M)
- cond_wake_up_nomb(&fs_info->async_submit_wait);
-
/*
* ->inode could be NULL if async_chunk_start has failed to compress,
* in which case we don't have anything to submit, yet we need to
@@ -1215,6 +1210,11 @@ static noinline void async_cow_submit(struct btrfs_work *work)
*/
if (async_chunk->inode)
submit_compressed_extents(async_chunk);
+
+ /* atomic_sub_return implies a barrier */
+ if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
+ 5 * SZ_1M)
+ cond_wake_up_nomb(&fs_info->async_submit_wait);
}
static noinline void async_cow_free(struct btrfs_work *work)
@@ -8449,9 +8449,7 @@ again:
set_page_dirty(page);
SetPageUptodate(page);
- BTRFS_I(inode)->last_trans = fs_info->generation;
- BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
- BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
+ btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
@@ -8904,8 +8902,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
bool dest_log_pinned = false;
bool need_abort = false;
- /* we only allow rename subvolume link between subvolumes */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
+ /*
+ * For non-subvolumes allow exchange only within one subvolume, in the
+ * same inode namespace. Two subvolumes (represented as directory) can
+ * be exchanged as they're a logical link and have a fixed inode number.
+ */
+ if (root != dest &&
+ (old_ino != BTRFS_FIRST_FREE_OBJECTID ||
+ new_ino != BTRFS_FIRST_FREE_OBJECTID))
return -EXDEV;
/* close the race window with snapshot create/destroy ioctl */
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 858d9153a1cd..f73654d93fa0 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -171,7 +171,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
spin_lock(&inode->lock);
inode->last_trans = trans->transaction->transid;
inode->last_sub_trans = inode->root->log_transid;
- inode->last_log_commit = inode->root->last_log_commit;
+ inode->last_log_commit = inode->last_sub_trans - 1;
spin_unlock(&inode->lock);
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f36928efcf92..ec25e5eab349 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -708,7 +708,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
*/
ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
ins.offset);
- if (ret == 0) {
+ if (ret < 0) {
+ goto out;
+ } else if (ret == 0) {
btrfs_init_generic_ref(&ref,
BTRFS_ADD_DELAYED_REF,
ins.objectid, ins.offset, 0);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 920c84fae710..b4fcc48f255b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1129,6 +1129,9 @@ static void btrfs_close_one_device(struct btrfs_device *device)
fs_devices->rw_devices--;
}
+ if (device->devid == BTRFS_DEV_REPLACE_DEVID)
+ clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
+
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
fs_devices->missing_devices--;
@@ -2059,7 +2062,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
if (IS_ERR(device)) {
if (PTR_ERR(device) == -ENOENT &&
- strcmp(device_path, "missing") == 0)
+ device_path && strcmp(device_path, "missing") == 0)
ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
else
ret = PTR_ERR(device);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 45093a765a9b..678dac8365ed 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1752,7 +1752,14 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
struct ceph_cap_flush *ceph_alloc_cap_flush(void)
{
- return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+ struct ceph_cap_flush *cf;
+
+ cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+ if (!cf)
+ return NULL;
+
+ cf->is_capsnap = false;
+ return cf;
}
void ceph_free_cap_flush(struct ceph_cap_flush *cf)
@@ -1787,7 +1794,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
prev->wake = true;
wake = false;
}
- list_del(&cf->g_list);
+ list_del_init(&cf->g_list);
return wake;
}
@@ -1802,7 +1809,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
prev->wake = true;
wake = false;
}
- list_del(&cf->i_list);
+ list_del_init(&cf->i_list);
return wake;
}
@@ -2422,7 +2429,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
- if (!cf->caps) {
+ if (cf->is_capsnap) {
last_snap_flush = cf->tid;
break;
}
@@ -2441,7 +2448,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
first_tid = cf->tid + 1;
- if (cf->caps) {
+ if (!cf->is_capsnap) {
struct cap_msg_args arg;
dout("kick_flushing_caps %p cap %p tid %llu %s\n",
@@ -3564,7 +3571,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
cleaned = cf->caps;
/* Is this a capsnap? */
- if (cf->caps == 0)
+ if (cf->is_capsnap)
continue;
if (cf->tid <= flush_tid) {
@@ -3637,8 +3644,9 @@ out:
while (!list_empty(&to_remove)) {
cf = list_first_entry(&to_remove,
struct ceph_cap_flush, i_list);
- list_del(&cf->i_list);
- ceph_free_cap_flush(cf);
+ list_del_init(&cf->i_list);
+ if (!cf->is_capsnap)
+ ceph_free_cap_flush(cf);
}
if (wake_ci)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1701902415c4..816cea497537 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1618,7 +1618,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
spin_lock(&mdsc->cap_dirty_lock);
list_for_each_entry(cf, &to_remove, i_list)
- list_del(&cf->g_list);
+ list_del_init(&cf->g_list);
if (!list_empty(&ci->i_dirty_item)) {
pr_warn_ratelimited(
@@ -1670,8 +1670,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_cap_flush *cf;
cf = list_first_entry(&to_remove,
struct ceph_cap_flush, i_list);
- list_del(&cf->i_list);
- ceph_free_cap_flush(cf);
+ list_del_init(&cf->i_list);
+ if (!cf->is_capsnap)
+ ceph_free_cap_flush(cf);
}
wake_up_all(&ci->i_cap_wq);
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 1096d1d3a84c..47f2903bacb9 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -393,9 +393,11 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
{
int i;
- for (i = 0; i < m->possible_max_rank; i++)
- kfree(m->m_info[i].export_targets);
- kfree(m->m_info);
+ if (m->m_info) {
+ for (i = 0; i < m->possible_max_rank; i++)
+ kfree(m->m_info[i].export_targets);
+ kfree(m->m_info);
+ }
kfree(m->m_data_pg_pools);
kfree(m);
}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 803b60a96702..0369f672a76f 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -487,6 +487,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
return;
}
+ capsnap->cap_flush.is_capsnap = true;
+ INIT_LIST_HEAD(&capsnap->cap_flush.i_list);
+ INIT_LIST_HEAD(&capsnap->cap_flush.g_list);
spin_lock(&ci->i_ceph_lock);
used = __ceph_caps_used(ci);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6712509ae1d6..a8c460393b01 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -181,8 +181,9 @@ struct ceph_cap {
struct ceph_cap_flush {
u64 tid;
- int caps; /* 0 means capsnap */
+ int caps;
bool wake; /* wake up flush waiters when finish ? */
+ bool is_capsnap; /* true means capsnap */
struct list_head g_list; // global
struct list_head i_list; // per inode
};
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 9bd03a231032..171ad8b42107 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -358,14 +358,9 @@ cifs_strndup_from_utf16(const char *src, const int maxlen,
if (!dst)
return NULL;
cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
- NO_MAP_UNI_RSVD);
+ NO_MAP_UNI_RSVD);
} else {
- len = strnlen(src, maxlen);
- len++;
- dst = kmalloc(len, GFP_KERNEL);
- if (!dst)
- return NULL;
- strlcpy(dst, src, len);
+ dst = kstrndup(src, maxlen, GFP_KERNEL);
}
return dst;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b1f0c05d6eaf..b11a919b9cab 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -425,8 +425,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
}
/* if filetype is different, return error */
- if (unlikely(((*pinode)->i_mode & S_IFMT) !=
- (fattr.cf_mode & S_IFMT))) {
+ if (unlikely(inode_wrong_type(*pinode, fattr.cf_mode))) {
CIFS_I(*pinode)->time = 0; /* force reval */
rc = -ESTALE;
goto cgiiu_exit;
@@ -1243,7 +1242,7 @@ cifs_find_inode(struct inode *inode, void *opaque)
return 0;
/* don't match inode of different type */
- if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT))
+ if (inode_wrong_type(inode, fattr->cf_mode))
return 0;
/* if it's not a directory or has no dentries, then flag it */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 1a0298d1e7cd..d58c5ffeca0d 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -888,7 +888,7 @@ sess_alloc_buffer(struct sess_data *sess_data, int wct)
return 0;
out_free_smb_buf:
- kfree(smb_buf);
+ cifs_small_buf_release(smb_buf);
sess_data->iov[0].iov_base = NULL;
sess_data->iov[0].iov_len = 0;
sess_data->buf0_type = CIFS_NO_BUFFER;
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 061418be4b08..4180371bf864 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -379,3 +379,47 @@ err_kfree:
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(fscrypt_get_symlink);
+
+/**
+ * fscrypt_symlink_getattr() - set the correct st_size for encrypted symlinks
+ * @path: the path for the encrypted symlink being queried
+ * @stat: the struct being filled with the symlink's attributes
+ *
+ * Override st_size of encrypted symlinks to be the length of the decrypted
+ * symlink target (or the no-key encoded symlink target, if the key is
+ * unavailable) rather than the length of the encrypted symlink target. This is
+ * necessary for st_size to match the symlink target that userspace actually
+ * sees. POSIX requires this, and some userspace programs depend on it.
+ *
+ * This requires reading the symlink target from disk if needed, setting up the
+ * inode's encryption key if possible, and then decrypting or encoding the
+ * symlink target. This makes lstat() more heavyweight than is normally the
+ * case. However, decrypted symlink targets will be cached in ->i_link, so
+ * usually the symlink won't have to be read and decrypted again later if/when
+ * it is actually followed, readlink() is called, or lstat() is called again.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat)
+{
+ struct dentry *dentry = path->dentry;
+ struct inode *inode = d_inode(dentry);
+ const char *link;
+ DEFINE_DELAYED_CALL(done);
+
+ /*
+ * To get the symlink target that userspace will see (whether it's the
+ * decrypted target or the no-key encoded target), we can just get it in
+ * the same way the VFS does during path resolution and readlink().
+ */
+ link = READ_ONCE(inode->i_link);
+ if (!link) {
+ link = inode->i_op->get_link(dentry, inode, &done);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+ }
+ stat->size = strlen(link);
+ do_delayed_call(&done);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fscrypt_symlink_getattr);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 686e0ad28788..3aa5eb9ce498 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -179,8 +179,10 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
if (!fops_get(real_fops)) {
#ifdef CONFIG_MODULES
if (real_fops->owner &&
- real_fops->owner->state == MODULE_STATE_GOING)
+ real_fops->owner->state == MODULE_STATE_GOING) {
+ r = -ENXIO;
goto out;
+ }
#endif
/* Huh? Module did not clean up after itself at exit? */
@@ -314,8 +316,10 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
if (!fops_get(real_fops)) {
#ifdef CONFIG_MODULES
if (real_fops->owner &&
- real_fops->owner->state == MODULE_STATE_GOING)
+ real_fops->owner->state == MODULE_STATE_GOING) {
+ r = -ENXIO;
goto out;
+ }
#endif
/* Huh? Module did not cleanup after itself at exit? */
diff --git a/fs/exec.c b/fs/exec.c
index c7a4ef8df305..ca89e0e3ef10 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1347,10 +1347,6 @@ int begin_new_exec(struct linux_binprm * bprm)
WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
flush_signal_handlers(me, 0);
- retval = set_cred_ucounts(bprm->cred);
- if (retval < 0)
- goto out_unlock;
-
/*
* install the new credentials for this executable
*/
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index b41512d1badc..0f7b53d5edea 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -750,6 +750,12 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
ext4_write_lock_xattr(inode, &no_expand);
BUG_ON(!ext4_has_inline_data(inode));
+ /*
+ * ei->i_inline_off may have changed since ext4_write_begin()
+ * called ext4_try_to_write_inline_data()
+ */
+ (void) ext4_find_inline_data_nolock(inode);
+
kaddr = kmap_atomic(page);
ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
kunmap_atomic(kaddr);
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index dd05af983092..a9457fed351e 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -52,10 +52,19 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
return paddr;
}
+static int ext4_encrypted_symlink_getattr(const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ ext4_getattr(path, stat, request_mask, query_flags);
+
+ return fscrypt_symlink_getattr(path, stat);
+}
+
const struct inode_operations ext4_encrypted_symlink_inode_operations = {
.get_link = ext4_encrypted_get_link,
.setattr = ext4_setattr,
- .getattr = ext4_getattr,
+ .getattr = ext4_encrypted_symlink_getattr,
.listxattr = ext4_listxattr,
};
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index f94b13075ea4..30987ea011f1 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1308,12 +1308,6 @@ out_destroy_crypt:
for (--i; i >= 0; i--)
fscrypt_finalize_bounce_page(&cc->cpages[i]);
- for (i = 0; i < cc->nr_cpages; i++) {
- if (!cc->cpages[i])
- continue;
- f2fs_compress_free_page(cc->cpages[i]);
- cc->cpages[i] = NULL;
- }
out_put_cic:
kmem_cache_free(cic_entry_slab, cic);
out_put_dnode:
@@ -1324,6 +1318,12 @@ out_unlock_op:
else
f2fs_unlock_op(sbi);
out_free:
+ for (i = 0; i < cc->nr_cpages; i++) {
+ if (!cc->cpages[i])
+ continue;
+ f2fs_compress_free_page(cc->cpages[i]);
+ cc->cpages[i] = NULL;
+ }
page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
cc->cpages = NULL;
return -EAGAIN;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cfae2dddb0ba..1b11a42847c4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1550,7 +1550,21 @@ next_dnode:
if (err) {
if (flag == F2FS_GET_BLOCK_BMAP)
map->m_pblk = 0;
+
if (err == -ENOENT) {
+ /*
+ * There is one exceptional case that read_node_page()
+ * may return -ENOENT due to filesystem has been
+ * shutdown or cp_error, so force to convert error
+ * number to EIO for such case.
+ */
+ if (map->m_may_create &&
+ (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+ f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto unlock_out;
+ }
+
err = 0;
if (map->m_next_pgofs)
*map->m_next_pgofs =
@@ -2205,6 +2219,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
continue;
}
unlock_page(page);
+ if (for_write)
+ put_page(page);
cc->rpages[i] = NULL;
cc->nr_rpages--;
}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4b9ef8bbfa4a..6694298b1660 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -938,6 +938,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode);
struct blk_plug plug;
bool readdir_ra = sbi->readdir_ra == 1;
+ bool found_valid_dirent = false;
int err = 0;
bit_pos = ((unsigned long)ctx->pos % d->max);
@@ -952,13 +953,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
de = &d->dentry[bit_pos];
if (de->name_len == 0) {
+ if (found_valid_dirent || !bit_pos) {
+ printk_ratelimited(
+ "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.",
+ KERN_WARNING, sbi->sb->s_id,
+ le32_to_cpu(de->ino));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ }
bit_pos++;
ctx->pos = start_pos + bit_pos;
- printk_ratelimited(
- "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.",
- KERN_WARNING, sbi->sb->s_id,
- le32_to_cpu(de->ino));
- set_sbi_flag(sbi, SBI_NEED_FSCK);
continue;
}
@@ -1001,6 +1004,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
ctx->pos = start_pos + bit_pos;
+ found_valid_dirent = true;
}
out:
if (readdir_ra)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5c74b2997197..1fbaab1f7aba 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -259,8 +259,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
};
unsigned int seq_id = 0;
- if (unlikely(f2fs_readonly(inode->i_sb) ||
- is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
trace_f2fs_sync_file_enter(inode);
@@ -274,7 +273,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
ret = file_write_and_wait_range(file, start, end);
clear_inode_flag(inode, FI_NEED_IPU);
- if (ret) {
+ if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
return ret;
}
@@ -1081,7 +1080,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
if (pg_start < pg_end) {
- struct address_space *mapping = inode->i_mapping;
loff_t blk_start, blk_end;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1093,8 +1091,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_inode_pages_range(mapping, blk_start,
- blk_end - 1);
+ truncate_pagecache_range(inode, blk_start, blk_end - 1);
f2fs_lock_op(sbi);
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e02affb5c0e7..72f227f6ebad 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1477,8 +1477,10 @@ next_step:
int err;
if (S_ISREG(inode->i_mode)) {
- if (!down_write_trylock(&fi->i_gc_rwsem[READ]))
+ if (!down_write_trylock(&fi->i_gc_rwsem[READ])) {
+ sbi->skipped_gc_rwsem++;
continue;
+ }
if (!down_write_trylock(
&fi->i_gc_rwsem[WRITE])) {
sbi->skipped_gc_rwsem++;
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 17d0e5f4efec..710a6f73a685 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1307,9 +1307,18 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
return target;
}
+static int f2fs_encrypted_symlink_getattr(const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ f2fs_getattr(path, stat, request_mask, query_flags);
+
+ return fscrypt_symlink_getattr(path, stat);
+}
+
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.get_link = f2fs_encrypted_get_link,
- .getattr = f2fs_getattr,
+ .getattr = f2fs_encrypted_symlink_getattr,
.setattr = f2fs_setattr,
.listxattr = f2fs_listxattr,
};
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c52988067887..de543168b370 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1764,8 +1764,17 @@ restore_flag:
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
{
+ int retry = DEFAULT_RETRY_IO_COUNT;
+
/* we should flush all the data to keep data consistency */
- sync_inodes_sb(sbi->sb);
+ do {
+ sync_inodes_sb(sbi->sb);
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ } while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);
+
+ if (unlikely(retry < 0))
+ f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
down_write(&sbi->gc_lock);
f2fs_dirty_to_prefree(sbi);
@@ -2197,6 +2206,33 @@ static int f2fs_enable_quotas(struct super_block *sb)
return 0;
}
+static int f2fs_quota_sync_file(struct f2fs_sb_info *sbi, int type)
+{
+ struct quota_info *dqopt = sb_dqopt(sbi->sb);
+ struct address_space *mapping = dqopt->files[type]->i_mapping;
+ int ret = 0;
+
+ ret = dquot_writeback_dquots(sbi->sb, type);
+ if (ret)
+ goto out;
+
+ ret = filemap_fdatawrite(mapping);
+ if (ret)
+ goto out;
+
+ /* if we are using journalled quota */
+ if (is_journalled_quota(sbi))
+ goto out;
+
+ ret = filemap_fdatawait(mapping);
+
+ truncate_inode_pages(&dqopt->files[type]->i_data, 0);
+out:
+ if (ret)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ return ret;
+}
+
int f2fs_quota_sync(struct super_block *sb, int type)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -2205,56 +2241,41 @@ int f2fs_quota_sync(struct super_block *sb, int type)
int ret;
/*
- * do_quotactl
- * f2fs_quota_sync
- * down_read(quota_sem)
- * dquot_writeback_dquots()
- * f2fs_dquot_commit
- * block_operation
- * down_read(quota_sem)
- */
- f2fs_lock_op(sbi);
-
- down_read(&sbi->quota_sem);
- ret = dquot_writeback_dquots(sb, type);
- if (ret)
- goto out;
-
- /*
* Now when everything is written we can discard the pagecache so
* that userspace sees the changes.
*/
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- struct address_space *mapping;
if (type != -1 && cnt != type)
continue;
- if (!sb_has_quota_active(sb, cnt))
- continue;
- mapping = dqopt->files[cnt]->i_mapping;
+ if (!sb_has_quota_active(sb, type))
+ return 0;
- ret = filemap_fdatawrite(mapping);
- if (ret)
- goto out;
+ inode_lock(dqopt->files[cnt]);
- /* if we are using journalled quota */
- if (is_journalled_quota(sbi))
- continue;
+ /*
+ * do_quotactl
+ * f2fs_quota_sync
+ * down_read(quota_sem)
+ * dquot_writeback_dquots()
+ * f2fs_dquot_commit
+ * block_operation
+ * down_read(quota_sem)
+ */
+ f2fs_lock_op(sbi);
+ down_read(&sbi->quota_sem);
- ret = filemap_fdatawait(mapping);
- if (ret)
- set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+ ret = f2fs_quota_sync_file(sbi, cnt);
+
+ up_read(&sbi->quota_sem);
+ f2fs_unlock_op(sbi);
- inode_lock(dqopt->files[cnt]);
- truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
inode_unlock(dqopt->files[cnt]);
+
+ if (ret)
+ break;
}
-out:
- if (ret)
- set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
- up_read(&sbi->quota_sem);
- f2fs_unlock_op(sbi);
return ret;
}
@@ -2889,11 +2910,13 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
- if (le32_to_cpu(raw_super->cp_payload) >
- (blocks_per_seg - F2FS_CP_PACKS)) {
- f2fs_info(sbi, "Insane cp_payload (%u > %u)",
+ if (le32_to_cpu(raw_super->cp_payload) >=
+ (blocks_per_seg - F2FS_CP_PACKS -
+ NR_CURSEG_PERSIST_TYPE)) {
+ f2fs_info(sbi, "Insane cp_payload (%u >= %u)",
le32_to_cpu(raw_super->cp_payload),
- blocks_per_seg - F2FS_CP_PACKS);
+ blocks_per_seg - F2FS_CP_PACKS -
+ NR_CURSEG_PERSIST_TYPE);
return -EFSCORRUPTED;
}
@@ -2929,6 +2952,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int cp_pack_start_sum, cp_payload;
block_t user_block_count, valid_user_blocks;
block_t avail_node_count, valid_node_count;
+ unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks;
int i, j;
total = le32_to_cpu(raw_super->segment_count);
@@ -3049,6 +3073,17 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ nat_blocks = nat_segs << log_blocks_per_seg;
+ nat_bits_bytes = nat_blocks / BITS_PER_BYTE;
+ nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
+ if (__is_set_ckpt_flags(ckpt, CP_NAT_BITS_FLAG) &&
+ (cp_payload + F2FS_CP_PACKS +
+ NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) {
+ f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)",
+ cp_payload, nat_bits_blocks);
+ return -EFSCORRUPTED;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_err(sbi, "A bug case: need to run fsck");
return 1;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 05b36b28f2e8..71b43538fa44 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -995,13 +995,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
+ unsigned long flags;
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
- read_lock(&fa->fa_lock);
+ read_lock_irqsave(&fa->fa_lock, flags);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
@@ -1010,7 +1011,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
- read_unlock(&fa->fa_lock);
+ read_unlock_irqrestore(&fa->fa_lock, flags);
fa = rcu_dereference(fa->fa_next);
}
}
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 751bc5b1cddf..6104f627cc71 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -74,10 +74,8 @@ void fscache_free_cookie(struct fscache_cookie *cookie)
static int fscache_set_key(struct fscache_cookie *cookie,
const void *index_key, size_t index_key_len)
{
- unsigned long long h;
u32 *buf;
int bufs;
- int i;
bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf));
@@ -91,17 +89,7 @@ static int fscache_set_key(struct fscache_cookie *cookie,
}
memcpy(buf, index_key, index_key_len);
-
- /* Calculate a hash and combine this with the length in the first word
- * or first half word
- */
- h = (unsigned long)cookie->parent;
- h += index_key_len + cookie->type;
-
- for (i = 0; i < bufs; i++)
- h += buf[i];
-
- cookie->key_hash = h ^ (h >> 32);
+ cookie->key_hash = fscache_hash(0, buf, bufs);
return 0;
}
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 08e91efbce53..64aa552b296d 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -97,6 +97,8 @@ extern struct workqueue_struct *fscache_object_wq;
extern struct workqueue_struct *fscache_op_wq;
DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
+extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);
+
static inline bool fscache_object_congested(void)
{
return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index c1e6cc9091aa..4207f98e405f 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -94,6 +94,45 @@ static struct ctl_table fscache_sysctls_root[] = {
#endif
/*
+ * Mixing scores (in bits) for (7,20):
+ * Input delta: 1-bit 2-bit
+ * 1 round: 330.3 9201.6
+ * 2 rounds: 1246.4 25475.4
+ * 3 rounds: 1907.1 31295.1
+ * 4 rounds: 2042.3 31718.6
+ * Perfect: 2048 31744
+ * (32*64) (32*31/2 * 64)
+ */
+#define HASH_MIX(x, y, a) \
+ ( x ^= (a), \
+ y ^= x, x = rol32(x, 7),\
+ x += y, y = rol32(y,20),\
+ y *= 9 )
+
+static inline unsigned int fold_hash(unsigned long x, unsigned long y)
+{
+ /* Use arch-optimized multiply if one exists */
+ return __hash_32(y ^ __hash_32(x));
+}
+
+/*
+ * Generate a hash. This is derived from full_name_hash(), but we want to be
+ * sure it is arch independent and that it doesn't change as bits of the
+ * computed hash value might appear on disk. The caller also guarantees that
+ * the hashed data will be a series of aligned 32-bit words.
+ */
+unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)
+{
+ unsigned int a, x = 0, y = salt;
+
+ for (; n; n--) {
+ a = *data++;
+ HASH_MIX(x, y, a);
+ }
+ return fold_hash(x, y);
+}
+
+/*
* initialise the fs caching module
*/
static int __init fscache_init(void)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 756bbdd563e0..2e300176cb88 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -252,7 +252,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
if (ret == -ENOMEM)
goto out;
if (ret || fuse_invalid_attr(&outarg.attr) ||
- (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ fuse_stale_inode(inode, outarg.generation, &outarg.attr))
goto invalid;
forget_all_cached_acls(inode);
@@ -1062,7 +1062,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
err = fuse_simple_request(fm, &args);
if (!err) {
if (fuse_invalid_attr(&outarg.attr) ||
- (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ inode_wrong_type(inode, outarg.attr.mode)) {
fuse_make_bad(inode);
err = -EIO;
} else {
@@ -1699,7 +1699,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
}
if (fuse_invalid_attr(&outarg.attr) ||
- (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ inode_wrong_type(inode, outarg.attr.mode)) {
fuse_make_bad(inode);
err = -EIO;
goto error;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8de9c24ac4ac..c9606f2d2864 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -194,12 +194,11 @@ void fuse_finish_open(struct inode *inode, struct file *file)
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = get_fuse_conn(inode);
- if (!(ff->open_flags & FOPEN_KEEP_CACHE))
- invalidate_inode_pages2(inode->i_mapping);
if (ff->open_flags & FOPEN_STREAM)
stream_open(inode, file);
else if (ff->open_flags & FOPEN_NONSEEKABLE)
nonseekable_open(inode, file);
+
if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -207,10 +206,14 @@ void fuse_finish_open(struct inode *inode, struct file *file)
fi->attr_version = atomic64_inc_return(&fc->attr_version);
i_size_write(inode, 0);
spin_unlock(&fi->lock);
+ truncate_pagecache(inode, 0);
fuse_invalidate_attr(inode);
if (fc->writeback_cache)
file_update_time(file);
+ } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
+ invalidate_inode_pages2(inode->i_mapping);
}
+
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
}
@@ -3237,7 +3240,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
{
- int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ int err = filemap_write_and_wait_range(inode->i_mapping, start, LLONG_MAX);
if (!err)
fuse_sync_writes(inode);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 8150621101c6..ff94da684017 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -860,6 +860,13 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
return atomic64_read(&fc->attr_version);
}
+static inline bool fuse_stale_inode(const struct inode *inode, int generation,
+ struct fuse_attr *attr)
+{
+ return inode->i_generation != generation ||
+ inode_wrong_type(inode, attr->mode);
+}
+
static inline void fuse_make_bad(struct inode *inode)
{
remove_inode_hash(inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f94b0bb57619..053c56af3b6f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -340,8 +340,8 @@ retry:
inode->i_generation = generation;
fuse_init_inode(inode, attr);
unlock_new_inode(inode);
- } else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
- /* Inode has changed type, any I/O on the old should fail */
+ } else if (fuse_stale_inode(inode, generation, attr)) {
+ /* nodeid was reused, any I/O on the old inode should fail */
fuse_make_bad(inode);
iput(inode);
goto retry;
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 3441ffa740f3..bc267832310c 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -200,9 +200,12 @@ retry:
if (!d_in_lookup(dentry)) {
struct fuse_inode *fi;
inode = d_inode(dentry);
+ if (inode && get_node_id(inode) != o->nodeid)
+ inode = NULL;
if (!inode ||
- get_node_id(inode) != o->nodeid ||
- ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
+ fuse_stale_inode(inode, o->generation, &o->attr)) {
+ if (inode)
+ fuse_make_bad(inode);
d_invalidate(dentry);
dput(dentry);
goto retry;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 3faa421568b0..bf539eab92c6 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -623,16 +623,13 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
- if (error)
- gfs2_consist(sdp);
- if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
- gfs2_consist(sdp);
-
- /* Initialize some head of the log stuff */
- if (!gfs2_withdrawn(sdp)) {
- sdp->sd_log_sequence = head.lh_sequence + 1;
- gfs2_log_pointers_init(sdp, head.lh_blkno);
- }
+ if (gfs2_assert_withdraw_delayed(sdp, !error))
+ return error;
+ if (gfs2_assert_withdraw_delayed(sdp, head.lh_flags &
+ GFS2_LOG_HEAD_UNMOUNT))
+ return -EIO;
+ sdp->sd_log_sequence = head.lh_sequence + 1;
+ gfs2_log_pointers_init(sdp, head.lh_blkno);
}
return 0;
}
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 153272f82984..5564aa8b4592 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -296,6 +296,11 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
+ /* don't want to call dlm if we've unmounted the lock protocol */
+ if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
+ gfs2_glock_free(gl);
+ return;
+ }
/* don't want to skip dlm_unlock writing the lvb when lock has one */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ae9c5c1bdc50..b9ed6a6dbcf5 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -660,6 +660,7 @@ static int init_statfs(struct gfs2_sbd *sdp)
error = PTR_ERR(lsi->si_sc_inode);
fs_err(sdp, "can't find local \"sc\" file#%u: %d\n",
jd->jd_jid, error);
+ kfree(lsi);
goto free_local;
}
lsi->si_jid = jd->jd_jid;
@@ -1071,6 +1072,34 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp)
kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp);
}
+static int init_threads(struct gfs2_sbd *sdp)
+{
+ struct task_struct *p;
+ int error = 0;
+
+ p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
+ if (IS_ERR(p)) {
+ error = PTR_ERR(p);
+ fs_err(sdp, "can't start logd thread: %d\n", error);
+ return error;
+ }
+ sdp->sd_logd_process = p;
+
+ p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
+ if (IS_ERR(p)) {
+ error = PTR_ERR(p);
+ fs_err(sdp, "can't start quotad thread: %d\n", error);
+ goto fail;
+ }
+ sdp->sd_quotad_process = p;
+ return 0;
+
+fail:
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
+ return error;
+}
+
/**
* gfs2_fill_super - Read in superblock
* @sb: The VFS superblock
@@ -1197,6 +1226,14 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
goto fail_per_node;
}
+ if (!sb_rdonly(sb)) {
+ error = init_threads(sdp);
+ if (error) {
+ gfs2_withdraw_delayed(sdp);
+ goto fail_per_node;
+ }
+ }
+
error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
if (error)
goto fail_per_node;
@@ -1206,6 +1243,12 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
gfs2_freeze_unlock(&freeze_gh);
if (error) {
+ if (sdp->sd_quotad_process)
+ kthread_stop(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ if (sdp->sd_logd_process)
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
fs_err(sdp, "can't make FS RW: %d\n", error);
goto fail_per_node;
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 077dc8c035a8..6a355e1347d7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -126,34 +126,6 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
return 0;
}
-static int init_threads(struct gfs2_sbd *sdp)
-{
- struct task_struct *p;
- int error = 0;
-
- p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
- if (IS_ERR(p)) {
- error = PTR_ERR(p);
- fs_err(sdp, "can't start logd thread: %d\n", error);
- return error;
- }
- sdp->sd_logd_process = p;
-
- p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
- if (IS_ERR(p)) {
- error = PTR_ERR(p);
- fs_err(sdp, "can't start quotad thread: %d\n", error);
- goto fail;
- }
- sdp->sd_quotad_process = p;
- return 0;
-
-fail:
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
- return error;
-}
-
/**
* gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
* @sdp: the filesystem
@@ -168,26 +140,17 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
struct gfs2_log_header_host head;
int error;
- error = init_threads(sdp);
- if (error) {
- gfs2_withdraw_delayed(sdp);
- return error;
- }
-
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
- if (gfs2_withdrawn(sdp)) {
- error = -EIO;
- goto fail;
- }
+ if (gfs2_withdrawn(sdp))
+ return -EIO;
error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
if (error || gfs2_withdrawn(sdp))
- goto fail;
+ return error;
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
gfs2_consist(sdp);
- error = -EIO;
- goto fail;
+ return -EIO;
}
/* Initialize some head of the log stuff */
@@ -195,20 +158,8 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
gfs2_log_pointers_init(sdp, head.lh_blkno);
error = gfs2_quota_init(sdp);
- if (error || gfs2_withdrawn(sdp))
- goto fail;
-
- set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-
- return 0;
-
-fail:
- if (sdp->sd_quotad_process)
- kthread_stop(sdp->sd_quotad_process);
- sdp->sd_quotad_process = NULL;
- if (sdp->sd_logd_process)
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
+ if (!error && !gfs2_withdrawn(sdp))
+ set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
return error;
}
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 8bb17b6d4de3..3d5fc76b92d0 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -895,7 +895,7 @@ append:
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
- int work_flags;
+ bool do_wake;
unsigned long flags;
/*
@@ -909,14 +909,14 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
return;
}
- work_flags = work->flags;
raw_spin_lock_irqsave(&wqe->lock, flags);
io_wqe_insert_work(wqe, work);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
+ do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
+ !atomic_read(&acct->nr_running);
raw_spin_unlock_irqrestore(&wqe->lock, flags);
- if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
- !atomic_read(&acct->nr_running))
+ if (do_wake)
io_wqe_wake_worker(wqe, acct);
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ed641dca7957..d0089039fee7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -889,6 +889,7 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_WRITE] = {
.needs_file = 1,
+ .hash_reg_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
.async_size = sizeof(struct io_async_rw),
@@ -1497,6 +1498,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
ret = hrtimer_try_to_cancel(&io->timer);
if (ret != -1) {
+ if (status)
+ req_set_fail_links(req);
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
@@ -3125,7 +3128,7 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb *req,
ret = import_single_range(rw, buf, sqe_len, *iovec, iter);
*iovec = NULL;
- return ret < 0 ? ret : sqe_len;
+ return ret;
}
if (req->flags & REQ_F_BUFFER_SELECT) {
@@ -3151,7 +3154,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
if (!iorw)
return __io_import_iovec(rw, req, iovec, iter, needs_lock);
*iovec = NULL;
- return iov_iter_count(&iorw->iter);
+ return 0;
}
static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
@@ -3410,7 +3413,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
ssize_t io_size, ret, ret2;
- size_t iov_count;
bool no_async;
if (rw)
@@ -3419,8 +3421,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock);
if (ret < 0)
return ret;
- iov_count = iov_iter_count(iter);
- io_size = ret;
+ io_size = iov_iter_count(iter);
req->result = io_size;
ret = 0;
@@ -3436,7 +3437,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
if (no_async)
goto copy_iov;
- ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), iov_count);
+ ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
if (unlikely(ret))
goto out_free;
@@ -3455,7 +3456,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
if (req->file->f_flags & O_NONBLOCK)
goto done;
/* some cases will consume bytes even on error returns */
- iov_iter_revert(iter, iov_count - iov_iter_count(iter));
+ iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = 0;
goto copy_iov;
} else if (ret < 0) {
@@ -3539,7 +3540,6 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
- size_t iov_count;
ssize_t ret, ret2, io_size;
if (rw)
@@ -3548,8 +3548,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock);
if (ret < 0)
return ret;
- iov_count = iov_iter_count(iter);
- io_size = ret;
+ io_size = iov_iter_count(iter);
req->result = io_size;
/* Ensure we clear previously set non-block flag */
@@ -3567,7 +3566,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
(req->flags & REQ_F_ISREG))
goto copy_iov;
- ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), iov_count);
+ ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size);
if (unlikely(ret))
goto out_free;
@@ -3610,7 +3609,7 @@ done:
} else {
copy_iov:
/* some cases will consume bytes even on error returns */
- iov_iter_revert(iter, iov_count - iov_iter_count(iter));
+ iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
if (!ret)
return -EAGAIN;
@@ -3745,7 +3744,8 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
+ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
+ sqe->splice_fd_in))
return -EINVAL;
req->sync.flags = READ_ONCE(sqe->fsync_flags);
@@ -3778,7 +3778,8 @@ static int io_fsync(struct io_kiocb *req, bool force_nonblock)
static int io_fallocate_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (sqe->ioprio || sqe->buf_index || sqe->rw_flags)
+ if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
+ sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -3809,7 +3810,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
const char __user *fname;
int ret;
- if (unlikely(sqe->ioprio || sqe->buf_index))
+ if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -3925,7 +3926,8 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
- if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off)
+ if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
+ sqe->splice_fd_in)
return -EINVAL;
tmp = READ_ONCE(sqe->fd);
@@ -4001,7 +4003,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
- if (sqe->ioprio || sqe->rw_flags)
+ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
tmp = READ_ONCE(sqe->fd);
@@ -4094,7 +4096,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_EPOLL)
- if (sqe->ioprio || sqe->buf_index)
+ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
return -EINVAL;
@@ -4140,7 +4142,7 @@ static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock,
static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
- if (sqe->ioprio || sqe->buf_index || sqe->off)
+ if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4175,7 +4177,7 @@ static int io_madvise(struct io_kiocb *req, bool force_nonblock)
static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (sqe->ioprio || sqe->buf_index || sqe->addr)
+ if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4213,7 +4215,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index)
+ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
@@ -4260,7 +4262,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
- sqe->rw_flags || sqe->buf_index)
+ sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
@@ -4316,7 +4318,8 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
+ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
+ sqe->splice_fd_in))
return -EINVAL;
req->sync.off = READ_ONCE(sqe->off);
@@ -4759,7 +4762,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index)
+ if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -4800,7 +4803,8 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
+ if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
+ sqe->splice_fd_in)
return -EINVAL;
conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -5552,7 +5556,8 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags)
+ if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags ||
+ sqe->splice_fd_in)
return -EINVAL;
req->timeout_rem.addr = READ_ONCE(sqe->addr);
@@ -5589,7 +5594,8 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->len != 1)
+ if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
+ sqe->splice_fd_in)
return -EINVAL;
if (off && is_timeout_link)
return -EINVAL;
@@ -5733,7 +5739,8 @@ static int io_async_cancel_prep(struct io_kiocb *req,
return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags)
+ if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
+ sqe->splice_fd_in)
return -EINVAL;
req->cancel.addr = READ_ONCE(sqe->addr);
@@ -7382,7 +7389,7 @@ static int io_sqe_alloc_file_tables(struct fixed_file_data *file_data,
this_files = min(nr_files, IORING_MAX_FILES_TABLE);
table->files = kcalloc(this_files, sizeof(struct file *),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!table->files)
break;
nr_files -= this_files;
@@ -7578,8 +7585,10 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return -EINVAL;
if (nr_args > IORING_MAX_FIXED_FILES)
return -EMFILE;
+ if (nr_args > rlimit(RLIMIT_NOFILE))
+ return -EMFILE;
- file_data = kzalloc(sizeof(*ctx->file_data), GFP_KERNEL);
+ file_data = kzalloc(sizeof(*ctx->file_data), GFP_KERNEL_ACCOUNT);
if (!file_data)
return -ENOMEM;
file_data->ctx = ctx;
@@ -7589,7 +7598,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE);
file_data->table = kcalloc(nr_tables, sizeof(*file_data->table),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!file_data->table)
goto out_free;
@@ -9078,9 +9087,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
if (ctx->flags & IORING_SETUP_SQPOLL) {
io_cqring_overflow_flush(ctx, false, NULL, NULL);
- ret = -EOWNERDEAD;
- if (unlikely(ctx->sqo_dead))
+ if (unlikely(ctx->sqo_dead)) {
+ ret = -EOWNERDEAD;
goto out;
+ }
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sq_data->wait);
if (flags & IORING_ENTER_SQ_WAIT) {
@@ -9601,11 +9611,12 @@ static int io_register_personality(struct io_ring_ctx *ctx)
ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)iod,
XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
- if (!ret)
- return id;
- put_cred(iod->creds);
- kfree(iod);
- return ret;
+ if (ret < 0) {
+ put_cred(iod->creds);
+ kfree(iod);
+ return ret;
+ }
+ return id;
}
static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg,
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 10cc7979ce38..caed9d98c64a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1045,7 +1045,7 @@ iomap_finish_page_writeback(struct inode *inode, struct page *page,
if (error) {
SetPageError(page);
- mapping_set_error(inode->i_mapping, -EIO);
+ mapping_set_error(inode->i_mapping, error);
}
WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
index a5e478de1417..2ceea45aefd8 100644
--- a/fs/iomap/swapfile.c
+++ b/fs/iomap/swapfile.c
@@ -30,11 +30,16 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
{
struct iomap *iomap = &isi->iomap;
unsigned long nr_pages;
+ unsigned long max_pages;
uint64_t first_ppage;
uint64_t first_ppage_reported;
uint64_t next_ppage;
int error;
+ if (unlikely(isi->nr_pages >= isi->sis->max))
+ return 0;
+ max_pages = isi->sis->max - isi->nr_pages;
+
/*
* Round the start up and the end down so that the physical
* extent aligns to a page boundary.
@@ -47,6 +52,7 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
if (first_ppage >= next_ppage)
return 0;
nr_pages = next_ppage - first_ppage;
+ nr_pages = min(nr_pages, max_pages);
/*
* Calculate how much swap space we're adding; the first page contains
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index ec90773527ee..35675a1065be 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -155,7 +155,6 @@ struct iso9660_options{
unsigned int overriderockperm:1;
unsigned int uid_set:1;
unsigned int gid_set:1;
- unsigned int utf8:1;
unsigned char map;
unsigned char check;
unsigned int blocksize;
@@ -355,7 +354,6 @@ static int parse_options(char *options, struct iso9660_options *popt)
popt->gid = GLOBAL_ROOT_GID;
popt->uid = GLOBAL_ROOT_UID;
popt->iocharset = NULL;
- popt->utf8 = 0;
popt->overriderockperm = 0;
popt->session=-1;
popt->sbsector=-1;
@@ -388,10 +386,13 @@ static int parse_options(char *options, struct iso9660_options *popt)
case Opt_cruft:
popt->cruft = 1;
break;
+#ifdef CONFIG_JOLIET
case Opt_utf8:
- popt->utf8 = 1;
+ kfree(popt->iocharset);
+ popt->iocharset = kstrdup("utf8", GFP_KERNEL);
+ if (!popt->iocharset)
+ return 0;
break;
-#ifdef CONFIG_JOLIET
case Opt_iocharset:
kfree(popt->iocharset);
popt->iocharset = match_strdup(&args[0]);
@@ -494,7 +495,6 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
if (sbi->s_nocompress) seq_puts(m, ",nocompress");
if (sbi->s_overriderockperm) seq_puts(m, ",overriderockperm");
if (sbi->s_showassoc) seq_puts(m, ",showassoc");
- if (sbi->s_utf8) seq_puts(m, ",utf8");
if (sbi->s_check) seq_printf(m, ",check=%c", sbi->s_check);
if (sbi->s_mapping) seq_printf(m, ",map=%c", sbi->s_mapping);
@@ -517,9 +517,10 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",fmode=%o", sbi->s_fmode);
#ifdef CONFIG_JOLIET
- if (sbi->s_nls_iocharset &&
- strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
+ if (sbi->s_nls_iocharset)
seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+ else
+ seq_puts(m, ",iocharset=utf8");
#endif
return 0;
}
@@ -862,14 +863,13 @@ root_found:
sbi->s_nls_iocharset = NULL;
#ifdef CONFIG_JOLIET
- if (joliet_level && opt.utf8 == 0) {
+ if (joliet_level) {
char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT;
- sbi->s_nls_iocharset = load_nls(p);
- if (! sbi->s_nls_iocharset) {
- /* Fail only if explicit charset specified */
- if (opt.iocharset)
+ if (strcmp(p, "utf8") != 0) {
+ sbi->s_nls_iocharset = opt.iocharset ?
+ load_nls(opt.iocharset) : load_nls_default();
+ if (!sbi->s_nls_iocharset)
goto out_freesbi;
- sbi->s_nls_iocharset = load_nls_default();
}
}
#endif
@@ -885,7 +885,6 @@ root_found:
sbi->s_gid = opt.gid;
sbi->s_uid_set = opt.uid_set;
sbi->s_gid_set = opt.gid_set;
- sbi->s_utf8 = opt.utf8;
sbi->s_nocompress = opt.nocompress;
sbi->s_overriderockperm = opt.overriderockperm;
/*
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 055ec6c586f7..dcdc191ed183 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -44,7 +44,6 @@ struct isofs_sb_info {
unsigned char s_session;
unsigned int s_high_sierra:1;
unsigned int s_rock:2;
- unsigned int s_utf8:1;
unsigned int s_cruft:1; /* Broken disks with high byte of length
* containing junk */
unsigned int s_nocompress:1;
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index be8b6a9d0b92..c0f04a1e7f69 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -41,14 +41,12 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
int
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
{
- unsigned char utf8;
struct nls_table *nls;
unsigned char len = 0;
- utf8 = ISOFS_SB(inode->i_sb)->s_utf8;
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
- if (utf8) {
+ if (!nls) {
len = utf16s_to_utf8s((const wchar_t *) de->name,
de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
outname, PAGE_SIZE);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 61d3cc2283dc..273a81971ed5 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -395,28 +395,10 @@ nlmsvc_release_lockowner(struct nlm_lock *lock)
nlmsvc_put_lockowner(lock->fl.fl_owner);
}
-static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
-{
- struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner;
- new->fl_owner = nlmsvc_get_lockowner(nlm_lo);
-}
-
-static void nlmsvc_locks_release_private(struct file_lock *fl)
-{
- nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner);
-}
-
-static const struct file_lock_operations nlmsvc_lock_ops = {
- .fl_copy_lock = nlmsvc_locks_copy_lock,
- .fl_release_private = nlmsvc_locks_release_private,
-};
-
void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host,
pid_t pid)
{
fl->fl_owner = nlmsvc_find_lockowner(host, pid);
- if (fl->fl_owner != NULL)
- fl->fl_ops = &nlmsvc_lock_ops;
}
/*
@@ -634,7 +616,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
conflock->caller = "somehost"; /* FIXME */
conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
- conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
+ conflock->svid = lock->fl.fl_pid;
conflock->fl.fl_type = lock->fl.fl_type;
conflock->fl.fl_start = lock->fl.fl_start;
conflock->fl.fl_end = lock->fl.fl_end;
@@ -788,9 +770,21 @@ nlmsvc_notify_blocked(struct file_lock *fl)
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
+static fl_owner_t nlmsvc_get_owner(fl_owner_t owner)
+{
+ return nlmsvc_get_lockowner(owner);
+}
+
+static void nlmsvc_put_owner(fl_owner_t owner)
+{
+ nlmsvc_put_lockowner(owner);
+}
+
const struct lock_manager_operations nlmsvc_lock_operations = {
.lm_notify = nlmsvc_notify_blocked,
.lm_grant = nlmsvc_grant_deferred,
+ .lm_get_owner = nlmsvc_get_owner,
+ .lm_put_owner = nlmsvc_put_owner,
};
/*
diff --git a/fs/namespace.c b/fs/namespace.c
index 175312428cdf..046b084136c5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1697,8 +1697,12 @@ static inline bool may_mount(void)
}
#ifdef CONFIG_MANDATORY_FILE_LOCKING
-static inline bool may_mandlock(void)
+static bool may_mandlock(void)
{
+ pr_warn_once("======================================================\n"
+ "WARNING: the mand mount option is being deprecated and\n"
+ " will be removed in v5.15!\n"
+ "======================================================\n");
return capable(CAP_SYS_ADMIN);
}
#else
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9811880470a0..21addb78523d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -322,7 +322,7 @@ nfs_find_actor(struct inode *inode, void *opaque)
if (NFS_FILEID(inode) != fattr->fileid)
return 0;
- if ((S_IFMT & inode->i_mode) != (S_IFMT & fattr->mode))
+ if (inode_wrong_type(inode, fattr->mode))
return 0;
if (nfs_compare_fh(NFS_FH(inode), fh))
return 0;
@@ -1446,7 +1446,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
return 0;
return -ESTALE;
}
- if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+ if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && inode_wrong_type(inode, fattr->mode))
return -ESTALE;
@@ -1861,7 +1861,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/*
* Make sure the inode's type hasn't changed.
*/
- if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+ if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && inode_wrong_type(inode, fattr->mode)) {
/*
* Big trouble! The inode has become a different object.
*/
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 371665e0c154..5370e082aded 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -335,7 +335,7 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq)
{
- if (pnfs_seqid_is_newer(newseq, lo->plh_barrier))
+ if (pnfs_seqid_is_newer(newseq, lo->plh_barrier) || !lo->plh_barrier)
lo->plh_barrier = newseq;
}
@@ -347,11 +347,15 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
iomode = IOMODE_ANY;
lo->plh_return_iomode = iomode;
set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
- if (seq != 0) {
- WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq);
+ /*
+ * We must set lo->plh_return_seq to avoid livelocks with
+ * pnfs_layout_need_return()
+ */
+ if (seq == 0)
+ seq = be32_to_cpu(lo->plh_stateid.seqid);
+ if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq))
lo->plh_return_seq = seq;
- pnfs_barrier_update(lo, seq);
- }
+ pnfs_barrier_update(lo, seq);
}
static void
@@ -1000,7 +1004,7 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
{
u32 seqid = be32_to_cpu(stateid->seqid);
- return !pnfs_seqid_is_newer(seqid, lo->plh_barrier) && lo->plh_barrier;
+ return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid);
}
/* lget is set to 1 if called from inside send_layoutget call chain */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 80e394a2e3fd..0313390fa4b4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2646,9 +2646,9 @@ static void force_expire_client(struct nfs4_client *clp)
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
bool already_expired;
- spin_lock(&clp->cl_lock);
+ spin_lock(&nn->client_lock);
clp->cl_time = 0;
- spin_unlock(&clp->cl_lock);
+ spin_unlock(&nn->client_lock);
wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
spin_lock(&nn->client_lock);
@@ -6855,8 +6855,7 @@ out:
/*
* The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
* so we do a temporary open here just to get an open file to pass to
- * vfs_test_lock. (Arguably perhaps test_lock should be done with an
- * inode operation.)
+ * vfs_test_lock.
*/
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
@@ -6871,7 +6870,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
NFSD_MAY_READ));
if (err)
goto out;
+ lock->fl_file = nf->nf_file;
err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+ lock->fl_file = NULL;
out:
fh_unlock(fhp);
nfsd_file_put(nf);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0d71549f9d42..9c9de2b66e64 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -376,7 +376,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
/* Make sure the type and device matches */
resp->status = nfserr_exist;
- if (inode && type != (inode->i_mode & S_IFMT))
+ if (inode && inode_wrong_type(inode, type))
goto out_unlock;
}
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 1192c9953620..c3af99e94f1d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -129,11 +129,15 @@ static bool fanotify_should_merge(struct fsnotify_event *old_fsn,
return false;
}
+/* Limit event merges to limit CPU overhead per event */
+#define FANOTIFY_MAX_MERGE_EVENTS 128
+
/* and the list better be locked by something too! */
static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
{
struct fsnotify_event *test_event;
struct fanotify_event *new;
+ int i = 0;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
new = FANOTIFY_E(event);
@@ -147,6 +151,8 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
return 0;
list_for_each_entry_reverse(test_event, list, list) {
+ if (++i > FANOTIFY_MAX_MERGE_EVENTS)
+ break;
if (fanotify_should_merge(test_event, event)) {
FANOTIFY_E(test_event)->mask |= new->mask;
return 1;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index d1efa3a5a503..08b595c526d7 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -542,8 +542,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
goto out_cleanup;
}
err = ovl_instantiate(dentry, inode, newdentry, hardlink);
- if (err)
- goto out_cleanup;
+ if (err) {
+ ovl_cleanup(udir, newdentry);
+ dput(newdentry);
+ }
out_dput:
dput(upper);
out_unlock:
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index ed35be3fafc6..f469982dcb36 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -390,6 +390,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
*/
take_dentry_name_snapshot(&name, real);
this = lookup_one_len(name.name.name, connected, name.name.len);
+ release_dentry_name_snapshot(&name);
err = PTR_ERR(this);
if (IS_ERR(this)) {
goto fail;
@@ -404,7 +405,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
}
out:
- release_dentry_name_snapshot(&name);
dput(parent);
inode_unlock(dir);
return this;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index f3309e044f07..092812c2f118 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -366,7 +366,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
return PTR_ERR(origin);
if (upperdentry && !ovl_is_whiteout(upperdentry) &&
- ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
+ inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
goto invalid;
if (!*stackp)
@@ -724,7 +724,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
index = ERR_PTR(-ESTALE);
goto out;
} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
- ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
+ inode_wrong_type(inode, d_inode(origin)->i_mode)) {
/*
* Index should always be of the same file type as origin
* except for the case of a whiteout index. A whiteout
diff --git a/fs/pipe.c b/fs/pipe.c
index 28b2e973f10e..d6d4019ba32f 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -363,10 +363,9 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
* _very_ unlikely case that the pipe was full, but we got
* no data.
*/
- if (unlikely(was_full)) {
+ if (unlikely(was_full))
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
- kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
- }
+ kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
/*
* But because we didn't read anything, at this point we can
@@ -385,12 +384,11 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
wake_next_reader = false;
__pipe_unlock(pipe);
- if (was_full) {
+ if (was_full)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
- kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
- }
if (wake_next_reader)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
+ kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
if (ret > 0)
file_accessed(filp);
return ret;
@@ -444,9 +442,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
#endif
/*
- * Epoll nonsensically wants a wakeup whether the pipe
- * was already empty or not.
- *
* If it wasn't empty we try to merge new data into
* the last buffer.
*
@@ -455,9 +450,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* spanning multiple pages.
*/
head = pipe->head;
- was_empty = true;
+ was_empty = pipe_empty(head, pipe->tail);
chars = total_len & (PAGE_SIZE-1);
- if (chars && !pipe_empty(head, pipe->tail)) {
+ if (chars && !was_empty) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
int offset = buf->offset + buf->len;
@@ -568,10 +563,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* become empty while we dropped the lock.
*/
__pipe_unlock(pipe);
- if (was_empty) {
+ if (was_empty)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- }
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
__pipe_lock(pipe);
was_empty = pipe_empty(pipe->head, pipe->tail);
@@ -590,11 +584,13 @@ out:
* This is particularly important for small writes, because of
* how (for example) the GNU make jobserver uses small writes to
* wake up pending jobs
+ *
+ * Epoll nonsensically wants a wakeup whether the pipe
+ * was already empty or not.
*/
- if (was_empty) {
+ if (was_empty || pipe->poll_usage)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- }
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
if (wake_next_writer)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
@@ -654,6 +650,9 @@ pipe_poll(struct file *filp, poll_table *wait)
struct pipe_inode_info *pipe = filp->private_data;
unsigned int head, tail;
+ /* Epoll has some historical nasty semantics, this enables them */
+ pipe->poll_usage = 1;
+
/*
* Reading pipe state only -- no need for acquiring the semaphore.
*
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b77d1637bbbc..f4826b6da682 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1629,6 +1629,16 @@ static const char *ubifs_get_link(struct dentry *dentry,
return fscrypt_get_symlink(inode, ui->data, ui->data_len, done);
}
+static int ubifs_symlink_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
+{
+ ubifs_getattr(path, stat, request_mask, query_flags);
+
+ if (IS_ENCRYPTED(d_inode(path->dentry)))
+ return fscrypt_symlink_getattr(path, stat);
+ return 0;
+}
+
const struct address_space_operations ubifs_file_address_operations = {
.readpage = ubifs_readpage,
.writepage = ubifs_writepage,
@@ -1654,7 +1664,7 @@ const struct inode_operations ubifs_file_inode_operations = {
const struct inode_operations ubifs_symlink_inode_operations = {
.get_link = ubifs_get_link,
.setattr = ubifs_setattr,
- .getattr = ubifs_getattr,
+ .getattr = ubifs_symlink_getattr,
#ifdef CONFIG_UBIFS_FS_XATTR
.listxattr = ubifs_listxattr,
#endif
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index eab94527340d..1614d308d0f0 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -173,13 +173,22 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type,
else
offset = le32_to_cpu(eahd->appAttrLocation);
- while (offset < iinfo->i_lenEAttr) {
+ while (offset + sizeof(*gaf) < iinfo->i_lenEAttr) {
+ uint32_t attrLength;
+
gaf = (struct genericFormat *)&ea[offset];
+ attrLength = le32_to_cpu(gaf->attrLength);
+
+ /* Detect undersized elements and buffer overflows */
+ if ((attrLength < sizeof(*gaf)) ||
+ (attrLength > (iinfo->i_lenEAttr - offset)))
+ break;
+
if (le32_to_cpu(gaf->attrType) == type &&
gaf->attrSubtype == subtype)
return gaf;
else
- offset += le32_to_cpu(gaf->attrLength);
+ offset += attrLength;
}
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index d0df217f4712..5d2b820ef303 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -108,16 +108,10 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb)
return NULL;
lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data;
partnum = le32_to_cpu(lvid->numOfPartitions);
- if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) -
- offsetof(struct logicalVolIntegrityDesc, impUse)) /
- (2 * sizeof(uint32_t)) < partnum) {
- udf_err(sb, "Logical volume integrity descriptor corrupted "
- "(numOfPartitions = %u)!\n", partnum);
- return NULL;
- }
/* The offset is to skip freeSpaceTable and sizeTable arrays */
offset = partnum * 2 * sizeof(uint32_t);
- return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]);
+ return (struct logicalVolIntegrityDescImpUse *)
+ (((uint8_t *)(lvid + 1)) + offset);
}
/* UDF filesystem type */
@@ -349,10 +343,10 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",lastblock=%u", sbi->s_last_block);
if (sbi->s_anchor != 0)
seq_printf(seq, ",anchor=%u", sbi->s_anchor);
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
- seq_puts(seq, ",utf8");
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP) && sbi->s_nls_map)
+ if (sbi->s_nls_map)
seq_printf(seq, ",iocharset=%s", sbi->s_nls_map->charset);
+ else
+ seq_puts(seq, ",iocharset=utf8");
return 0;
}
@@ -557,19 +551,24 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
/* Ignored (never implemented properly) */
break;
case Opt_utf8:
- uopt->flags |= (1 << UDF_FLAG_UTF8);
+ if (!remount) {
+ unload_nls(uopt->nls_map);
+ uopt->nls_map = NULL;
+ }
break;
case Opt_iocharset:
if (!remount) {
- if (uopt->nls_map)
- unload_nls(uopt->nls_map);
- /*
- * load_nls() failure is handled later in
- * udf_fill_super() after all options are
- * parsed.
- */
+ unload_nls(uopt->nls_map);
+ uopt->nls_map = NULL;
+ }
+ /* When nls_map is not loaded then UTF-8 is used */
+ if (!remount && strcmp(args[0].from, "utf8") != 0) {
uopt->nls_map = load_nls(args[0].from);
- uopt->flags |= (1 << UDF_FLAG_NLS_MAP);
+ if (!uopt->nls_map) {
+ pr_err("iocharset %s not found\n",
+ args[0].from);
+ return 0;
+ }
}
break;
case Opt_uforget:
@@ -1541,6 +1540,7 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
struct udf_sb_info *sbi = UDF_SB(sb);
struct logicalVolIntegrityDesc *lvid;
int indirections = 0;
+ u32 parts, impuselen;
while (++indirections <= UDF_MAX_LVID_NESTING) {
final_bh = NULL;
@@ -1567,15 +1567,27 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data;
if (lvid->nextIntegrityExt.extLength == 0)
- return;
+ goto check;
loc = leea_to_cpu(lvid->nextIntegrityExt);
}
udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n",
UDF_MAX_LVID_NESTING);
+out_err:
brelse(sbi->s_lvid_bh);
sbi->s_lvid_bh = NULL;
+ return;
+check:
+ parts = le32_to_cpu(lvid->numOfPartitions);
+ impuselen = le32_to_cpu(lvid->lengthOfImpUse);
+ if (parts >= sb->s_blocksize || impuselen >= sb->s_blocksize ||
+ sizeof(struct logicalVolIntegrityDesc) + impuselen +
+ 2 * parts * sizeof(u32) > sb->s_blocksize) {
+ udf_warn(sb, "Corrupted LVID (parts=%u, impuselen=%u), "
+ "ignoring.\n", parts, impuselen);
+ goto out_err;
+ }
}
/*
@@ -2138,21 +2150,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!udf_parse_options((char *)options, &uopt, false))
goto parse_options_failure;
- if (uopt.flags & (1 << UDF_FLAG_UTF8) &&
- uopt.flags & (1 << UDF_FLAG_NLS_MAP)) {
- udf_err(sb, "utf8 cannot be combined with iocharset\n");
- goto parse_options_failure;
- }
- if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) {
- uopt.nls_map = load_nls_default();
- if (!uopt.nls_map)
- uopt.flags &= ~(1 << UDF_FLAG_NLS_MAP);
- else
- udf_debug("Using default NLS map\n");
- }
- if (!(uopt.flags & (1 << UDF_FLAG_NLS_MAP)))
- uopt.flags |= (1 << UDF_FLAG_UTF8);
-
fileset.logicalBlockNum = 0xFFFFFFFF;
fileset.partitionReferenceNum = 0xFFFF;
@@ -2307,8 +2304,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
error_out:
iput(sbi->s_vat_inode);
parse_options_failure:
- if (uopt.nls_map)
- unload_nls(uopt.nls_map);
+ unload_nls(uopt.nls_map);
if (lvid_open)
udf_close_lvid(sb);
brelse(sbi->s_lvid_bh);
@@ -2358,8 +2354,7 @@ static void udf_put_super(struct super_block *sb)
sbi = UDF_SB(sb);
iput(sbi->s_vat_inode);
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
- unload_nls(sbi->s_nls_map);
+ unload_nls(sbi->s_nls_map);
if (!sb_rdonly(sb))
udf_close_lvid(sb);
brelse(sbi->s_lvid_bh);
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 758efe557a19..4fa620543d30 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -20,8 +20,6 @@
#define UDF_FLAG_UNDELETE 6
#define UDF_FLAG_UNHIDE 7
#define UDF_FLAG_VARCONV 8
-#define UDF_FLAG_NLS_MAP 9
-#define UDF_FLAG_UTF8 10
#define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */
#define UDF_FLAG_GID_FORGET 12
#define UDF_FLAG_UID_SET 13
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 5fcfa96463eb..622569007b53 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -177,7 +177,7 @@ static int udf_name_from_CS0(struct super_block *sb,
return 0;
}
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
+ if (UDF_SB(sb)->s_nls_map)
conv_f = UDF_SB(sb)->s_nls_map->uni2char;
else
conv_f = NULL;
@@ -285,7 +285,7 @@ static int udf_name_to_CS0(struct super_block *sb,
if (ocu_max_len <= 0)
return 0;
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
+ if (UDF_SB(sb)->s_nls_map)
conv_f = UDF_SB(sb)->s_nls_map->char2uni;
else
conv_f = NULL;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 3d181b1a6d56..17397c7532f1 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -32,11 +32,6 @@ int sysctl_unprivileged_userfaultfd __read_mostly = 1;
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
-enum userfaultfd_state {
- UFFD_STATE_WAIT_API,
- UFFD_STATE_RUNNING,
-};
-
/*
* Start with fault_pending_wqh and fault_wqh so they're more likely
* to be in the same cacheline.
@@ -68,8 +63,6 @@ struct userfaultfd_ctx {
unsigned int flags;
/* features requested from the userspace */
unsigned int features;
- /* state machine */
- enum userfaultfd_state state;
/* released */
bool released;
/* memory mappings are changing because of non-cooperative event */
@@ -103,6 +96,14 @@ struct userfaultfd_wake_range {
unsigned long len;
};
+/* internal indication that UFFD_API ioctl was successfully executed */
+#define UFFD_FEATURE_INITIALIZED (1u << 31)
+
+static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
+{
+ return ctx->features & UFFD_FEATURE_INITIALIZED;
+}
+
static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
int wake_flags, void *key)
{
@@ -659,7 +660,6 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
refcount_set(&ctx->refcount, 1);
ctx->flags = octx->flags;
- ctx->state = UFFD_STATE_RUNNING;
ctx->features = octx->features;
ctx->released = false;
ctx->mmap_changing = false;
@@ -936,38 +936,33 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait)
poll_wait(file, &ctx->fd_wqh, wait);
- switch (ctx->state) {
- case UFFD_STATE_WAIT_API:
+ if (!userfaultfd_is_initialized(ctx))
return EPOLLERR;
- case UFFD_STATE_RUNNING:
- /*
- * poll() never guarantees that read won't block.
- * userfaults can be waken before they're read().
- */
- if (unlikely(!(file->f_flags & O_NONBLOCK)))
- return EPOLLERR;
- /*
- * lockless access to see if there are pending faults
- * __pollwait last action is the add_wait_queue but
- * the spin_unlock would allow the waitqueue_active to
- * pass above the actual list_add inside
- * add_wait_queue critical section. So use a full
- * memory barrier to serialize the list_add write of
- * add_wait_queue() with the waitqueue_active read
- * below.
- */
- ret = 0;
- smp_mb();
- if (waitqueue_active(&ctx->fault_pending_wqh))
- ret = EPOLLIN;
- else if (waitqueue_active(&ctx->event_wqh))
- ret = EPOLLIN;
- return ret;
- default:
- WARN_ON_ONCE(1);
+ /*
+ * poll() never guarantees that read won't block.
+ * userfaults can be waken before they're read().
+ */
+ if (unlikely(!(file->f_flags & O_NONBLOCK)))
return EPOLLERR;
- }
+ /*
+ * lockless access to see if there are pending faults
+ * __pollwait last action is the add_wait_queue but
+ * the spin_unlock would allow the waitqueue_active to
+ * pass above the actual list_add inside
+ * add_wait_queue critical section. So use a full
+ * memory barrier to serialize the list_add write of
+ * add_wait_queue() with the waitqueue_active read
+ * below.
+ */
+ ret = 0;
+ smp_mb();
+ if (waitqueue_active(&ctx->fault_pending_wqh))
+ ret = EPOLLIN;
+ else if (waitqueue_active(&ctx->event_wqh))
+ ret = EPOLLIN;
+
+ return ret;
}
static const struct file_operations userfaultfd_fops;
@@ -1161,7 +1156,7 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
struct uffd_msg msg;
int no_wait = file->f_flags & O_NONBLOCK;
- if (ctx->state == UFFD_STATE_WAIT_API)
+ if (!userfaultfd_is_initialized(ctx))
return -EINVAL;
for (;;) {
@@ -1816,9 +1811,10 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
static inline unsigned int uffd_ctx_features(__u64 user_features)
{
/*
- * For the current set of features the bits just coincide
+ * For the current set of features the bits just coincide. Set
+ * UFFD_FEATURE_INITIALIZED to mark the features as enabled.
*/
- return (unsigned int)user_features;
+ return (unsigned int)user_features | UFFD_FEATURE_INITIALIZED;
}
/*
@@ -1831,12 +1827,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
{
struct uffdio_api uffdio_api;
void __user *buf = (void __user *)arg;
+ unsigned int ctx_features;
int ret;
__u64 features;
- ret = -EINVAL;
- if (ctx->state != UFFD_STATE_WAIT_API)
- goto out;
ret = -EFAULT;
if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
goto out;
@@ -1853,9 +1847,13 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
ret = -EFAULT;
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
goto out;
- ctx->state = UFFD_STATE_RUNNING;
+
/* only enable the requested features for this uffd context */
- ctx->features = uffd_ctx_features(features);
+ ctx_features = uffd_ctx_features(features);
+ ret = -EINVAL;
+ if (cmpxchg(&ctx->features, 0, ctx_features) != 0)
+ goto err_out;
+
ret = 0;
out:
return ret;
@@ -1872,7 +1870,7 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
int ret = -EINVAL;
struct userfaultfd_ctx *ctx = file->private_data;
- if (cmd != UFFDIO_API && ctx->state == UFFD_STATE_WAIT_API)
+ if (cmd != UFFDIO_API && !userfaultfd_is_initialized(ctx))
return -EINVAL;
switch(cmd) {
@@ -1976,7 +1974,6 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
refcount_set(&ctx->refcount, 1);
ctx->flags = flags;
ctx->features = 0;
- ctx->state = UFFD_STATE_WAIT_API;
ctx->released = false;
ctx->mmap_changing = false;
ctx->mm = current->mm;