summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c10
-rw-r--r--fs/binfmt_misc.c56
-rw-r--r--fs/binfmt_script.c17
-rw-r--r--fs/block_dev.c6
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/ceph/caps.c5
-rw-r--r--fs/ceph/mds_client.c11
-rw-r--r--fs/ceph/snap.c8
-rw-r--r--fs/cifs/Kconfig5
-rw-r--r--fs/cifs/cifsglob.h8
-rw-r--r--fs/cifs/dir.c5
-rw-r--r--fs/cifs/smb2maperror.c2
-rw-r--r--fs/cifs/smb2ops.c31
-rw-r--r--fs/cifs/smb2pdu.c33
-rw-r--r--fs/cifs/smb2pdu.h5
-rw-r--r--fs/cifs/smb2proto.h1
-rw-r--r--fs/cifs/smb2transport.c26
-rw-r--r--fs/crypto/keyinfo.c5
-rw-r--r--fs/direct-io.c42
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h24
-rw-r--r--fs/ecryptfs/keystore.c9
-rw-r--r--fs/exec.c3
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/f2fs/segment.c6
-rw-r--r--fs/f2fs/super.c2
-rw-r--r--fs/fscache/object-list.c7
-rw-r--r--fs/fuse/dir.c3
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/iomap.c41
-rw-r--r--fs/mpage.c14
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/filelayout/filelayout.c3
-rw-r--r--fs/nfs/nfs4idmap.c2
-rw-r--r--fs/nfs/nfs4proc.c3
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfsd/nfs4proc.c9
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/overlayfs/dir.c20
-rw-r--r--fs/overlayfs/inode.c20
-rw-r--r--fs/overlayfs/namei.c33
-rw-r--r--fs/overlayfs/overlayfs.h4
-rw-r--r--fs/overlayfs/ovl_entry.h3
-rw-r--r--fs/overlayfs/readdir.c17
-rw-r--r--fs/overlayfs/super.c30
-rw-r--r--fs/overlayfs/util.c24
-rw-r--r--fs/quota/dquot.c27
-rw-r--r--fs/userfaultfd.c66
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c8
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c15
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h1
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h27
-rw-r--r--fs/xfs/xfs_acl.c22
-rw-r--r--fs/xfs/xfs_aops.c47
-rw-r--r--fs/xfs/xfs_attr_inactive.c2
-rw-r--r--fs/xfs/xfs_bmap_util.c26
-rw-r--r--fs/xfs/xfs_bmap_util.h13
-rw-r--r--fs/xfs/xfs_file.c25
-rw-r--r--fs/xfs/xfs_fsmap.c58
-rw-r--r--fs/xfs/xfs_inode_item.c79
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_mount.c2
-rw-r--r--fs/xfs/xfs_ondisk.h2
-rw-r--r--fs/xfs/xfs_reflink.c9
-rw-r--r--fs/xfs/xfs_super.c2
70 files changed, 666 insertions, 359 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index adaf6f6dd858..e1cbdfdb7c68 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -310,9 +310,13 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping,
p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
- if (unlikely(copied < len && !PageUptodate(page))) {
- copied = 0;
- goto out;
+ if (!PageUptodate(page)) {
+ if (unlikely(copied < len)) {
+ copied = 0;
+ goto out;
+ } else if (len == PAGE_SIZE) {
+ SetPageUptodate(page);
+ }
}
/*
* No need to use i_size_read() here, the i_size
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index ce7181ea60fa..a7c5a9861bef 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -54,7 +54,7 @@ typedef struct {
int size; /* size of magic/mask */
char *magic; /* magic or filename extension */
char *mask; /* mask, NULL for exact match */
- char *interpreter; /* filename of interpreter */
+ const char *interpreter; /* filename of interpreter */
char *name;
struct dentry *dentry;
struct file *interp_file;
@@ -131,27 +131,26 @@ static int load_misc_binary(struct linux_binprm *bprm)
{
Node *fmt;
struct file *interp_file = NULL;
- char iname[BINPRM_BUF_SIZE];
- const char *iname_addr = iname;
int retval;
int fd_binary = -1;
retval = -ENOEXEC;
if (!enabled)
- goto ret;
+ return retval;
/* to keep locking time low, we copy the interpreter string */
read_lock(&entries_lock);
fmt = check_file(bprm);
if (fmt)
- strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
+ dget(fmt->dentry);
read_unlock(&entries_lock);
if (!fmt)
- goto ret;
+ return retval;
/* Need to be able to load the file after exec */
+ retval = -ENOENT;
if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
- return -ENOENT;
+ goto ret;
if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
retval = remove_arg_zero(bprm);
@@ -195,22 +194,22 @@ static int load_misc_binary(struct linux_binprm *bprm)
bprm->argc++;
/* add the interp as argv[0] */
- retval = copy_strings_kernel(1, &iname_addr, bprm);
+ retval = copy_strings_kernel(1, &fmt->interpreter, bprm);
if (retval < 0)
goto error;
bprm->argc++;
/* Update interp in case binfmt_script needs it. */
- retval = bprm_change_interp(iname, bprm);
+ retval = bprm_change_interp(fmt->interpreter, bprm);
if (retval < 0)
goto error;
- if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) {
+ if (fmt->flags & MISC_FMT_OPEN_FILE) {
interp_file = filp_clone_open(fmt->interp_file);
if (!IS_ERR(interp_file))
deny_write_access(interp_file);
} else {
- interp_file = open_exec(iname);
+ interp_file = open_exec(fmt->interpreter);
}
retval = PTR_ERR(interp_file);
if (IS_ERR(interp_file))
@@ -238,6 +237,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
goto error;
ret:
+ dput(fmt->dentry);
return retval;
error:
if (fd_binary > 0)
@@ -594,8 +594,13 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
static void bm_evict_inode(struct inode *inode)
{
+ Node *e = inode->i_private;
+
+ if (e && e->flags & MISC_FMT_OPEN_FILE)
+ filp_close(e->interp_file, NULL);
+
clear_inode(inode);
- kfree(inode->i_private);
+ kfree(e);
}
static void kill_node(Node *e)
@@ -603,24 +608,14 @@ static void kill_node(Node *e)
struct dentry *dentry;
write_lock(&entries_lock);
- dentry = e->dentry;
- if (dentry) {
- list_del_init(&e->list);
- e->dentry = NULL;
- }
+ list_del_init(&e->list);
write_unlock(&entries_lock);
- if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) {
- filp_close(e->interp_file, NULL);
- e->interp_file = NULL;
- }
-
- if (dentry) {
- drop_nlink(d_inode(dentry));
- d_drop(dentry);
- dput(dentry);
- simple_release_fs(&bm_mnt, &entry_count);
- }
+ dentry = e->dentry;
+ drop_nlink(d_inode(dentry));
+ d_drop(dentry);
+ dput(dentry);
+ simple_release_fs(&bm_mnt, &entry_count);
}
/* /<entry> */
@@ -665,7 +660,8 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
root = file_inode(file)->i_sb->s_root;
inode_lock(d_inode(root));
- kill_node(e);
+ if (!list_empty(&e->list))
+ kill_node(e);
inode_unlock(d_inode(root));
break;
@@ -794,7 +790,7 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
inode_lock(d_inode(root));
while (!list_empty(&entries))
- kill_node(list_entry(entries.next, Node, list));
+ kill_node(list_first_entry(&entries, Node, list));
inode_unlock(d_inode(root));
break;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index afdf4e3cafc2..7cde3f46ad26 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -19,7 +19,6 @@ static int load_script(struct linux_binprm *bprm)
const char *i_arg, *i_name;
char *cp;
struct file *file;
- char interp[BINPRM_BUF_SIZE];
int retval;
if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
@@ -55,7 +54,7 @@ static int load_script(struct linux_binprm *bprm)
break;
}
for (cp = bprm->buf+2; (*cp == ' ') || (*cp == '\t'); cp++);
- if (*cp == '\0')
+ if (*cp == '\0')
return -ENOEXEC; /* No interpreter name found */
i_name = cp;
i_arg = NULL;
@@ -65,7 +64,6 @@ static int load_script(struct linux_binprm *bprm)
*cp++ = '\0';
if (*cp)
i_arg = cp;
- strcpy (interp, i_name);
/*
* OK, we've parsed out the interpreter name and
* (optional) argument.
@@ -80,24 +78,27 @@ static int load_script(struct linux_binprm *bprm)
if (retval)
return retval;
retval = copy_strings_kernel(1, &bprm->interp, bprm);
- if (retval < 0) return retval;
+ if (retval < 0)
+ return retval;
bprm->argc++;
if (i_arg) {
retval = copy_strings_kernel(1, &i_arg, bprm);
- if (retval < 0) return retval;
+ if (retval < 0)
+ return retval;
bprm->argc++;
}
retval = copy_strings_kernel(1, &i_name, bprm);
- if (retval) return retval;
+ if (retval)
+ return retval;
bprm->argc++;
- retval = bprm_change_interp(interp, bprm);
+ retval = bprm_change_interp(i_name, bprm);
if (retval < 0)
return retval;
/*
* OK, now restart the process with the interpreter's dentry.
*/
- file = open_exec(interp);
+ file = open_exec(i_name);
if (IS_ERR(file))
return PTR_ERR(file);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 93d088ffc05c..789f55e851ae 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -716,10 +716,12 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
set_page_writeback(page);
result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
- if (result)
+ if (result) {
end_page_writeback(page);
- else
+ } else {
+ clean_page_buffers(page);
unlock_page(page);
+ }
blk_queue_exit(bdev->bd_queue);
return result;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 899ddaeeacec..8fc690384c58 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -722,7 +722,7 @@ struct btrfs_delayed_root;
* Indicate that a whole-filesystem exclusive operation is running
* (device replace, resize, device add/delete, balance)
*/
-#define BTRFS_FS_EXCL_OP 14
+#define BTRFS_FS_EXCL_OP 16
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 12ab19a4b93e..970190cd347e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2801,7 +2801,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
}
}
- bio = btrfs_bio_alloc(bdev, sector << 9);
+ bio = btrfs_bio_alloc(bdev, (u64)sector << 9);
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 35a128acfbd1..161694b66038 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1135,7 +1135,7 @@ static int btrfs_fill_super(struct super_block *sb,
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
sb->s_flags |= MS_POSIXACL;
#endif
- sb->s_flags |= MS_I_VERSION;
+ sb->s_flags |= SB_I_VERSION;
sb->s_iflags |= SB_I_CGROUPWB;
err = super_setup_bdi(sb);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 157fe59fbabe..1978a8cb1cb1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1991,6 +1991,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
retry:
spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
+ spin_unlock(&ci->i_ceph_lock);
dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
goto out;
}
@@ -2008,8 +2009,10 @@ retry:
mutex_lock(&session->s_mutex);
goto retry;
}
- if (cap->session->s_state < CEPH_MDS_SESSION_OPEN)
+ if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) {
+ spin_unlock(&ci->i_ceph_lock);
goto out;
+ }
flushing = __mark_caps_flushing(inode, session, true,
&flush_tid, &oldest_flush_tid);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 84edfc60d87a..f23c820daaed 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -734,12 +734,13 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode = req->r_inode;
ihold(inode);
} else {
- /* req->r_dentry is non-null for LSSNAP request.
- * fall-thru */
- WARN_ON_ONCE(!req->r_dentry);
+ /* req->r_dentry is non-null for LSSNAP request */
+ rcu_read_lock();
+ inode = get_nonsnap_parent(req->r_dentry);
+ rcu_read_unlock();
+ dout("__choose_mds using snapdir's parent %p\n", inode);
}
- }
- if (!inode && req->r_dentry) {
+ } else if (req->r_dentry) {
/* ignore race with rename; old or new d_parent is okay */
struct dentry *parent;
struct inode *dir;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 1ffc8b426c1c..7fc0b850c352 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -374,12 +374,10 @@ static int build_snap_context(struct ceph_snap_realm *realm,
realm->ino, realm, snapc, snapc->seq,
(unsigned int) snapc->num_snaps);
- if (realm->cached_context) {
- ceph_put_snap_context(realm->cached_context);
- /* queue realm for cap_snap creation */
- list_add_tail(&realm->dirty_item, dirty_realms);
- }
+ ceph_put_snap_context(realm->cached_context);
realm->cached_context = snapc;
+ /* queue realm for cap_snap creation */
+ list_add_tail(&realm->dirty_item, dirty_realms);
return 0;
fail:
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index f7243617316c..d5b2e12b5d02 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -5,9 +5,14 @@ config CIFS
select CRYPTO
select CRYPTO_MD4
select CRYPTO_MD5
+ select CRYPTO_SHA256
+ select CRYPTO_CMAC
select CRYPTO_HMAC
select CRYPTO_ARC4
+ select CRYPTO_AEAD2
+ select CRYPTO_CCM
select CRYPTO_ECB
+ select CRYPTO_AES
select CRYPTO_DES
help
This is the client VFS module for the SMB3 family of NAS protocols,
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index de5b2e1fcce5..e185b2853eab 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -661,7 +661,9 @@ struct TCP_Server_Info {
#endif
unsigned int max_read;
unsigned int max_write;
- __u8 preauth_hash[512];
+#ifdef CONFIG_CIFS_SMB311
+ __u8 preauth_sha_hash[64]; /* save initital negprot hash */
+#endif /* 3.1.1 */
struct delayed_work reconnect; /* reconnect workqueue job */
struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
unsigned long echo_interval;
@@ -849,7 +851,9 @@ struct cifs_ses {
__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
__u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
- __u8 preauth_hash[512];
+#ifdef CONFIG_CIFS_SMB311
+ __u8 preauth_sha_hash[64];
+#endif /* 3.1.1 */
};
static inline bool
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e702d48bd023..81ba6e0d88d8 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -204,7 +204,8 @@ check_name(struct dentry *direntry, struct cifs_tcon *tcon)
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
int i;
- if (unlikely(direntry->d_name.len >
+ if (unlikely(tcon->fsAttrInfo.MaxPathNameComponentLength &&
+ direntry->d_name.len >
le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength)))
return -ENAMETOOLONG;
@@ -520,7 +521,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
rc = check_name(direntry, tcon);
if (rc)
- goto out_free_xid;
+ goto out;
server = tcon->ses->server;
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 7ca9808a0daa..62c88dfed57b 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_DATATYPE_MISALIGNMENT, -EIO, "STATUS_DATATYPE_MISALIGNMENT"},
{STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"},
{STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"},
- {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"},
+ {STATUS_BUFFER_OVERFLOW, -E2BIG, "STATUS_BUFFER_OVERFLOW"},
{STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"},
{STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"},
{STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0dafdbae1f8c..bdb963d0ba32 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -522,6 +522,7 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_open_parms oparms;
struct cifs_fid fid;
struct smb2_file_full_ea_info *smb2_data;
+ int ea_buf_size = SMB2_MIN_EA_BUF;
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
if (!utf16_path)
@@ -541,14 +542,32 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
- smb2_data = kzalloc(SMB2_MAX_EA_BUF, GFP_KERNEL);
- if (smb2_data == NULL) {
- SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
- return -ENOMEM;
+ while (1) {
+ smb2_data = kzalloc(ea_buf_size, GFP_KERNEL);
+ if (smb2_data == NULL) {
+ SMB2_close(xid, tcon, fid.persistent_fid,
+ fid.volatile_fid);
+ return -ENOMEM;
+ }
+
+ rc = SMB2_query_eas(xid, tcon, fid.persistent_fid,
+ fid.volatile_fid,
+ ea_buf_size, smb2_data);
+
+ if (rc != -E2BIG)
+ break;
+
+ kfree(smb2_data);
+ ea_buf_size <<= 1;
+
+ if (ea_buf_size > SMB2_MAX_EA_BUF) {
+ cifs_dbg(VFS, "EA size is too large\n");
+ SMB2_close(xid, tcon, fid.persistent_fid,
+ fid.volatile_fid);
+ return -ENOMEM;
+ }
}
- rc = SMB2_query_eas(xid, tcon, fid.persistent_fid, fid.volatile_fid,
- smb2_data);
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
if (!rc)
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 6f0e6343c15e..5331631386a2 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -648,7 +648,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
{
int rc = 0;
struct validate_negotiate_info_req vneg_inbuf;
- struct validate_negotiate_info_rsp *pneg_rsp;
+ struct validate_negotiate_info_rsp *pneg_rsp = NULL;
u32 rsplen;
u32 inbuflen; /* max of 4 dialects */
@@ -727,8 +727,9 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
rsplen);
/* relax check since Mac returns max bufsize allowed on ioctl */
- if (rsplen > CIFSMaxBufSize)
- return -EIO;
+ if ((rsplen > CIFSMaxBufSize)
+ || (rsplen < sizeof(struct validate_negotiate_info_rsp)))
+ goto err_rsp_free;
}
/* check validate negotiate info response matches what we got earlier */
@@ -747,10 +748,13 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
/* validate negotiate successful */
cifs_dbg(FYI, "validate negotiate info successful\n");
+ kfree(pneg_rsp);
return 0;
vneg_out:
cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n");
+err_rsp_free:
+ kfree(pneg_rsp);
return -EIO;
}
@@ -1255,7 +1259,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
struct smb2_tree_connect_req *req;
struct smb2_tree_connect_rsp *rsp = NULL;
struct kvec iov[2];
- struct kvec rsp_iov;
+ struct kvec rsp_iov = { NULL, 0 };
int rc = 0;
int resp_buftype;
int unc_path_len;
@@ -1372,7 +1376,7 @@ tcon_exit:
return rc;
tcon_error_exit:
- if (rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
+ if (rsp && rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
}
goto tcon_exit;
@@ -1975,6 +1979,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
} else
iov[0].iov_len = get_rfc1002_length(req) + 4;
+ /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */
+ if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO)
+ req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
@@ -2191,9 +2198,13 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
req->PersistentFileId = persistent_fid;
req->VolatileFileId = volatile_fid;
req->AdditionalInformation = cpu_to_le32(additional_info);
- /* 4 for rfc1002 length field and 1 for Buffer */
- req->InputBufferOffset =
- cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4);
+
+ /*
+ * We do not use the input buffer (do not send extra byte)
+ */
+ req->InputBufferOffset = 0;
+ inc_rfc1001_len(req, -1);
+
req->OutputBufferLength = cpu_to_le32(output_len);
iov[0].iov_base = (char *)req;
@@ -2233,12 +2244,12 @@ qinf_exit:
}
int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid,
- struct smb2_file_full_ea_info *data)
+ u64 persistent_fid, u64 volatile_fid,
+ int ea_buf_size, struct smb2_file_full_ea_info *data)
{
return query_info(xid, tcon, persistent_fid, volatile_fid,
FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0,
- SMB2_MAX_EA_BUF,
+ ea_buf_size,
sizeof(struct smb2_file_full_ea_info),
(void **)&data,
NULL);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 6c9653a130c8..c2ec934be968 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -832,7 +832,7 @@ struct smb2_flush_rsp {
/* Channel field for read and write: exactly one of following flags can be set*/
#define SMB2_CHANNEL_NONE 0x00000000
#define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */
-#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000001 /* SMB3.02 or later */
+#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000002 /* SMB3.02 or later */
/* SMB2 read request without RFC1001 length at the beginning */
struct smb2_read_plain_req {
@@ -1178,7 +1178,8 @@ struct smb2_file_link_info { /* encoding of request for level 11 */
char FileName[0]; /* Name to be assigned to new link */
} __packed; /* level 11 Set */
-#define SMB2_MAX_EA_BUF 2048
+#define SMB2_MIN_EA_BUF 2048
+#define SMB2_MAX_EA_BUF 65536
struct smb2_file_full_ea_info { /* encoding of response for level 15 */
__le32 next_entry_offset;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 003217099ef3..e9ab5227e7a8 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -134,6 +134,7 @@ extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
+ int ea_buf_size,
struct smb2_file_full_ea_info *data);
extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 67367cf1f8cd..99493946e2f9 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -390,6 +390,7 @@ generate_smb30signingkey(struct cifs_ses *ses)
return generate_smb3signingkey(ses, &triplet);
}
+#ifdef CONFIG_CIFS_SMB311
int
generate_smb311signingkey(struct cifs_ses *ses)
@@ -398,25 +399,26 @@ generate_smb311signingkey(struct cifs_ses *ses)
struct derivation *d;
d = &triplet.signing;
- d->label.iov_base = "SMB2AESCMAC";
- d->label.iov_len = 12;
- d->context.iov_base = "SmbSign";
- d->context.iov_len = 8;
+ d->label.iov_base = "SMBSigningKey";
+ d->label.iov_len = 14;
+ d->context.iov_base = ses->preauth_sha_hash;
+ d->context.iov_len = 64;
d = &triplet.encryption;
- d->label.iov_base = "SMB2AESCCM";
- d->label.iov_len = 11;
- d->context.iov_base = "ServerIn ";
- d->context.iov_len = 10;
+ d->label.iov_base = "SMBC2SCipherKey";
+ d->label.iov_len = 16;
+ d->context.iov_base = ses->preauth_sha_hash;
+ d->context.iov_len = 64;
d = &triplet.decryption;
- d->label.iov_base = "SMB2AESCCM";
- d->label.iov_len = 11;
- d->context.iov_base = "ServerOut";
- d->context.iov_len = 10;
+ d->label.iov_base = "SMBS2CCipherKey";
+ d->label.iov_len = 16;
+ d->context.iov_base = ses->preauth_sha_hash;
+ d->context.iov_len = 64;
return generate_smb3signingkey(ses, &triplet);
}
+#endif /* 311 */
int
smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 018c588c7ac3..8e704d12a1cf 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -109,6 +109,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
goto out;
}
ukp = user_key_payload_locked(keyring_key);
+ if (!ukp) {
+ /* key was revoked before we acquired its semaphore */
+ res = -EKEYREVOKED;
+ goto out;
+ }
if (ukp->datalen != sizeof(struct fscrypt_key)) {
res = -EINVAL;
goto out;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 62cf812ed0e5..b53e66d9abd7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -45,6 +45,12 @@
#define DIO_PAGES 64
/*
+ * Flags for dio_complete()
+ */
+#define DIO_COMPLETE_ASYNC 0x01 /* This is async IO */
+#define DIO_COMPLETE_INVALIDATE 0x02 /* Can invalidate pages */
+
+/*
* This code generally works in units of "dio_blocks". A dio_block is
* somewhere between the hard sector size and the filesystem block size. it
* is determined on a per-invocation basis. When talking to the filesystem
@@ -225,7 +231,7 @@ static inline struct page *dio_get_page(struct dio *dio,
* filesystems can use it to hold additional state between get_block calls and
* dio_complete.
*/
-static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
+static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
{
loff_t offset = dio->iocb->ki_pos;
ssize_t transferred = 0;
@@ -259,14 +265,27 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
if (ret == 0)
ret = transferred;
+ if (dio->end_io) {
+ // XXX: ki_pos??
+ err = dio->end_io(dio->iocb, offset, ret, dio->private);
+ if (err)
+ ret = err;
+ }
+
/*
* Try again to invalidate clean pages which might have been cached by
* non-direct readahead, or faulted in by get_user_pages() if the source
* of the write was an mmap'ed region of the file we're writing. Either
* one is a pretty crazy thing to do, so we don't support it 100%. If
* this invalidation fails, tough, the write still worked...
+ *
+ * And this page cache invalidation has to be after dio->end_io(), as
+ * some filesystems convert unwritten extents to real allocations in
+ * end_io() when necessary, otherwise a racing buffer read would cache
+ * zeros from unwritten extents.
*/
- if (ret > 0 && dio->op == REQ_OP_WRITE &&
+ if (flags & DIO_COMPLETE_INVALIDATE &&
+ ret > 0 && dio->op == REQ_OP_WRITE &&
dio->inode->i_mapping->nrpages) {
err = invalidate_inode_pages2_range(dio->inode->i_mapping,
offset >> PAGE_SHIFT,
@@ -274,18 +293,10 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
WARN_ON_ONCE(err);
}
- if (dio->end_io) {
-
- // XXX: ki_pos??
- err = dio->end_io(dio->iocb, offset, ret, dio->private);
- if (err)
- ret = err;
- }
-
if (!(dio->flags & DIO_SKIP_DIO_COUNT))
inode_dio_end(dio->inode);
- if (is_async) {
+ if (flags & DIO_COMPLETE_ASYNC) {
/*
* generic_write_sync expects ki_pos to have been updated
* already, but the submission path only does this for
@@ -306,7 +317,7 @@ static void dio_aio_complete_work(struct work_struct *work)
{
struct dio *dio = container_of(work, struct dio, complete_work);
- dio_complete(dio, 0, true);
+ dio_complete(dio, 0, DIO_COMPLETE_ASYNC | DIO_COMPLETE_INVALIDATE);
}
static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
@@ -348,7 +359,7 @@ static void dio_bio_end_aio(struct bio *bio)
queue_work(dio->inode->i_sb->s_dio_done_wq,
&dio->complete_work);
} else {
- dio_complete(dio, 0, true);
+ dio_complete(dio, 0, DIO_COMPLETE_ASYNC);
}
}
}
@@ -866,7 +877,8 @@ out:
*/
if (sdio->boundary) {
ret = dio_send_cur_page(dio, sdio, map_bh);
- dio_bio_submit(dio, sdio);
+ if (sdio->bio)
+ dio_bio_submit(dio, sdio);
put_page(sdio->cur_page);
sdio->cur_page = NULL;
}
@@ -1359,7 +1371,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
dio_await_completion(dio);
if (drop_refcount(dio) == 0) {
- retval = dio_complete(dio, retval, false);
+ retval = dio_complete(dio, retval, DIO_COMPLETE_INVALIDATE);
} else
BUG_ON(retval != -EIOCBQUEUED);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 9c351bf757b2..3fbc0ff79699 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -84,11 +84,16 @@ struct ecryptfs_page_crypt_context {
static inline struct ecryptfs_auth_tok *
ecryptfs_get_encrypted_key_payload_data(struct key *key)
{
- if (key->type == &key_type_encrypted)
- return (struct ecryptfs_auth_tok *)
- (&((struct encrypted_key_payload *)key->payload.data[0])->payload_data);
- else
+ struct encrypted_key_payload *payload;
+
+ if (key->type != &key_type_encrypted)
return NULL;
+
+ payload = key->payload.data[0];
+ if (!payload)
+ return ERR_PTR(-EKEYREVOKED);
+
+ return (struct ecryptfs_auth_tok *)payload->payload_data;
}
static inline struct key *ecryptfs_get_encrypted_key(char *sig)
@@ -114,12 +119,17 @@ static inline struct ecryptfs_auth_tok *
ecryptfs_get_key_payload_data(struct key *key)
{
struct ecryptfs_auth_tok *auth_tok;
+ struct user_key_payload *ukp;
auth_tok = ecryptfs_get_encrypted_key_payload_data(key);
- if (!auth_tok)
- return (struct ecryptfs_auth_tok *)user_key_payload_locked(key)->data;
- else
+ if (auth_tok)
return auth_tok;
+
+ ukp = user_key_payload_locked(key);
+ if (!ukp)
+ return ERR_PTR(-EKEYREVOKED);
+
+ return (struct ecryptfs_auth_tok *)ukp->data;
}
#define ECRYPTFS_MAX_KEYSET_SIZE 1024
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 3cf1546dca82..fa218cd64f74 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -459,7 +459,8 @@ out:
* @auth_tok_key: key containing the authentication token
* @auth_tok: authentication token
*
- * Returns zero on valid auth tok; -EINVAL otherwise
+ * Returns zero on valid auth tok; -EINVAL if the payload is invalid; or
+ * -EKEYREVOKED if the key was revoked before we acquired its semaphore.
*/
static int
ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key,
@@ -468,6 +469,12 @@ ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key,
int rc = 0;
(*auth_tok) = ecryptfs_get_key_payload_data(auth_tok_key);
+ if (IS_ERR(*auth_tok)) {
+ rc = PTR_ERR(*auth_tok);
+ *auth_tok = NULL;
+ goto out;
+ }
+
if (ecryptfs_verify_version((*auth_tok)->version)) {
printk(KERN_ERR "Data structure version mismatch. Userspace "
"tools must match eCryptfs kernel module with major "
diff --git a/fs/exec.c b/fs/exec.c
index ac34d9724684..3e14ba25f678 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1410,7 +1410,7 @@ static void free_bprm(struct linux_binprm *bprm)
kfree(bprm);
}
-int bprm_change_interp(char *interp, struct linux_binprm *bprm)
+int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
{
/* If a binfmt changed the interp, free it first. */
if (bprm->interp != bprm->filename)
@@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename,
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
+ membarrier_execve(current);
acct_update_integrals(current);
task_numa_free(current);
free_bprm(bprm);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b104096fce9e..b0915b734a38 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1677,7 +1677,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
return 1;
case Opt_i_version:
- sb->s_flags |= MS_I_VERSION;
+ sb->s_flags |= SB_I_VERSION;
return 1;
case Opt_lazytime:
sb->s_flags |= MS_LAZYTIME;
@@ -2060,7 +2060,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
- if (sb->s_flags & MS_I_VERSION)
+ if (sb->s_flags & SB_I_VERSION)
SEQ_OPTS_PUTS("i_version");
if (nodefs || sbi->s_stripe)
SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9a7c90386947..4b4a72f392be 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2525,7 +2525,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
void stop_discard_thread(struct f2fs_sb_info *sbi);
-void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
+void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount);
void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void release_discard_addrs(struct f2fs_sb_info *sbi);
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 621b9b3d320b..c695ff462ee6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1210,11 +1210,11 @@ void stop_discard_thread(struct f2fs_sb_info *sbi)
}
/* This comes from f2fs_put_super and f2fs_trim_fs */
-void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
+void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount)
{
__issue_discard_cmd(sbi, false);
__drop_discard_cmd(sbi);
- __wait_discard_cmd(sbi, false);
+ __wait_discard_cmd(sbi, !umount);
}
static void mark_discard_range_all(struct f2fs_sb_info *sbi)
@@ -2244,7 +2244,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
}
/* It's time to issue all the filed discards */
mark_discard_range_all(sbi);
- f2fs_wait_discard_bios(sbi);
+ f2fs_wait_discard_bios(sbi, false);
out:
range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
return err;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 89f61eb3d167..933c3d529e65 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -801,7 +801,7 @@ static void f2fs_put_super(struct super_block *sb)
}
/* be sure to wait for any on-going discard commands */
- f2fs_wait_discard_bios(sbi);
+ f2fs_wait_discard_bios(sbi, true);
if (f2fs_discard_en(sbi) && !sbi->discard_blks) {
struct cp_control cpc = {
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index b5ab06fabc60..0438d4cd91ef 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -331,6 +331,13 @@ static void fscache_objlist_config(struct fscache_objlist_data *data)
rcu_read_lock();
confkey = user_key_payload_rcu(key);
+ if (!confkey) {
+ /* key was revoked */
+ rcu_read_unlock();
+ key_put(key);
+ goto no_config;
+ }
+
buf = confkey->data;
for (len = confkey->datalen - 1; len >= 0; len--) {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 622081b97426..24967382a7b1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1308,7 +1308,8 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
*/
over = !dir_emit(ctx, dirent->name, dirent->namelen,
dirent->ino, dirent->type);
- ctx->pos = dirent->off;
+ if (!over)
+ ctx->pos = dirent->off;
}
buf += reclen;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 65c88379a3a1..94a745acaef8 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1059,7 +1059,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_flags & MS_MANDLOCK)
goto err;
- sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
+ sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION);
if (!parse_fuse_opt(data, &d, is_bdev))
goto err;
diff --git a/fs/iomap.c b/fs/iomap.c
index be61cf742b5e..d4801f8dd4fd 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
struct kiocb *iocb = dio->iocb;
struct inode *inode = file_inode(iocb->ki_filp);
+ loff_t offset = iocb->ki_pos;
ssize_t ret;
- /*
- * Try again to invalidate clean pages which might have been cached by
- * non-direct readahead, or faulted in by get_user_pages() if the source
- * of the write was an mmap'ed region of the file we're writing. Either
- * one is a pretty crazy thing to do, so we don't support it 100%. If
- * this invalidation fails, tough, the write still worked...
- */
- if (!dio->error &&
- (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- iocb->ki_pos >> PAGE_SHIFT,
- (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
- WARN_ON_ONCE(ret);
- }
-
if (dio->end_io) {
ret = dio->end_io(iocb,
dio->error ? dio->error : dio->size,
@@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
if (likely(!ret)) {
ret = dio->size;
/* check for short read */
- if (iocb->ki_pos + ret > dio->i_size &&
+ if (offset + ret > dio->i_size &&
!(dio->flags & IOMAP_DIO_WRITE))
- ret = dio->i_size - iocb->ki_pos;
+ ret = dio->i_size - offset;
iocb->ki_pos += ret;
}
+ /*
+ * Try again to invalidate clean pages which might have been cached by
+ * non-direct readahead, or faulted in by get_user_pages() if the source
+ * of the write was an mmap'ed region of the file we're writing. Either
+ * one is a pretty crazy thing to do, so we don't support it 100%. If
+ * this invalidation fails, tough, the write still worked...
+ *
+ * And this page cache invalidation has to be after dio->end_io(), as
+ * some filesystems convert unwritten extents to real allocations in
+ * end_io() when necessary, otherwise a racing buffer read would cache
+ * zeros from unwritten extents.
+ */
+ if (!dio->error &&
+ (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+ int err;
+ err = invalidate_inode_pages2_range(inode->i_mapping,
+ offset >> PAGE_SHIFT,
+ (offset + dio->size - 1) >> PAGE_SHIFT);
+ WARN_ON_ONCE(err);
+ }
+
inode_dio_end(file_inode(iocb->ki_filp));
kfree(dio);
diff --git a/fs/mpage.c b/fs/mpage.c
index 37bb77c1302c..c991faec70b9 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -468,6 +468,16 @@ static void clean_buffers(struct page *page, unsigned first_unmapped)
try_to_free_buffers(page);
}
+/*
+ * For situations where we want to clean all buffers attached to a page.
+ * We don't need to calculate how many buffers are attached to the page,
+ * we just need to specify a number larger than the maximum number of buffers.
+ */
+void clean_page_buffers(struct page *page)
+{
+ clean_buffers(page, ~0U);
+}
+
static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
@@ -605,10 +615,8 @@ alloc_new:
if (bio == NULL) {
if (first_unmapped == blocks_per_page) {
if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
- page, wbc)) {
- clean_buffers(page, first_unmapped);
+ page, wbc))
goto out;
- }
}
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
diff --git a/fs/namespace.c b/fs/namespace.c
index 54059b142d6b..d18deb4c410b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -468,7 +468,9 @@ static inline int may_write_real(struct file *file)
/* File refers to upper, writable layer? */
upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
- if (upperdentry && file_inode(file) == d_inode(upperdentry))
+ if (upperdentry &&
+ (file_inode(file) == d_inode(upperdentry) ||
+ file_inode(file) == d_inode(dentry)))
return 0;
/* Lower layer: can't write to real file, sorry... */
@@ -2823,7 +2825,8 @@ long do_mount(const char *dev_name, const char __user *dir_name,
SB_MANDLOCK |
SB_DIRSYNC |
SB_SILENT |
- SB_POSIXACL);
+ SB_POSIXACL |
+ SB_I_VERSION);
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags, sb_flags, mnt_flags,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index efebe6cf4378..22880ef6d8dd 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -218,7 +218,6 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
static void pnfs_init_server(struct nfs_server *server)
{
rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
- rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
}
#else
@@ -888,6 +887,7 @@ struct nfs_server *nfs_alloc_server(void)
ida_init(&server->openowner_id);
ida_init(&server->lockowner_id);
pnfs_init_server(server);
+ rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
return server;
}
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 44c638b7876c..508126eb49f9 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -745,7 +745,8 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
dprintk("--> %s\n", __func__);
- nfs4_fl_put_deviceid(fl->dsaddr);
+ if (fl->dsaddr != NULL)
+ nfs4_fl_put_deviceid(fl->dsaddr);
/* This assumes a single RW lseg */
if (lseg->pls_range.iomode == IOMODE_RW) {
struct nfs4_filelayout *flo;
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index dd5d27da8c0c..30426c1a1bbd 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -274,7 +274,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
ssize_t ret;
ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
- if (ret <= 0)
+ if (ret < 0)
return ERR_PTR(ret);
rkey = request_key(&key_type_id_resolver, desc, "");
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6c61e2b99635..f90090e8c959 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8399,8 +8399,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
lo = NFS_I(inode)->layout;
/* If the open stateid was bad, then recover it. */
if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
- nfs4_stateid_match_other(&lgp->args.stateid,
- &lgp->args.ctx->state->stateid)) {
+ !nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
spin_unlock(&inode->i_lock);
exception->state = lgp->args.ctx->state;
exception->stateid = &lgp->args.stateid;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 37c8af003275..14ed9791ec9c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1842,8 +1842,8 @@ static void encode_create_session(struct xdr_stream *xdr,
* Assumes OPEN is the biggest non-idempotent compound.
* 2 is the verifier.
*/
- max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE +
- RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT;
+ max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + 2)
+ * XDR_UNIT + RPC_MAX_AUTH_SIZE;
encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr);
p = reserve_space(xdr, 16 + 2*28 + 20 + clnt->cl_nodelen + 12);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3c69db7d4905..8487486ec496 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -927,6 +927,13 @@ nfsd4_secinfo_release(union nfsd4_op_u *u)
exp_put(u->secinfo.si_exp);
}
+static void
+nfsd4_secinfo_no_name_release(union nfsd4_op_u *u)
+{
+ if (u->secinfo_no_name.sin_exp)
+ exp_put(u->secinfo_no_name.sin_exp);
+}
+
static __be32
nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -2375,7 +2382,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_SECINFO_NO_NAME] = {
.op_func = nfsd4_secinfo_no_name,
- .op_release = nfsd4_secinfo_release,
+ .op_release = nfsd4_secinfo_no_name_release,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO_NO_NAME",
.op_rsize_bop = nfsd4_secinfo_rsize,
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index aad97b30d5e6..c441f9387a1b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -561,10 +561,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
c->tmpfile = true;
err = ovl_copy_up_locked(c);
} else {
- err = -EIO;
- if (lock_rename(c->workdir, c->destdir) != NULL) {
- pr_err("overlayfs: failed to lock workdir+upperdir\n");
- } else {
+ err = ovl_lock_rename_workdir(c->workdir, c->destdir);
+ if (!err) {
err = ovl_copy_up_locked(c);
unlock_rename(c->workdir, c->destdir);
}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 3309b1912241..cc961a3bd3bd 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -216,26 +216,6 @@ out_unlock:
return err;
}
-static int ovl_lock_rename_workdir(struct dentry *workdir,
- struct dentry *upperdir)
-{
- /* Workdir should not be the same as upperdir */
- if (workdir == upperdir)
- goto err;
-
- /* Workdir should not be subdir of upperdir and vice versa */
- if (lock_rename(workdir, upperdir) != NULL)
- goto err_unlock;
-
- return 0;
-
-err_unlock:
- unlock_rename(workdir, upperdir);
-err:
- pr_err("overlayfs: failed to lock workdir+upperdir\n");
- return -EIO;
-}
-
static struct dentry *ovl_clear_empty(struct dentry *dentry,
struct list_head *list)
{
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index a619addecafc..321511ed8c42 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -598,18 +598,30 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
return true;
}
-struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
+struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
+ struct dentry *index)
{
struct dentry *lowerdentry = ovl_dentry_lower(dentry);
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
+ /* Already indexed or could be indexed on copy up? */
+ bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry));
+
+ if (WARN_ON(upperdentry && indexed && !lowerdentry))
+ return ERR_PTR(-EIO);
if (!realinode)
realinode = d_inode(lowerdentry);
- if (!S_ISDIR(realinode->i_mode) &&
- (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) {
- struct inode *key = d_inode(lowerdentry ?: upperdentry);
+ /*
+ * Copy up origin (lower) may exist for non-indexed upper, but we must
+ * not use lower as hash key in that case.
+ * Hash inodes that are or could be indexed by origin inode and
+ * non-indexed upper inodes that could be hard linked by upper inode.
+ */
+ if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) {
+ struct inode *key = d_inode(indexed ? lowerdentry :
+ upperdentry);
unsigned int nlink;
inode = iget5_locked(dentry->d_sb, (unsigned long) key,
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c3addd1114f1..a12dc10bf726 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -405,14 +405,13 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack,
* be treated as stale (i.e. after unlink of the overlay inode).
* We don't know the verification rules for directory and whiteout
* index entries, because they have not been implemented yet, so return
- * EROFS if those entries are found to avoid corrupting an index that
- * was created by a newer kernel.
+ * EINVAL if those entries are found to abort the mount to avoid
+ * corrupting an index that was created by a newer kernel.
*/
- err = -EROFS;
+ err = -EINVAL;
if (d_is_dir(index) || ovl_is_whiteout(index))
goto fail;
- err = -EINVAL;
if (index->d_name.len < sizeof(struct ovl_fh)*2)
goto fail;
@@ -506,6 +505,11 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
if (IS_ERR(index)) {
+ err = PTR_ERR(index);
+ if (err == -ENOENT) {
+ index = NULL;
+ goto out;
+ }
pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
"overlayfs: mount with '-o index=off' to disable inodes index.\n",
d_inode(origin)->i_ino, name.len, name.name,
@@ -515,18 +519,9 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
inode = d_inode(index);
if (d_is_negative(index)) {
- if (upper && d_inode(origin)->i_nlink > 1) {
- pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n",
- d_inode(origin)->i_ino);
- goto fail;
- }
-
- dput(index);
- index = NULL;
+ goto out_dput;
} else if (upper && d_inode(upper) != inode) {
- pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n",
- index, inode->i_ino, d_inode(upper)->i_ino);
- goto fail;
+ goto out_dput;
} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
/*
@@ -546,6 +541,11 @@ out:
kfree(name.name);
return index;
+out_dput:
+ dput(index);
+ index = NULL;
+ goto out;
+
fail:
dput(index);
index = ERR_PTR(-EIO);
@@ -634,6 +634,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
if (d.redirect) {
+ err = -ENOMEM;
upperredirect = kstrdup(d.redirect, GFP_KERNEL);
if (!upperredirect)
goto out_put_upper;
@@ -708,7 +709,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
upperdentry = dget(index);
if (upperdentry || ctr) {
- inode = ovl_get_inode(dentry, upperdentry);
+ inode = ovl_get_inode(dentry, upperdentry, index);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_free_oe;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index d4e8c1a08fb0..d9a0edd4e57e 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -235,6 +235,7 @@ bool ovl_inuse_trylock(struct dentry *dentry);
void ovl_inuse_unlock(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry, bool *locked);
void ovl_nlink_end(struct dentry *dentry, bool locked);
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
static inline bool ovl_is_impuredir(struct dentry *dentry)
{
@@ -285,7 +286,8 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
bool ovl_is_private_xattr(const char *name);
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
-struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry);
+struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
+ struct dentry *index);
static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
to->i_uid = from->i_uid;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 878a750986dd..25d9b5adcd42 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -37,6 +37,9 @@ struct ovl_fs {
bool noxattr;
/* sb common to all layers */
struct super_block *same_sb;
+ /* Did we take the inuse lock? */
+ bool upperdir_locked;
+ bool workdir_locked;
};
/* private information held for every overlayfs dentry */
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 62e9b22a2077..698b74dd750e 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -988,6 +988,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
struct path *lowerstack, unsigned int numlower)
{
int err;
+ struct dentry *index = NULL;
struct inode *dir = dentry->d_inode;
struct path path = { .mnt = mnt, .dentry = dentry };
LIST_HEAD(list);
@@ -1007,8 +1008,6 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
inode_lock_nested(dir, I_MUTEX_PARENT);
list_for_each_entry(p, &list, l_node) {
- struct dentry *index;
-
if (p->name[0] == '.') {
if (p->len == 1)
continue;
@@ -1018,18 +1017,20 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
index = lookup_one_len(p->name, dentry, p->len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
+ index = NULL;
break;
}
err = ovl_verify_index(index, lowerstack, numlower);
- if (err) {
- if (err == -EROFS)
- break;
+ /* Cleanup stale and orphan index entries */
+ if (err && (err == -ESTALE || err == -ENOENT))
err = ovl_cleanup(dir, index);
- if (err)
- break;
- }
+ if (err)
+ break;
+
dput(index);
+ index = NULL;
}
+ dput(index);
inode_unlock(dir);
out:
ovl_cache_free(&list);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fd5ea4facc62..f5738e96a052 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -174,6 +174,9 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
{
struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
+ if (!oi)
+ return NULL;
+
oi->cache = NULL;
oi->redirect = NULL;
oi->version = 0;
@@ -211,9 +214,10 @@ static void ovl_put_super(struct super_block *sb)
dput(ufs->indexdir);
dput(ufs->workdir);
- ovl_inuse_unlock(ufs->workbasedir);
+ if (ufs->workdir_locked)
+ ovl_inuse_unlock(ufs->workbasedir);
dput(ufs->workbasedir);
- if (ufs->upper_mnt)
+ if (ufs->upper_mnt && ufs->upperdir_locked)
ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
mntput(ufs->upper_mnt);
for (i = 0; i < ufs->numlower; i++)
@@ -881,9 +885,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
goto out_put_upperpath;
err = -EBUSY;
- if (!ovl_inuse_trylock(upperpath.dentry)) {
- pr_err("overlayfs: upperdir is in-use by another mount\n");
+ if (ovl_inuse_trylock(upperpath.dentry)) {
+ ufs->upperdir_locked = true;
+ } else if (ufs->config.index) {
+ pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
goto out_put_upperpath;
+ } else {
+ pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
}
err = ovl_mount_dir(ufs->config.workdir, &workpath);
@@ -901,9 +909,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
}
err = -EBUSY;
- if (!ovl_inuse_trylock(workpath.dentry)) {
- pr_err("overlayfs: workdir is in-use by another mount\n");
+ if (ovl_inuse_trylock(workpath.dentry)) {
+ ufs->workdir_locked = true;
+ } else if (ufs->config.index) {
+ pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
goto out_put_workpath;
+ } else {
+ pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
}
ufs->workbasedir = workpath.dentry;
@@ -1156,11 +1168,13 @@ out_put_lowerpath:
out_free_lowertmp:
kfree(lowertmp);
out_unlock_workdentry:
- ovl_inuse_unlock(workpath.dentry);
+ if (ufs->workdir_locked)
+ ovl_inuse_unlock(workpath.dentry);
out_put_workpath:
path_put(&workpath);
out_unlock_upperdentry:
- ovl_inuse_unlock(upperpath.dentry);
+ if (ufs->upperdir_locked)
+ ovl_inuse_unlock(upperpath.dentry);
out_put_upperpath:
path_put(&upperpath);
out_free_config:
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 117794582f9f..b9b239fa5cfd 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -430,7 +430,7 @@ void ovl_inuse_unlock(struct dentry *dentry)
}
}
-/* Called must hold OVL_I(inode)->oi_lock */
+/* Caller must hold OVL_I(inode)->lock */
static void ovl_cleanup_index(struct dentry *dentry)
{
struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
@@ -469,6 +469,9 @@ static void ovl_cleanup_index(struct dentry *dentry)
err = PTR_ERR(index);
if (!IS_ERR(index))
err = ovl_cleanup(dir, index);
+ else
+ index = NULL;
+
inode_unlock(dir);
if (err)
goto fail;
@@ -557,3 +560,22 @@ void ovl_nlink_end(struct dentry *dentry, bool locked)
mutex_unlock(&OVL_I(d_inode(dentry))->lock);
}
}
+
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
+{
+ /* Workdir should not be the same as upperdir */
+ if (workdir == upperdir)
+ goto err;
+
+ /* Workdir should not be subdir of upperdir and vice versa */
+ if (lock_rename(workdir, upperdir) != NULL)
+ goto err_unlock;
+
+ return 0;
+
+err_unlock:
+ unlock_rename(workdir, upperdir);
+err:
+ pr_err("overlayfs: failed to lock workdir+upperdir\n");
+ return -EIO;
+}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 50b0556a124f..52ad15192e72 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1297,21 +1297,18 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space,
spin_lock(&dquot->dq_dqb_lock);
if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags))
- goto add;
+ goto finish;
tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
+ space + rsv_space;
- if (flags & DQUOT_SPACE_NOFAIL)
- goto add;
-
if (dquot->dq_dqb.dqb_bhardlimit &&
tspace > dquot->dq_dqb.dqb_bhardlimit &&
!ignore_hardlimit(dquot)) {
if (flags & DQUOT_SPACE_WARN)
prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN);
ret = -EDQUOT;
- goto out;
+ goto finish;
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1322,7 +1319,7 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space,
if (flags & DQUOT_SPACE_WARN)
prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN);
ret = -EDQUOT;
- goto out;
+ goto finish;
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1338,13 +1335,21 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space,
* be always printed
*/
ret = -EDQUOT;
- goto out;
+ goto finish;
}
}
-add:
- dquot->dq_dqb.dqb_rsvspace += rsv_space;
- dquot->dq_dqb.dqb_curspace += space;
-out:
+finish:
+ /*
+ * We have to be careful and go through warning generation & grace time
+ * setting even if DQUOT_SPACE_NOFAIL is set. That's why we check it
+ * only here...
+ */
+ if (flags & DQUOT_SPACE_NOFAIL)
+ ret = 0;
+ if (!ret) {
+ dquot->dq_dqb.dqb_rsvspace += rsv_space;
+ dquot->dq_dqb.dqb_curspace += space;
+ }
spin_unlock(&dquot->dq_dqb_lock);
return ret;
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ef4b48d1ea42..1c713fd5b3e6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -588,6 +588,12 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
break;
if (ACCESS_ONCE(ctx->released) ||
fatal_signal_pending(current)) {
+ /*
+ * &ewq->wq may be queued in fork_event, but
+ * __remove_wait_queue ignores the head
+ * parameter. It would be a problem if it
+ * didn't.
+ */
__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
if (ewq->msg.event == UFFD_EVENT_FORK) {
struct userfaultfd_ctx *new;
@@ -1061,6 +1067,12 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
(unsigned long)
uwq->msg.arg.reserved.reserved1;
list_move(&uwq->wq.entry, &fork_event);
+ /*
+ * fork_nctx can be freed as soon as
+ * we drop the lock, unless we take a
+ * reference on it.
+ */
+ userfaultfd_ctx_get(fork_nctx);
spin_unlock(&ctx->event_wqh.lock);
ret = 0;
break;
@@ -1091,19 +1103,53 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (!ret && msg->event == UFFD_EVENT_FORK) {
ret = resolve_userfault_fork(ctx, fork_nctx, msg);
+ spin_lock(&ctx->event_wqh.lock);
+ if (!list_empty(&fork_event)) {
+ /*
+ * The fork thread didn't abort, so we can
+ * drop the temporary refcount.
+ */
+ userfaultfd_ctx_put(fork_nctx);
+
+ uwq = list_first_entry(&fork_event,
+ typeof(*uwq),
+ wq.entry);
+ /*
+ * If fork_event list wasn't empty and in turn
+ * the event wasn't already released by fork
+ * (the event is allocated on fork kernel
+ * stack), put the event back to its place in
+ * the event_wq. fork_event head will be freed
+ * as soon as we return so the event cannot
+ * stay queued there no matter the current
+ * "ret" value.
+ */
+ list_del(&uwq->wq.entry);
+ __add_wait_queue(&ctx->event_wqh, &uwq->wq);
- if (!ret) {
- spin_lock(&ctx->event_wqh.lock);
- if (!list_empty(&fork_event)) {
- uwq = list_first_entry(&fork_event,
- typeof(*uwq),
- wq.entry);
- list_del(&uwq->wq.entry);
- __add_wait_queue(&ctx->event_wqh, &uwq->wq);
+ /*
+ * Leave the event in the waitqueue and report
+ * error to userland if we failed to resolve
+ * the userfault fork.
+ */
+ if (likely(!ret))
userfaultfd_event_complete(ctx, uwq);
- }
- spin_unlock(&ctx->event_wqh.lock);
+ } else {
+ /*
+ * Here the fork thread aborted and the
+ * refcount from the fork thread on fork_nctx
+ * has already been released. We still hold
+ * the reference we took before releasing the
+ * lock above. If resolve_userfault_fork
+ * failed we've to drop it because the
+ * fork_nctx has to be freed in such case. If
+ * it succeeded we'll hold it because the new
+ * uffd references it.
+ */
+ if (ret)
+ userfaultfd_ctx_put(fork_nctx);
}
+ spin_unlock(&ctx->event_wqh.lock);
}
return ret;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4424f7fecf14..61cd28ba25f3 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -250,7 +250,7 @@ xattr_getsecurity(struct inode *inode, const char *name, void *value,
}
memcpy(value, buffer, len);
out:
- security_release_secctx(buffer, len);
+ kfree(buffer);
out_noalloc:
return len;
}
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 744dcaec34cc..f965ce832bc0 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1584,6 +1584,10 @@ xfs_alloc_ag_vextent_small(
bp = xfs_btree_get_bufs(args->mp, args->tp,
args->agno, fbno, 0);
+ if (!bp) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
xfs_trans_binval(args->tp, bp);
}
args->len = 1;
@@ -2141,6 +2145,10 @@ xfs_alloc_fix_freelist(
if (error)
goto out_agbp_relse;
bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
+ if (!bp) {
+ error = -EFSCORRUPTED;
+ goto out_agbp_relse;
+ }
xfs_trans_binval(tp, bp);
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 044a363119be..89263797cf32 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1477,14 +1477,14 @@ xfs_bmap_isaeof(
int is_empty;
int error;
- bma->aeof = 0;
+ bma->aeof = false;
error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
&is_empty);
if (error)
return error;
if (is_empty) {
- bma->aeof = 1;
+ bma->aeof = true;
return 0;
}
@@ -3852,6 +3852,17 @@ xfs_trim_extent(
}
}
+/* trim extent to within eof */
+void
+xfs_trim_extent_eof(
+ struct xfs_bmbt_irec *irec,
+ struct xfs_inode *ip)
+
+{
+ xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
+ i_size_read(VFS_I(ip))));
+}
+
/*
* Trim the returned map to the required bounds
*/
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 851982a5dfbc..502e0d8fb4ff 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len);
+void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 988bb3f31446..dfd643909f85 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1962,7 +1962,7 @@ xfs_difree_inobt(
if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
- xic->deleted = 1;
+ xic->deleted = true;
xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
@@ -1989,7 +1989,7 @@ xfs_difree_inobt(
xfs_difree_inode_chunk(mp, agno, &rec, dfops);
} else {
- xic->deleted = 0;
+ xic->deleted = false;
error = xfs_inobt_update(cur, &rec);
if (error) {
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 8372e9bcd7b6..71de185735e0 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -270,6 +270,7 @@ typedef struct xfs_inode_log_format {
uint32_t ilf_fields; /* flags for fields logged */
uint16_t ilf_asize; /* size of attr d/ext/root */
uint16_t ilf_dsize; /* size of data/ext/root */
+ uint32_t ilf_pad; /* pad for 64 bit boundary */
uint64_t ilf_ino; /* inode number */
union {
uint32_t ilfu_rdev; /* rdev value for dev inode*/
@@ -280,29 +281,17 @@ typedef struct xfs_inode_log_format {
int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_t;
-typedef struct xfs_inode_log_format_32 {
- uint16_t ilf_type; /* inode log item type */
- uint16_t ilf_size; /* size of this item */
- uint32_t ilf_fields; /* flags for fields logged */
- uint16_t ilf_asize; /* size of attr d/ext/root */
- uint16_t ilf_dsize; /* size of data/ext/root */
- uint64_t ilf_ino; /* inode number */
- union {
- uint32_t ilfu_rdev; /* rdev value for dev inode*/
- uuid_t ilfu_uuid; /* mount point value */
- } ilf_u;
- int64_t ilf_blkno; /* blkno of inode buffer */
- int32_t ilf_len; /* len of inode buffer */
- int32_t ilf_boffset; /* off of inode in buffer */
-} __attribute__((packed)) xfs_inode_log_format_32_t;
-
-typedef struct xfs_inode_log_format_64 {
+/*
+ * Old 32 bit systems will log in this format without the 64 bit
+ * alignment padding. Recovery will detect this and convert it to the
+ * correct format.
+ */
+struct xfs_inode_log_format_32 {
uint16_t ilf_type; /* inode log item type */
uint16_t ilf_size; /* size of this item */
uint32_t ilf_fields; /* flags for fields logged */
uint16_t ilf_asize; /* size of attr d/ext/root */
uint16_t ilf_dsize; /* size of data/ext/root */
- uint32_t ilf_pad; /* pad for 64 bit boundary */
uint64_t ilf_ino; /* inode number */
union {
uint32_t ilfu_rdev; /* rdev value for dev inode*/
@@ -311,7 +300,7 @@ typedef struct xfs_inode_log_format_64 {
int64_t ilf_blkno; /* blkno of inode buffer */
int32_t ilf_len; /* len of inode buffer */
int32_t ilf_boffset; /* off of inode in buffer */
-} xfs_inode_log_format_64_t;
+} __attribute__((packed));
/*
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 7034e17535de..3354140de07e 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -247,6 +247,8 @@ xfs_set_mode(struct inode *inode, umode_t mode)
int
xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
+ umode_t mode;
+ bool set_mode = false;
int error = 0;
if (!acl)
@@ -257,16 +259,24 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
return error;
if (type == ACL_TYPE_ACCESS) {
- umode_t mode;
-
error = posix_acl_update_mode(inode, &mode, &acl);
if (error)
return error;
- error = xfs_set_mode(inode, mode);
- if (error)
- return error;
+ set_mode = true;
}
set_acl:
- return __xfs_set_acl(inode, acl, type);
+ error = __xfs_set_acl(inode, acl, type);
+ if (error)
+ return error;
+
+ /*
+ * We set the mode after successfully updating the ACL xattr because the
+ * xattr update can fail at ENOSPC and we don't want to change the mode
+ * if the ACL update hasn't been applied.
+ */
+ if (set_mode)
+ error = xfs_set_mode(inode, mode);
+
+ return error;
}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f18e5932aec4..a3eeaba156c5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -446,6 +446,19 @@ xfs_imap_valid(
{
offset >>= inode->i_blkbits;
+ /*
+ * We have to make sure the cached mapping is within EOF to protect
+ * against eofblocks trimming on file release leaving us with a stale
+ * mapping. Otherwise, a page for a subsequent file extending buffered
+ * write could get picked up by this writeback cycle and written to the
+ * wrong blocks.
+ *
+ * Note that what we really want here is a generic mapping invalidation
+ * mechanism to protect us from arbitrary extent modifying contexts, not
+ * just eofblocks.
+ */
+ xfs_trim_extent_eof(imap, XFS_I(inode));
+
return offset >= imap->br_startoff &&
offset < imap->br_startoff + imap->br_blockcount;
}
@@ -735,6 +748,14 @@ xfs_vm_invalidatepage(
{
trace_xfs_invalidatepage(page->mapping->host, page, offset,
length);
+
+ /*
+ * If we are invalidating the entire page, clear the dirty state from it
+ * so that we can check for attempts to release dirty cached pages in
+ * xfs_vm_releasepage().
+ */
+ if (offset == 0 && length >= PAGE_SIZE)
+ cancel_dirty_page(page);
block_invalidatepage(page, offset, length);
}
@@ -1190,25 +1211,27 @@ xfs_vm_releasepage(
* mm accommodates an old ext3 case where clean pages might not have had
* the dirty bit cleared. Thus, it can send actual dirty pages to
* ->releasepage() via shrink_active_list(). Conversely,
- * block_invalidatepage() can send pages that are still marked dirty
- * but otherwise have invalidated buffers.
+ * block_invalidatepage() can send pages that are still marked dirty but
+ * otherwise have invalidated buffers.
*
* We want to release the latter to avoid unnecessary buildup of the
- * LRU, skip the former and warn if we've left any lingering
- * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
- * or unwritten buffers and warn if the page is not dirty. Otherwise
- * try to release the buffers.
+ * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages
+ * that are entirely invalidated and need to be released. Hence the
+ * only time we should get dirty pages here is through
+ * shrink_active_list() and so we can simply skip those now.
+ *
+ * warn if we've left any lingering delalloc/unwritten buffers on clean
+ * or invalidated pages we are about to release.
*/
+ if (PageDirty(page))
+ return 0;
+
xfs_count_page_state(page, &delalloc, &unwritten);
- if (delalloc) {
- WARN_ON_ONCE(!PageDirty(page));
+ if (WARN_ON_ONCE(delalloc))
return 0;
- }
- if (unwritten) {
- WARN_ON_ONCE(!PageDirty(page));
+ if (WARN_ON_ONCE(unwritten))
return 0;
- }
return try_to_free_buffers(page);
}
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index ebd66b19fbfc..e3a950ed35a8 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -302,6 +302,8 @@ xfs_attr3_node_inactive(
&bp, XFS_ATTR_FORK);
if (error)
return error;
+ node = bp->b_addr;
+ btree = dp->d_ops->node_tree_p(node);
child_fsb = be32_to_cpu(btree[i + 1].before);
xfs_trans_brelse(*trans, bp);
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index bc6c6e10a969..6503cfa44262 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -84,6 +84,7 @@ xfs_zero_extent(
GFP_NOFS, 0);
}
+#ifdef CONFIG_XFS_RT
int
xfs_bmap_rtalloc(
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
@@ -190,6 +191,7 @@ xfs_bmap_rtalloc(
}
return 0;
}
+#endif /* CONFIG_XFS_RT */
/*
* Check if the endoff is outside the last extent. If so the caller will grow
@@ -2122,11 +2124,31 @@ xfs_swap_extents(
ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK;
+ }
+
+ /* Swap the cow forks. */
+ if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ xfs_extnum_t extnum;
+
+ ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS);
+ ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS);
+
+ extnum = ip->i_cnextents;
+ ip->i_cnextents = tip->i_cnextents;
+ tip->i_cnextents = extnum;
+
cowfp = ip->i_cowfp;
ip->i_cowfp = tip->i_cowfp;
tip->i_cowfp = cowfp;
- xfs_inode_set_cowblocks_tag(ip);
- xfs_inode_set_cowblocks_tag(tip);
+
+ if (ip->i_cowfp && ip->i_cnextents)
+ xfs_inode_set_cowblocks_tag(ip);
+ else
+ xfs_inode_clear_cowblocks_tag(ip);
+ if (tip->i_cowfp && tip->i_cnextents)
+ xfs_inode_set_cowblocks_tag(tip);
+ else
+ xfs_inode_clear_cowblocks_tag(tip);
}
xfs_trans_log_inode(tp, ip, src_log_flags);
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 0eaa81dc49be..7d330b3c77c3 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -28,7 +28,20 @@ struct xfs_mount;
struct xfs_trans;
struct xfs_bmalloca;
+#ifdef CONFIG_XFS_RT
int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
+#else /* !CONFIG_XFS_RT */
+/*
+ * Attempts to allocate RT extents when RT is disable indicates corruption and
+ * should trigger a shutdown.
+ */
+static inline int
+xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
+{
+ return -EFSCORRUPTED;
+}
+#endif /* CONFIG_XFS_RT */
+
int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
int whichfork, int *eof);
int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 309e26c9dddb..6526ef0e2a23 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -237,11 +237,13 @@ xfs_file_dax_read(
if (!count)
return 0; /* skip atime */
- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
return -EAGAIN;
+ } else {
xfs_ilock(ip, XFS_IOLOCK_SHARED);
}
+
ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -259,9 +261,10 @@ xfs_file_buffered_aio_read(
trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
return -EAGAIN;
+ } else {
xfs_ilock(ip, XFS_IOLOCK_SHARED);
}
ret = generic_file_read_iter(iocb, to);
@@ -552,9 +555,10 @@ xfs_file_dio_aio_write(
iolock = XFS_IOLOCK_SHARED;
}
- if (!xfs_ilock_nowait(ip, iolock)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!xfs_ilock_nowait(ip, iolock))
return -EAGAIN;
+ } else {
xfs_ilock(ip, iolock);
}
@@ -606,9 +610,10 @@ xfs_file_dax_write(
size_t count;
loff_t pos;
- if (!xfs_ilock_nowait(ip, iolock)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!xfs_ilock_nowait(ip, iolock))
return -EAGAIN;
+ } else {
xfs_ilock(ip, iolock);
}
@@ -764,7 +769,7 @@ xfs_file_fallocate(
enum xfs_prealloc_flags flags = 0;
uint iolock = XFS_IOLOCK_EXCL;
loff_t new_size = 0;
- bool do_file_insert = 0;
+ bool do_file_insert = false;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -825,7 +830,7 @@ xfs_file_fallocate(
error = -EINVAL;
goto out_unlock;
}
- do_file_insert = 1;
+ do_file_insert = true;
} else {
flags |= XFS_PREALLOC_SET;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 814ed729881d..43cfc07996a4 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper(
return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
}
-/* Transform a rtbitmap "record" into a fsmap */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap_helper(
- struct xfs_trans *tp,
- struct xfs_rtalloc_rec *rec,
- void *priv)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_getfsmap_info *info = priv;
- struct xfs_rmap_irec irec;
- xfs_daddr_t rec_daddr;
-
- rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
-
- irec.rm_startblock = rec->ar_startblock;
- irec.rm_blockcount = rec->ar_blockcount;
- irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
- irec.rm_offset = 0;
- irec.rm_flags = 0;
-
- return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
-}
-
/* Transform a bnobt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_bnobt_helper(
@@ -475,6 +452,30 @@ xfs_getfsmap_logdev(
return xfs_getfsmap_helper(tp, info, &rmap, 0);
}
+#ifdef CONFIG_XFS_RT
+/* Transform a rtbitmap "record" into a fsmap */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap_helper(
+ struct xfs_trans *tp,
+ struct xfs_rtalloc_rec *rec,
+ void *priv)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_getfsmap_info *info = priv;
+ struct xfs_rmap_irec irec;
+ xfs_daddr_t rec_daddr;
+
+ rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
+
+ irec.rm_startblock = rec->ar_startblock;
+ irec.rm_blockcount = rec->ar_blockcount;
+ irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
+ irec.rm_offset = 0;
+ irec.rm_flags = 0;
+
+ return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
+}
+
/* Execute a getfsmap query against the realtime device. */
STATIC int
__xfs_getfsmap_rtdev(
@@ -561,6 +562,7 @@ xfs_getfsmap_rtdev_rtbitmap(
return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query,
info);
}
+#endif /* CONFIG_XFS_RT */
/* Execute a getfsmap query against the regular data device. */
STATIC int
@@ -795,7 +797,15 @@ xfs_getfsmap_check_keys(
return false;
}
+/*
+ * There are only two devices if we didn't configure RT devices at build time.
+ */
+#ifdef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS 3
+#else
+#define XFS_GETFSMAP_DEVS 2
+#endif /* CONFIG_XFS_RT */
+
/*
* Get filesystem's extents as described in head, and format for
* output. Calls formatter to fill the user's buffer until all
@@ -853,10 +863,12 @@ xfs_getfsmap(
handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
handlers[1].fn = xfs_getfsmap_logdev;
}
+#ifdef CONFIG_XFS_RT
if (mp->m_rtdev_targp) {
handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
}
+#endif /* CONFIG_XFS_RT */
xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
xfs_getfsmap_dev_compare);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index a705f34b58fa..9bbc2d7cc8cb 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -364,6 +364,9 @@ xfs_inode_to_log_dinode(
to->di_dmstate = from->di_dmstate;
to->di_flags = from->di_flags;
+ /* log a dummy value to ensure log structure is fully initialised */
+ to->di_next_unlinked = NULLAGINO;
+
if (from->di_version == 3) {
to->di_changecount = inode->i_version;
to->di_crtime.t_sec = from->di_crtime.t_sec;
@@ -404,6 +407,11 @@ xfs_inode_item_format_core(
* the second with the on-disk inode structure, and a possible third and/or
* fourth with the inode data/extents/b-tree root and inode attributes
* data/extents/b-tree root.
+ *
+ * Note: Always use the 64 bit inode log format structure so we don't
+ * leave an uninitialised hole in the format item on 64 bit systems. Log
+ * recovery on 32 bit systems handles this just fine, so there's no reason
+ * for not using an initialising the properly padded structure all the time.
*/
STATIC void
xfs_inode_item_format(
@@ -412,8 +420,8 @@ xfs_inode_item_format(
{
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
- struct xfs_inode_log_format *ilf;
struct xfs_log_iovec *vecp = NULL;
+ struct xfs_inode_log_format *ilf;
ASSERT(ip->i_d.di_version > 1);
@@ -425,7 +433,17 @@ xfs_inode_item_format(
ilf->ilf_boffset = ip->i_imap.im_boffset;
ilf->ilf_fields = XFS_ILOG_CORE;
ilf->ilf_size = 2; /* format + core */
- xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
+
+ /*
+ * make sure we don't leak uninitialised data into the log in the case
+ * when we don't log every field in the inode.
+ */
+ ilf->ilf_dsize = 0;
+ ilf->ilf_asize = 0;
+ ilf->ilf_pad = 0;
+ uuid_copy(&ilf->ilf_u.ilfu_uuid, &uuid_null);
+
+ xlog_finish_iovec(lv, vecp, sizeof(*ilf));
xfs_inode_item_format_core(ip, lv, &vecp);
xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
@@ -855,44 +873,29 @@ xfs_istale_done(
}
/*
- * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
- * (which can have different field alignments) to the native version
+ * convert an xfs_inode_log_format struct from the old 32 bit version
+ * (which can have different field alignments) to the native 64 bit version
*/
int
xfs_inode_item_format_convert(
- xfs_log_iovec_t *buf,
- xfs_inode_log_format_t *in_f)
+ struct xfs_log_iovec *buf,
+ struct xfs_inode_log_format *in_f)
{
- if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
- xfs_inode_log_format_32_t *in_f32 = buf->i_addr;
-
- in_f->ilf_type = in_f32->ilf_type;
- in_f->ilf_size = in_f32->ilf_size;
- in_f->ilf_fields = in_f32->ilf_fields;
- in_f->ilf_asize = in_f32->ilf_asize;
- in_f->ilf_dsize = in_f32->ilf_dsize;
- in_f->ilf_ino = in_f32->ilf_ino;
- /* copy biggest field of ilf_u */
- uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
- in_f->ilf_blkno = in_f32->ilf_blkno;
- in_f->ilf_len = in_f32->ilf_len;
- in_f->ilf_boffset = in_f32->ilf_boffset;
- return 0;
- } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
- xfs_inode_log_format_64_t *in_f64 = buf->i_addr;
-
- in_f->ilf_type = in_f64->ilf_type;
- in_f->ilf_size = in_f64->ilf_size;
- in_f->ilf_fields = in_f64->ilf_fields;
- in_f->ilf_asize = in_f64->ilf_asize;
- in_f->ilf_dsize = in_f64->ilf_dsize;
- in_f->ilf_ino = in_f64->ilf_ino;
- /* copy biggest field of ilf_u */
- uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid);
- in_f->ilf_blkno = in_f64->ilf_blkno;
- in_f->ilf_len = in_f64->ilf_len;
- in_f->ilf_boffset = in_f64->ilf_boffset;
- return 0;
- }
- return -EFSCORRUPTED;
+ struct xfs_inode_log_format_32 *in_f32 = buf->i_addr;
+
+ if (buf->i_len != sizeof(*in_f32))
+ return -EFSCORRUPTED;
+
+ in_f->ilf_type = in_f32->ilf_type;
+ in_f->ilf_size = in_f32->ilf_size;
+ in_f->ilf_fields = in_f32->ilf_fields;
+ in_f->ilf_asize = in_f32->ilf_asize;
+ in_f->ilf_dsize = in_f32->ilf_dsize;
+ in_f->ilf_ino = in_f32->ilf_ino;
+ /* copy biggest field of ilf_u */
+ uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
+ in_f->ilf_blkno = in_f32->ilf_blkno;
+ in_f->ilf_len = in_f32->ilf_len;
+ in_f->ilf_boffset = in_f32->ilf_boffset;
+ return 0;
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c5107c7bc4bf..dc95a49d62e7 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2515,7 +2515,7 @@ next_lv:
if (lv)
vecp = lv->lv_iovecp;
}
- if (record_cnt == 0 && ordered == false) {
+ if (record_cnt == 0 && !ordered) {
if (!lv)
return 0;
break;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index ea7d4b4e50d0..e9727d0a541a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -704,7 +704,7 @@ xfs_mountfs(
xfs_set_maxicount(mp);
/* enable fail_at_unmount as default */
- mp->m_fail_unmount = 1;
+ mp->m_fail_unmount = true;
error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
if (error)
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 0c381d71b242..0492436a053f 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -134,7 +134,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28);
XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp, 8);
XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52);
- XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64, 56);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20);
XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16);
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3246815c24d6..37e603bf1591 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -736,7 +736,13 @@ xfs_reflink_end_cow(
/* If there is a hole at end_fsb - 1 go to the previous extent */
if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
got.br_startoff > end_fsb) {
- ASSERT(idx > 0);
+ /*
+ * In case of racing, overlapping AIO writes no COW extents
+ * might be left by the time I/O completes for the loser of
+ * the race. In that case we are done.
+ */
+ if (idx <= 0)
+ goto out_cancel;
xfs_iext_get_extent(ifp, --idx, &got);
}
@@ -809,6 +815,7 @@ next_extent:
out_defer:
xfs_defer_cancel(&dfops);
+out_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 584cf2d573ba..f663022353c0 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1637,7 +1637,7 @@ xfs_fs_fill_super(
/* version 5 superblocks support inode version counters. */
if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
- sb->s_flags |= MS_I_VERSION;
+ sb->s_flags |= SB_I_VERSION;
if (mp->m_flags & XFS_MOUNT_DAX) {
xfs_warn(mp,