summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c10
-rw-r--r--fs/binfmt_misc.c56
-rw-r--r--fs/binfmt_script.c17
-rw-r--r--fs/block_dev.c6
-rw-r--r--fs/btrfs/compression.c18
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/disk-io.c9
-rw-r--r--fs/btrfs/extent_io.c10
-rw-r--r--fs/btrfs/inode.c27
-rw-r--r--fs/btrfs/ioctl.c12
-rw-r--r--fs/btrfs/qgroup.c6
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/btrfs/tree-log.c12
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/ceph/mds_client.c18
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/ceph/snap.c8
-rw-r--r--fs/cifs/cifs_debug.c9
-rw-r--r--fs/cifs/cifsfs.c4
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h6
-rw-r--r--fs/cifs/connect.c32
-rw-r--r--fs/cifs/file.c19
-rw-r--r--fs/cifs/inode.c15
-rw-r--r--fs/cifs/smb2ops.c42
-rw-r--r--fs/cifs/smb2pdu.c111
-rw-r--r--fs/cifs/smb2pdu.h2
-rw-r--r--fs/direct-io.c52
-rw-r--r--fs/exec.c2
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/f2fs/segment.c6
-rw-r--r--fs/f2fs/super.c2
-rw-r--r--fs/fcntl.c2
-rw-r--r--fs/gfs2/glock.c14
-rw-r--r--fs/iomap.c43
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/mpage.c14
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/filelayout/filelayout.c3
-rw-r--r--fs/nfs/nfs4idmap.c2
-rw-r--r--fs/nfs/nfs4proc.c3
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfsd/nfs4proc.c9
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/overlayfs/dir.c20
-rw-r--r--fs/overlayfs/namei.c1
-rw-r--r--fs/overlayfs/overlayfs.h1
-rw-r--r--fs/overlayfs/ovl_entry.h3
-rw-r--r--fs/overlayfs/readdir.c6
-rw-r--r--fs/overlayfs/super.c27
-rw-r--r--fs/overlayfs/util.c24
-rw-r--r--fs/proc/array.c44
-rw-r--r--fs/quota/dquot.c29
-rw-r--r--fs/quota/quota_v2.c4
-rw-r--r--fs/read_write.c4
-rw-r--r--fs/userfaultfd.c66
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c12
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c8
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c21
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h27
-rw-r--r--fs/xfs/xfs_acl.c22
-rw-r--r--fs/xfs/xfs_aops.c3
-rw-r--r--fs/xfs/xfs_attr_inactive.c2
-rw-r--r--fs/xfs/xfs_bmap_util.c40
-rw-r--r--fs/xfs/xfs_bmap_util.h13
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_error.c2
-rw-r--r--fs/xfs/xfs_file.c43
-rw-r--r--fs/xfs/xfs_fsmap.c12
-rw-r--r--fs/xfs/xfs_inode.c8
-rw-r--r--fs/xfs/xfs_inode_item.c81
-rw-r--r--fs/xfs/xfs_ioctl.c3
-rw-r--r--fs/xfs/xfs_iomap.c7
-rw-r--r--fs/xfs/xfs_iomap.h2
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_mount.c2
-rw-r--r--fs/xfs/xfs_ondisk.h2
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_reflink.c9
-rw-r--r--fs/xfs/xfs_super.c10
84 files changed, 828 insertions, 377 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index adaf6f6dd858..e1cbdfdb7c68 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -310,9 +310,13 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping,
p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
- if (unlikely(copied < len && !PageUptodate(page))) {
- copied = 0;
- goto out;
+ if (!PageUptodate(page)) {
+ if (unlikely(copied < len)) {
+ copied = 0;
+ goto out;
+ } else if (len == PAGE_SIZE) {
+ SetPageUptodate(page);
+ }
}
/*
* No need to use i_size_read() here, the i_size
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index ce7181ea60fa..a7c5a9861bef 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -54,7 +54,7 @@ typedef struct {
int size; /* size of magic/mask */
char *magic; /* magic or filename extension */
char *mask; /* mask, NULL for exact match */
- char *interpreter; /* filename of interpreter */
+ const char *interpreter; /* filename of interpreter */
char *name;
struct dentry *dentry;
struct file *interp_file;
@@ -131,27 +131,26 @@ static int load_misc_binary(struct linux_binprm *bprm)
{
Node *fmt;
struct file *interp_file = NULL;
- char iname[BINPRM_BUF_SIZE];
- const char *iname_addr = iname;
int retval;
int fd_binary = -1;
retval = -ENOEXEC;
if (!enabled)
- goto ret;
+ return retval;
/* to keep locking time low, we copy the interpreter string */
read_lock(&entries_lock);
fmt = check_file(bprm);
if (fmt)
- strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
+ dget(fmt->dentry);
read_unlock(&entries_lock);
if (!fmt)
- goto ret;
+ return retval;
/* Need to be able to load the file after exec */
+ retval = -ENOENT;
if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
- return -ENOENT;
+ goto ret;
if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
retval = remove_arg_zero(bprm);
@@ -195,22 +194,22 @@ static int load_misc_binary(struct linux_binprm *bprm)
bprm->argc++;
/* add the interp as argv[0] */
- retval = copy_strings_kernel(1, &iname_addr, bprm);
+ retval = copy_strings_kernel(1, &fmt->interpreter, bprm);
if (retval < 0)
goto error;
bprm->argc++;
/* Update interp in case binfmt_script needs it. */
- retval = bprm_change_interp(iname, bprm);
+ retval = bprm_change_interp(fmt->interpreter, bprm);
if (retval < 0)
goto error;
- if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) {
+ if (fmt->flags & MISC_FMT_OPEN_FILE) {
interp_file = filp_clone_open(fmt->interp_file);
if (!IS_ERR(interp_file))
deny_write_access(interp_file);
} else {
- interp_file = open_exec(iname);
+ interp_file = open_exec(fmt->interpreter);
}
retval = PTR_ERR(interp_file);
if (IS_ERR(interp_file))
@@ -238,6 +237,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
goto error;
ret:
+ dput(fmt->dentry);
return retval;
error:
if (fd_binary > 0)
@@ -594,8 +594,13 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
static void bm_evict_inode(struct inode *inode)
{
+ Node *e = inode->i_private;
+
+ if (e && e->flags & MISC_FMT_OPEN_FILE)
+ filp_close(e->interp_file, NULL);
+
clear_inode(inode);
- kfree(inode->i_private);
+ kfree(e);
}
static void kill_node(Node *e)
@@ -603,24 +608,14 @@ static void kill_node(Node *e)
struct dentry *dentry;
write_lock(&entries_lock);
- dentry = e->dentry;
- if (dentry) {
- list_del_init(&e->list);
- e->dentry = NULL;
- }
+ list_del_init(&e->list);
write_unlock(&entries_lock);
- if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) {
- filp_close(e->interp_file, NULL);
- e->interp_file = NULL;
- }
-
- if (dentry) {
- drop_nlink(d_inode(dentry));
- d_drop(dentry);
- dput(dentry);
- simple_release_fs(&bm_mnt, &entry_count);
- }
+ dentry = e->dentry;
+ drop_nlink(d_inode(dentry));
+ d_drop(dentry);
+ dput(dentry);
+ simple_release_fs(&bm_mnt, &entry_count);
}
/* /<entry> */
@@ -665,7 +660,8 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
root = file_inode(file)->i_sb->s_root;
inode_lock(d_inode(root));
- kill_node(e);
+ if (!list_empty(&e->list))
+ kill_node(e);
inode_unlock(d_inode(root));
break;
@@ -794,7 +790,7 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
inode_lock(d_inode(root));
while (!list_empty(&entries))
- kill_node(list_entry(entries.next, Node, list));
+ kill_node(list_first_entry(&entries, Node, list));
inode_unlock(d_inode(root));
break;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index afdf4e3cafc2..7cde3f46ad26 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -19,7 +19,6 @@ static int load_script(struct linux_binprm *bprm)
const char *i_arg, *i_name;
char *cp;
struct file *file;
- char interp[BINPRM_BUF_SIZE];
int retval;
if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
@@ -55,7 +54,7 @@ static int load_script(struct linux_binprm *bprm)
break;
}
for (cp = bprm->buf+2; (*cp == ' ') || (*cp == '\t'); cp++);
- if (*cp == '\0')
+ if (*cp == '\0')
return -ENOEXEC; /* No interpreter name found */
i_name = cp;
i_arg = NULL;
@@ -65,7 +64,6 @@ static int load_script(struct linux_binprm *bprm)
*cp++ = '\0';
if (*cp)
i_arg = cp;
- strcpy (interp, i_name);
/*
* OK, we've parsed out the interpreter name and
* (optional) argument.
@@ -80,24 +78,27 @@ static int load_script(struct linux_binprm *bprm)
if (retval)
return retval;
retval = copy_strings_kernel(1, &bprm->interp, bprm);
- if (retval < 0) return retval;
+ if (retval < 0)
+ return retval;
bprm->argc++;
if (i_arg) {
retval = copy_strings_kernel(1, &i_arg, bprm);
- if (retval < 0) return retval;
+ if (retval < 0)
+ return retval;
bprm->argc++;
}
retval = copy_strings_kernel(1, &i_name, bprm);
- if (retval) return retval;
+ if (retval)
+ return retval;
bprm->argc++;
- retval = bprm_change_interp(interp, bprm);
+ retval = bprm_change_interp(i_name, bprm);
if (retval < 0)
return retval;
/*
* OK, now restart the process with the interpreter's dentry.
*/
- file = open_exec(interp);
+ file = open_exec(i_name);
if (IS_ERR(file))
return PTR_ERR(file);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 93d088ffc05c..789f55e851ae 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -716,10 +716,12 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
set_page_writeback(page);
result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
- if (result)
+ if (result) {
end_page_writeback(page);
- else
+ } else {
+ clean_page_buffers(page);
unlock_page(page);
+ }
blk_queue_exit(bdev->bd_queue);
return result;
}
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b51d23f5cafa..280384bf34f1 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -107,7 +107,8 @@ static void end_compressed_bio_read(struct bio *bio)
struct inode *inode;
struct page *page;
unsigned long index;
- int ret;
+ unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
+ int ret = 0;
if (bio->bi_status)
cb->errors = 1;
@@ -118,6 +119,21 @@ static void end_compressed_bio_read(struct bio *bio)
if (!refcount_dec_and_test(&cb->pending_bios))
goto out;
+ /*
+ * Record the correct mirror_num in cb->orig_bio so that
+ * read-repair can work properly.
+ */
+ ASSERT(btrfs_io_bio(cb->orig_bio));
+ btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
+ cb->mirror_num = mirror;
+
+ /*
+ * Some IO in this cb have failed, just skip checksum as there
+ * is no way it could be correct.
+ */
+ if (cb->errors == 1)
+ goto csum_failed;
+
inode = cb->inode;
ret = check_compressed_csum(BTRFS_I(inode), cb,
(u64)bio->bi_iter.bi_sector << 9);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5a8933da39a7..8fc690384c58 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -709,7 +709,6 @@ struct btrfs_delayed_root;
#define BTRFS_FS_OPEN 5
#define BTRFS_FS_QUOTA_ENABLED 6
#define BTRFS_FS_QUOTA_ENABLING 7
-#define BTRFS_FS_QUOTA_DISABLING 8
#define BTRFS_FS_UPDATE_UUID_TREE_GEN 9
#define BTRFS_FS_CREATING_FREE_SPACE_TREE 10
#define BTRFS_FS_BTREE_ERR 11
@@ -723,7 +722,7 @@ struct btrfs_delayed_root;
* Indicate that a whole-filesystem exclusive operation is running
* (device replace, resize, device add/delete, balance)
*/
-#define BTRFS_FS_EXCL_OP 14
+#define BTRFS_FS_EXCL_OP 16
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 487bbe4fb3c6..dfdab849037b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3643,7 +3643,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
u64 flags;
do_barriers = !btrfs_test_opt(fs_info, NOBARRIER);
- backup_super_roots(fs_info);
+
+ /*
+ * max_mirrors == 0 indicates we're from commit_transaction,
+ * not from fsync where the tree roots in fs_info have not
+ * been consistent on disk.
+ */
+ if (max_mirrors == 0)
+ backup_super_roots(fs_info);
sb = fs_info->super_for_commit;
dev_item = &sb->dev_item;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e5bb0cdd3cd..970190cd347e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2801,7 +2801,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
}
}
- bio = btrfs_bio_alloc(bdev, sector << 9);
+ bio = btrfs_bio_alloc(bdev, (u64)sector << 9);
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
@@ -3471,8 +3471,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
unsigned int write_flags = 0;
unsigned long nr_written = 0;
- if (wbc->sync_mode == WB_SYNC_ALL)
- write_flags = REQ_SYNC;
+ write_flags = wbc_to_write_flags(wbc);
trace___extent_writepage(page, inode, wbc);
@@ -3718,7 +3717,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
unsigned long i, num_pages;
unsigned long bio_flags = 0;
unsigned long start, end;
- unsigned int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META;
+ unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
int ret = 0;
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
@@ -4063,9 +4062,6 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
if (epd->bio) {
int ret;
- bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
- epd->sync_io ? REQ_SYNC : 0);
-
ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
BUG_ON(ret < 0); /* -ENOMEM */
epd->bio = NULL;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 128f3e58634f..d94e3f68b9b1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -135,6 +135,18 @@ static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
const u64 offset,
const u64 bytes)
{
+ unsigned long index = offset >> PAGE_SHIFT;
+ unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
+ struct page *page;
+
+ while (index <= end_index) {
+ page = find_get_page(inode->i_mapping, index);
+ index++;
+ if (!page)
+ continue;
+ ClearPagePrivate2(page);
+ put_page(page);
+ }
return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
bytes - PAGE_SIZE, false);
}
@@ -8357,11 +8369,8 @@ static void btrfs_endio_direct_read(struct bio *bio)
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
blk_status_t err = bio->bi_status;
- if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) {
+ if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
err = btrfs_subio_endio_read(inode, io_bio, err);
- if (!err)
- bio->bi_status = 0;
- }
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
dip->logical_offset + dip->bytes - 1);
@@ -8369,7 +8378,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
kfree(dip);
- dio_bio->bi_status = bio->bi_status;
+ dio_bio->bi_status = err;
dio_end_io(dio_bio);
if (io_bio->end_io)
@@ -8387,6 +8396,7 @@ static void __endio_write_update_ordered(struct inode *inode,
btrfs_work_func_t func;
u64 ordered_offset = offset;
u64 ordered_bytes = bytes;
+ u64 last_offset;
int ret;
if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -8398,6 +8408,7 @@ static void __endio_write_update_ordered(struct inode *inode,
}
again:
+ last_offset = ordered_offset;
ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
&ordered_offset,
ordered_bytes,
@@ -8409,6 +8420,12 @@ again:
btrfs_queue_work(wq, &ordered->work);
out_test:
/*
+ * If btrfs_dec_test_ordered_pending does not find any ordered extent
+ * in the range, we can exit.
+ */
+ if (ordered_offset == last_offset)
+ return;
+ /*
* our bio might span multiple ordered extents. If we haven't
* completed the accounting for the whole dio, go back and try again
*/
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d6715c2bcdc4..6c7a49faf4e0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2773,9 +2773,9 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
}
mutex_unlock(&fs_devices->device_list_mutex);
- fi_args->nodesize = fs_info->super_copy->nodesize;
- fi_args->sectorsize = fs_info->super_copy->sectorsize;
- fi_args->clone_alignment = fs_info->super_copy->sectorsize;
+ fi_args->nodesize = fs_info->nodesize;
+ fi_args->sectorsize = fs_info->sectorsize;
+ fi_args->clone_alignment = fs_info->sectorsize;
if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
ret = -EFAULT;
@@ -3032,7 +3032,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
out:
if (ret)
btrfs_cmp_data_free(cmp);
- return 0;
+ return ret;
}
static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
@@ -4061,6 +4061,10 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
ret = PTR_ERR(new_root);
goto out;
}
+ if (!is_fstree(new_root->objectid)) {
+ ret = -ENOENT;
+ goto out;
+ }
path = btrfs_alloc_path();
if (!path) {
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5c8b61c86e61..e172d4843eae 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -807,7 +807,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
}
ret = 0;
out:
- set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags);
btrfs_free_path(path);
return ret;
}
@@ -953,7 +952,6 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
if (!fs_info->quota_root)
goto out;
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
- set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags);
btrfs_qgroup_wait_for_completion(fs_info, false);
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
@@ -1307,6 +1305,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
}
}
ret = del_qgroup_item(trans, quota_root, qgroupid);
+ if (ret && ret != -ENOENT)
+ goto out;
while (!list_empty(&qgroup->groups)) {
list = list_first_entry(&qgroup->groups,
@@ -2086,8 +2086,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
- if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags))
- clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
spin_lock(&fs_info->qgroup_lock);
while (!list_empty(&fs_info->dirty_qgroups)) {
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 3a49a3c2fca4..9841faef08ea 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2400,11 +2400,11 @@ void free_reloc_roots(struct list_head *list)
while (!list_empty(list)) {
reloc_root = list_entry(list->next, struct btrfs_root,
root_list);
+ __del_reloc_root(reloc_root);
free_extent_buffer(reloc_root->node);
free_extent_buffer(reloc_root->commit_root);
reloc_root->node = NULL;
reloc_root->commit_root = NULL;
- __del_reloc_root(reloc_root);
}
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 32b043ef8ac9..8fd195cfe81b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2630,7 +2630,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
} else {
btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
(int)(mode & S_IFMT));
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
goto out;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ad7f4bab640b..c800d067fcbf 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4181,6 +4181,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct extent_map *em, *n;
struct list_head extents;
struct extent_map_tree *tree = &inode->extent_tree;
+ u64 logged_start, logged_end;
u64 test_gen;
int ret = 0;
int num = 0;
@@ -4190,10 +4191,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
down_write(&inode->dio_sem);
write_lock(&tree->lock);
test_gen = root->fs_info->last_trans_committed;
+ logged_start = start;
+ logged_end = end;
list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
list_del_init(&em->list);
-
/*
* Just an arbitrary number, this can be really CPU intensive
* once we start getting a lot of extents, and really once we
@@ -4208,6 +4210,12 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
if (em->generation <= test_gen)
continue;
+
+ if (em->start < logged_start)
+ logged_start = em->start;
+ if ((em->start + em->len - 1) > logged_end)
+ logged_end = em->start + em->len - 1;
+
/* Need a ref to keep it from getting evicted from cache */
refcount_inc(&em->refs);
set_bit(EXTENT_FLAG_LOGGING, &em->flags);
@@ -4216,7 +4224,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
}
list_sort(NULL, &extents, extent_cmp);
- btrfs_get_logged_extents(inode, logged_list, start, end);
+ btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
/*
* Some ordered extents started by fsync might have completed
* before we could collect them into the list logged_list, which
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e8f16c305df..b39737568c22 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6166,7 +6166,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
map_length = length;
btrfs_bio_counter_inc_blocked(fs_info);
- ret = __btrfs_map_block(fs_info, bio_op(bio), logical,
+ ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
&map_length, &bbio, mirror_num, 1);
if (ret) {
btrfs_bio_counter_dec(fs_info);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9dd6b836ac9e..f23c820daaed 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -7,7 +7,6 @@
#include <linux/sched.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
-#include <linux/utsname.h>
#include <linux/ratelimit.h>
#include "super.h"
@@ -735,12 +734,13 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode = req->r_inode;
ihold(inode);
} else {
- /* req->r_dentry is non-null for LSSNAP request.
- * fall-thru */
- WARN_ON_ONCE(!req->r_dentry);
+ /* req->r_dentry is non-null for LSSNAP request */
+ rcu_read_lock();
+ inode = get_nonsnap_parent(req->r_dentry);
+ rcu_read_unlock();
+ dout("__choose_mds using snapdir's parent %p\n", inode);
}
- }
- if (!inode && req->r_dentry) {
+ } else if (req->r_dentry) {
/* ignore race with rename; old or new d_parent is okay */
struct dentry *parent;
struct inode *dir;
@@ -884,8 +884,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
void *p;
const char* metadata[][2] = {
- {"hostname", utsname()->nodename},
- {"kernel_version", utsname()->release},
+ {"hostname", mdsc->nodename},
+ {"kernel_version", init_utsname()->release},
{"entity_id", opt->name ? : ""},
{"root", fsopt->server_path ? : "/"},
{NULL, NULL}
@@ -3539,6 +3539,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
init_rwsem(&mdsc->pool_perm_rwsem);
mdsc->pool_perm_tree = RB_ROOT;
+ strncpy(mdsc->nodename, utsname()->nodename,
+ sizeof(mdsc->nodename) - 1);
return 0;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index db57ae98ed34..636d6b2ec49c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -8,6 +8,7 @@
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/refcount.h>
+#include <linux/utsname.h>
#include <linux/ceph/types.h>
#include <linux/ceph/messenger.h>
@@ -368,6 +369,8 @@ struct ceph_mds_client {
struct rw_semaphore pool_perm_rwsem;
struct rb_root pool_perm_tree;
+
+ char nodename[__NEW_UTS_LEN + 1];
};
extern const char *ceph_mds_op_name(int op);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 1ffc8b426c1c..7fc0b850c352 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -374,12 +374,10 @@ static int build_snap_context(struct ceph_snap_realm *realm,
realm->ino, realm, snapc, snapc->seq,
(unsigned int) snapc->num_snaps);
- if (realm->cached_context) {
- ceph_put_snap_context(realm->cached_context);
- /* queue realm for cap_snap creation */
- list_add_tail(&realm->dirty_item, dirty_realms);
- }
+ ceph_put_snap_context(realm->cached_context);
realm->cached_context = snapc;
+ /* queue realm for cap_snap creation */
+ list_add_tail(&realm->dirty_item, dirty_realms);
return 0;
fail:
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9727e1dcacd5..cbb9534b89b4 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -160,8 +160,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
if ((ses->serverDomain == NULL) ||
(ses->serverOS == NULL) ||
(ses->serverNOS == NULL)) {
- seq_printf(m, "\n%d) entry for %s not fully "
- "displayed\n\t", i, ses->serverName);
+ seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t",
+ i, ses->serverName, ses->ses_count,
+ ses->capabilities, ses->status);
+ if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
+ seq_printf(m, "Guest\t");
+ else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
+ seq_printf(m, "Anonymous\t");
} else {
seq_printf(m,
"\n%d) Name: %s Domain: %s Uses: %d OS:"
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 180b3356ff86..8c8b75d33f31 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -461,6 +461,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",nocase");
if (tcon->retry)
seq_puts(s, ",hard");
+ else
+ seq_puts(s, ",soft");
if (tcon->use_persistent)
seq_puts(s, ",persistenthandles");
else if (tcon->use_resilient)
@@ -1447,7 +1449,7 @@ exit_cifs(void)
exit_cifs_idmap();
#endif
#ifdef CONFIG_CIFS_UPCALL
- unregister_key_type(&cifs_spnego_key_type);
+ exit_cifs_spnego();
#endif
cifs_destroy_request_bufs();
cifs_destroy_mids();
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 30bf89b1fd9a..5a10e566f0e6 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.09"
+#define CIFS_VERSION "2.10"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 808486c29f0d..de5b2e1fcce5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -188,6 +188,8 @@ enum smb_version {
#ifdef CONFIG_CIFS_SMB311
Smb_311,
#endif /* SMB311 */
+ Smb_3any,
+ Smb_default,
Smb_version_err
};
@@ -1701,6 +1703,10 @@ extern struct smb_version_values smb20_values;
#define SMB21_VERSION_STRING "2.1"
extern struct smb_version_operations smb21_operations;
extern struct smb_version_values smb21_values;
+#define SMBDEFAULT_VERSION_STRING "default"
+extern struct smb_version_values smbdefault_values;
+#define SMB3ANY_VERSION_STRING "3"
+extern struct smb_version_values smb3any_values;
#define SMB30_VERSION_STRING "3.0"
extern struct smb_version_operations smb30_operations;
extern struct smb_version_values smb30_values;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5aa2d278ca84..0bfc2280436d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -301,6 +301,8 @@ static const match_table_t cifs_smb_version_tokens = {
{ Smb_311, SMB311_VERSION_STRING },
{ Smb_311, ALT_SMB311_VERSION_STRING },
#endif /* SMB311 */
+ { Smb_3any, SMB3ANY_VERSION_STRING },
+ { Smb_default, SMBDEFAULT_VERSION_STRING },
{ Smb_version_err, NULL }
};
@@ -1148,6 +1150,14 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
vol->vals = &smb311_values;
break;
#endif /* SMB311 */
+ case Smb_3any:
+ vol->ops = &smb30_operations; /* currently identical with 3.0 */
+ vol->vals = &smb3any_values;
+ break;
+ case Smb_default:
+ vol->ops = &smb30_operations; /* currently identical with 3.0 */
+ vol->vals = &smbdefault_values;
+ break;
default:
cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
return 1;
@@ -1274,9 +1284,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->actimeo = CIFS_DEF_ACTIMEO;
- /* FIXME: add autonegotiation for SMB3 or later rather than just SMB3 */
- vol->ops = &smb30_operations; /* both secure and accepted widely */
- vol->vals = &smb30_values;
+ /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */
+ vol->ops = &smb30_operations;
+ vol->vals = &smbdefault_values;
vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
@@ -1988,11 +1998,10 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (got_version == false)
pr_warn("No dialect specified on mount. Default has changed to "
- "a more secure dialect, SMB3 (vers=3.0), from CIFS "
+ "a more secure dialect, SMB2.1 or later (e.g. SMB3), from CIFS "
"(SMB1). To use the less secure SMB1 dialect to access "
- "old servers which do not support SMB3 specify vers=1.0"
- " on mount. For somewhat newer servers such as Windows "
- "7 try vers=2.1.\n");
+ "old servers which do not support SMB3 (or SMB2.1) specify vers=1.0"
+ " on mount.\n");
kfree(mountdata_copy);
return 0;
@@ -2133,6 +2142,7 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
if (vol->nosharesock)
return 0;
+ /* BB update this for smb3any and default case */
if ((server->vals != vol->vals) || (server->ops != vol->ops))
return 0;
@@ -4144,6 +4154,14 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n",
server->sec_mode, server->capabilities, server->timeAdj);
+ if (ses->auth_key.response) {
+ cifs_dbg(VFS, "Free previous auth_key.response = %p\n",
+ ses->auth_key.response);
+ kfree(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+ ses->auth_key.len = 0;
+ }
+
if (server->ops->sess_setup)
rc = server->ops->sess_setup(xid, ses, nls_info);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0786f19d288f..92fdf9c35de2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -224,6 +224,13 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
if (backup_cred(cifs_sb))
create_options |= CREATE_OPEN_BACKUP_INTENT;
+ /* O_SYNC also has bit for O_DSYNC so following check picks up either */
+ if (f_flags & O_SYNC)
+ create_options |= CREATE_WRITE_THROUGH;
+
+ if (f_flags & O_DIRECT)
+ create_options |= CREATE_NO_BUFFER;
+
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = desired_access;
@@ -1102,8 +1109,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
struct cifs_tcon *tcon;
unsigned int num, max_num, max_buf;
LOCKING_ANDX_RANGE *buf, *cur;
- int types[] = {LOCKING_ANDX_LARGE_FILES,
- LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+ static const int types[] = {
+ LOCKING_ANDX_LARGE_FILES,
+ LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
+ };
int i;
xid = get_xid();
@@ -1434,8 +1443,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
unsigned int xid)
{
int rc = 0, stored_rc;
- int types[] = {LOCKING_ANDX_LARGE_FILES,
- LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+ static const int types[] = {
+ LOCKING_ANDX_LARGE_FILES,
+ LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
+ };
unsigned int i;
unsigned int max_num, num, max_buf;
LOCKING_ANDX_RANGE *buf, *cur;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a8693632235f..7c732cb44164 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -234,6 +234,8 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime);
fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
+ /* old POSIX extensions don't get create time */
+
fattr->cf_mode = le64_to_cpu(info->Permissions);
/*
@@ -2024,6 +2026,19 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
stat->blksize = CIFS_MAX_MSGSIZE;
stat->ino = CIFS_I(inode)->uniqueid;
+ /* old CIFS Unix Extensions doesn't return create time */
+ if (CIFS_I(inode)->createtime) {
+ stat->result_mask |= STATX_BTIME;
+ stat->btime =
+ cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime));
+ }
+
+ stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED);
+ if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_COMPRESSED)
+ stat->attributes |= STATX_ATTR_COMPRESSED;
+ if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_ENCRYPTED)
+ stat->attributes |= STATX_ATTR_ENCRYPTED;
+
/*
* If on a multiuser mount without unix extensions or cifsacl being
* enabled, and the admin hasn't overridden them, set the ownership
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index fb2934b9b97c..0dafdbae1f8c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -426,6 +426,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
+#ifdef CONFIG_CIFS_XATTR
static ssize_t
move_smb2_ea_to_cifs(char *dst, size_t dst_size,
struct smb2_file_full_ea_info *src, size_t src_size,
@@ -613,6 +614,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
+#endif
static bool
smb2_can_echo(struct TCP_Server_Info *server)
@@ -3110,6 +3112,46 @@ struct smb_version_values smb21_values = {
.create_lease_size = sizeof(struct create_lease),
};
+struct smb_version_values smb3any_values = {
+ .version_string = SMB3ANY_VERSION_STRING,
+ .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .create_lease_size = sizeof(struct create_lease_v2),
+};
+
+struct smb_version_values smbdefault_values = {
+ .version_string = SMBDEFAULT_VERSION_STRING,
+ .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .create_lease_size = sizeof(struct create_lease_v2),
+};
+
struct smb_version_values smb30_values = {
.version_string = SMB30_VERSION_STRING,
.protocol_id = SMB30_PROT_ID,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5531e7ee1210..6f0e6343c15e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -439,7 +439,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req)
build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
req->NegotiateContextCount = cpu_to_le16(2);
- inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2
+ inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context)
+ sizeof(struct smb2_encryption_neg_context)); /* calculate hash */
}
#else
@@ -491,10 +491,25 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
req->hdr.sync_hdr.SessionId = 0;
- req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
-
- req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */
- inc_rfc1001_len(req, 2);
+ if (strcmp(ses->server->vals->version_string,
+ SMB3ANY_VERSION_STRING) == 0) {
+ req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+ req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+ req->DialectCount = cpu_to_le16(2);
+ inc_rfc1001_len(req, 4);
+ } else if (strcmp(ses->server->vals->version_string,
+ SMBDEFAULT_VERSION_STRING) == 0) {
+ req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+ req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+ req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+ req->DialectCount = cpu_to_le16(3);
+ inc_rfc1001_len(req, 6);
+ } else {
+ /* otherwise send specific dialect */
+ req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
+ req->DialectCount = cpu_to_le16(1);
+ inc_rfc1001_len(req, 2);
+ }
/* only one of SMB2 signing flags may be set in SMB2 request */
if (ses->sign)
@@ -528,16 +543,43 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
*/
if (rc == -EOPNOTSUPP) {
cifs_dbg(VFS, "Dialect not supported by server. Consider "
- "specifying vers=1.0 or vers=2.1 on mount for accessing"
+ "specifying vers=1.0 or vers=2.0 on mount for accessing"
" older servers\n");
goto neg_exit;
} else if (rc != 0)
goto neg_exit;
+ if (strcmp(ses->server->vals->version_string,
+ SMB3ANY_VERSION_STRING) == 0) {
+ if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
+ cifs_dbg(VFS,
+ "SMB2 dialect returned but not requested\n");
+ return -EIO;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
+ cifs_dbg(VFS,
+ "SMB2.1 dialect returned but not requested\n");
+ return -EIO;
+ }
+ } else if (strcmp(ses->server->vals->version_string,
+ SMBDEFAULT_VERSION_STRING) == 0) {
+ if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
+ cifs_dbg(VFS,
+ "SMB2 dialect returned but not requested\n");
+ return -EIO;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
+ /* ops set to 3.0 by default for default so update */
+ ses->server->ops = &smb21_operations;
+ }
+ } else if (le16_to_cpu(rsp->DialectRevision) !=
+ ses->server->vals->protocol_id) {
+ /* if requested single dialect ensure returned dialect matched */
+ cifs_dbg(VFS, "Illegal 0x%x dialect returned: not requested\n",
+ le16_to_cpu(rsp->DialectRevision));
+ return -EIO;
+ }
+
cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode);
- /* BB we may eventually want to match the negotiated vs. requested
- dialect, even though we are only requesting one at a time */
if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID))
cifs_dbg(FYI, "negotiated smb2.0 dialect\n");
else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID))
@@ -558,6 +600,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
}
server->dialect = le16_to_cpu(rsp->DialectRevision);
+ /* BB: add check that dialect was valid given dialect(s) we asked for */
+
/* SMB2 only has an extended negflavor */
server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
/* set it to the maximum buffer size value we can send with 1 credit */
@@ -606,20 +650,28 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
struct validate_negotiate_info_req vneg_inbuf;
struct validate_negotiate_info_rsp *pneg_rsp;
u32 rsplen;
+ u32 inbuflen; /* max of 4 dialects */
cifs_dbg(FYI, "validate negotiate\n");
/*
* validation ioctl must be signed, so no point sending this if we
- * can not sign it. We could eventually change this to selectively
+ * can not sign it (ie are not known user). Even if signing is not
+ * required (enabled but not negotiated), in those cases we selectively
* sign just this, the first and only signed request on a connection.
- * This is good enough for now since a user who wants better security
- * would also enable signing on the mount. Having validation of
- * negotiate info for signed connections helps reduce attack vectors
+ * Having validation of negotiate info helps reduce attack vectors.
*/
- if (tcon->ses->server->sign == false)
+ if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
return 0; /* validation requires signing */
+ if (tcon->ses->user_name == NULL) {
+ cifs_dbg(FYI, "Can't validate negotiate: null user mount\n");
+ return 0; /* validation requires signing */
+ }
+
+ if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
+ cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
+
vneg_inbuf.Capabilities =
cpu_to_le32(tcon->ses->server->vals->req_capabilities);
memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
@@ -634,9 +686,30 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
else
vneg_inbuf.SecurityMode = 0;
- vneg_inbuf.DialectCount = cpu_to_le16(1);
- vneg_inbuf.Dialects[0] =
- cpu_to_le16(tcon->ses->server->vals->protocol_id);
+
+ if (strcmp(tcon->ses->server->vals->version_string,
+ SMB3ANY_VERSION_STRING) == 0) {
+ vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+ vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+ vneg_inbuf.DialectCount = cpu_to_le16(2);
+ /* structure is big enough for 3 dialects, sending only 2 */
+ inbuflen = sizeof(struct validate_negotiate_info_req) - 2;
+ } else if (strcmp(tcon->ses->server->vals->version_string,
+ SMBDEFAULT_VERSION_STRING) == 0) {
+ vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+ vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+ vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+ vneg_inbuf.DialectCount = cpu_to_le16(3);
+ /* structure is big enough for 3 dialects */
+ inbuflen = sizeof(struct validate_negotiate_info_req);
+ } else {
+ /* otherwise specific dialect was requested */
+ vneg_inbuf.Dialects[0] =
+ cpu_to_le16(tcon->ses->server->vals->protocol_id);
+ vneg_inbuf.DialectCount = cpu_to_le16(1);
+ /* structure is big enough for 3 dialects, sending only 1 */
+ inbuflen = sizeof(struct validate_negotiate_info_req) - 4;
+ }
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
@@ -1110,6 +1183,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
while (sess_data->func)
sess_data->func(sess_data);
+ if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign))
+ cifs_dbg(VFS, "signing requested but authenticated as guest\n");
rc = sess_data->result;
out:
kfree(sess_data);
@@ -1634,7 +1709,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
struct cifs_tcon *tcon = oparms->tcon;
struct cifs_ses *ses = tcon->ses;
struct kvec iov[4];
- struct kvec rsp_iov;
+ struct kvec rsp_iov = {NULL, 0};
int resp_buftype;
int uni_path_len;
__le16 *copy_path = NULL;
@@ -1763,7 +1838,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
if (rc != 0) {
cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
- if (err_buf)
+ if (err_buf && rsp)
*err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4,
GFP_KERNEL);
goto creat_exit;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 393ed5f4e1b6..6c9653a130c8 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -716,7 +716,7 @@ struct validate_negotiate_info_req {
__u8 Guid[SMB2_CLIENT_GUID_SIZE];
__le16 SecurityMode;
__le16 DialectCount;
- __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */
+ __le16 Dialects[3]; /* BB expand this if autonegotiate > 3 dialects */
} __packed;
struct validate_negotiate_info_rsp {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 5fa2211e49ae..96415c65bbdc 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
{
loff_t offset = dio->iocb->ki_pos;
ssize_t transferred = 0;
+ int err;
/*
* AIO submission can race with bio completion to get here while
@@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
if (ret == 0)
ret = transferred;
+ /*
+ * Try again to invalidate clean pages which might have been cached by
+ * non-direct readahead, or faulted in by get_user_pages() if the source
+ * of the write was an mmap'ed region of the file we're writing. Either
+ * one is a pretty crazy thing to do, so we don't support it 100%. If
+ * this invalidation fails, tough, the write still worked...
+ */
+ if (ret > 0 && dio->op == REQ_OP_WRITE &&
+ dio->inode->i_mapping->nrpages) {
+ err = invalidate_inode_pages2_range(dio->inode->i_mapping,
+ offset >> PAGE_SHIFT,
+ (offset + ret - 1) >> PAGE_SHIFT);
+ WARN_ON_ONCE(err);
+ }
+
if (dio->end_io) {
- int err;
// XXX: ki_pos??
err = dio->end_io(dio->iocb, offset, ret, dio->private);
@@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio)
struct dio *dio = bio->bi_private;
unsigned long remaining;
unsigned long flags;
+ bool defer_completion = false;
/* cleanup the bio */
dio_bio_complete(dio, bio);
@@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
- if (dio->result && dio->defer_completion) {
+ /*
+ * Defer completion when defer_completion is set or
+ * when the inode has pages mapped and this is AIO write.
+ * We need to invalidate those pages because there is a
+ * chance they contain stale data in the case buffered IO
+ * went in between AIO submission and completion into the
+ * same region.
+ */
+ if (dio->result)
+ defer_completion = dio->defer_completion ||
+ (dio->op == REQ_OP_WRITE &&
+ dio->inode->i_mapping->nrpages);
+ if (defer_completion) {
INIT_WORK(&dio->complete_work, dio_aio_complete_work);
queue_work(dio->inode->i_sb->s_dio_done_wq,
&dio->complete_work);
@@ -838,7 +866,8 @@ out:
*/
if (sdio->boundary) {
ret = dio_send_cur_page(dio, sdio, map_bh);
- dio_bio_submit(dio, sdio);
+ if (sdio->bio)
+ dio_bio_submit(dio, sdio);
put_page(sdio->cur_page);
sdio->cur_page = NULL;
}
@@ -1210,10 +1239,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
* For AIO O_(D)SYNC writes we need to defer completions to a workqueue
* so that we can call ->fsync.
*/
- if (dio->is_async && iov_iter_rw(iter) == WRITE &&
- ((iocb->ki_filp->f_flags & O_DSYNC) ||
- IS_SYNC(iocb->ki_filp->f_mapping->host))) {
- retval = dio_set_defer_completion(dio);
+ if (dio->is_async && iov_iter_rw(iter) == WRITE) {
+ retval = 0;
+ if ((iocb->ki_filp->f_flags & O_DSYNC) ||
+ IS_SYNC(iocb->ki_filp->f_mapping->host))
+ retval = dio_set_defer_completion(dio);
+ else if (!dio->inode->i_sb->s_dio_done_wq) {
+ /*
+ * In case of AIO write racing with buffered read we
+ * need to defer completion. We can't decide this now,
+ * however the workqueue needs to be initialized here.
+ */
+ retval = sb_init_dio_done_wq(dio->inode->i_sb);
+ }
if (retval) {
/*
* We grab i_mutex only for reads so we don't have
diff --git a/fs/exec.c b/fs/exec.c
index ac34d9724684..5470d3c1892a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1410,7 +1410,7 @@ static void free_bprm(struct linux_binprm *bprm)
kfree(bprm);
}
-int bprm_change_interp(char *interp, struct linux_binprm *bprm)
+int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
{
/* If a binfmt changed the interp, free it first. */
if (bprm->interp != bprm->filename)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9a7c90386947..4b4a72f392be 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2525,7 +2525,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
void stop_discard_thread(struct f2fs_sb_info *sbi);
-void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
+void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount);
void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void release_discard_addrs(struct f2fs_sb_info *sbi);
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 621b9b3d320b..c695ff462ee6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1210,11 +1210,11 @@ void stop_discard_thread(struct f2fs_sb_info *sbi)
}
/* This comes from f2fs_put_super and f2fs_trim_fs */
-void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
+void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount)
{
__issue_discard_cmd(sbi, false);
__drop_discard_cmd(sbi);
- __wait_discard_cmd(sbi, false);
+ __wait_discard_cmd(sbi, !umount);
}
static void mark_discard_range_all(struct f2fs_sb_info *sbi)
@@ -2244,7 +2244,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
}
/* It's time to issue all the filed discards */
mark_discard_range_all(sbi);
- f2fs_wait_discard_bios(sbi);
+ f2fs_wait_discard_bios(sbi, false);
out:
range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
return err;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 89f61eb3d167..933c3d529e65 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -801,7 +801,7 @@ static void f2fs_put_super(struct super_block *sb)
}
/* be sure to wait for any on-going discard commands */
- f2fs_wait_discard_bios(sbi);
+ f2fs_wait_discard_bios(sbi, true);
if (f2fs_discard_en(sbi) && !sbi->discard_blks) {
struct cp_control cpc = {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 0491da3b28c3..448a1119f0be 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -749,7 +749,7 @@ static void send_sigio_to_task(struct task_struct *p,
* specific si_codes. In that case use SI_SIGIO instead
* to remove the ambiguity.
*/
- if (sig_specific_sicodes(signum))
+ if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
si.si_code = SI_SIGIO;
/* Make sure we are called with one of the POLL_*
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 98e845b7841b..11066d8647d2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1945,13 +1945,9 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
{
struct gfs2_glock_iter *gi = seq->private;
loff_t n = *pos;
- int ret;
-
- if (gi->last_pos <= *pos)
- n = (*pos - gi->last_pos);
- ret = rhashtable_walk_start(&gi->hti);
- if (ret)
+ rhashtable_walk_enter(&gl_hash_table, &gi->hti);
+ if (rhashtable_walk_start(&gi->hti) != 0)
return NULL;
do {
@@ -1959,6 +1955,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
} while (gi->gl && n--);
gi->last_pos = *pos;
+
return gi->gl;
}
@@ -1970,6 +1967,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
(*pos)++;
gi->last_pos = *pos;
gfs2_glock_iter_next(gi);
+
return gi->gl;
}
@@ -1980,6 +1978,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
gi->gl = NULL;
rhashtable_walk_stop(&gi->hti);
+ rhashtable_walk_exit(&gi->hti);
}
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2042,12 +2041,10 @@ static int __gfs2_glocks_open(struct inode *inode, struct file *file,
struct gfs2_glock_iter *gi = seq->private;
gi->sdp = inode->i_private;
- gi->last_pos = 0;
seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
if (seq->buf)
seq->size = GFS2_SEQ_GOODSIZE;
gi->gl = NULL;
- rhashtable_walk_enter(&gl_hash_table, &gi->hti);
}
return ret;
}
@@ -2063,7 +2060,6 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file)
struct gfs2_glock_iter *gi = seq->private;
gi->gl = NULL;
- rhashtable_walk_exit(&gi->hti);
return seq_release_private(inode, file);
}
diff --git a/fs/iomap.c b/fs/iomap.c
index 269b24a01f32..be61cf742b5e 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -713,8 +713,24 @@ struct iomap_dio {
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
struct kiocb *iocb = dio->iocb;
+ struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
+ /*
+ * Try again to invalidate clean pages which might have been cached by
+ * non-direct readahead, or faulted in by get_user_pages() if the source
+ * of the write was an mmap'ed region of the file we're writing. Either
+ * one is a pretty crazy thing to do, so we don't support it 100%. If
+ * this invalidation fails, tough, the write still worked...
+ */
+ if (!dio->error &&
+ (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ iocb->ki_pos >> PAGE_SHIFT,
+ (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
+ WARN_ON_ONCE(ret);
+ }
+
if (dio->end_io) {
ret = dio->end_io(iocb,
dio->error ? dio->error : dio->size,
@@ -993,6 +1009,13 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
WARN_ON_ONCE(ret);
ret = 0;
+ if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+ !inode->i_sb->s_dio_done_wq) {
+ ret = sb_init_dio_done_wq(inode->i_sb);
+ if (ret < 0)
+ goto out_free_dio;
+ }
+
inode_dio_begin(inode);
blk_start_plug(&plug);
@@ -1015,13 +1038,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (ret < 0)
iomap_dio_set_error(dio, ret);
- if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
- !inode->i_sb->s_dio_done_wq) {
- ret = sb_init_dio_done_wq(inode->i_sb);
- if (ret < 0)
- iomap_dio_set_error(dio, ret);
- }
-
if (!atomic_dec_and_test(&dio->ref)) {
if (!is_sync_kiocb(iocb))
return -EIOCBQUEUED;
@@ -1042,19 +1058,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
ret = iomap_dio_complete(dio);
- /*
- * Try again to invalidate clean pages which might have been cached by
- * non-direct readahead, or faulted in by get_user_pages() if the source
- * of the write was an mmap'ed region of the file we're writing. Either
- * one is a pretty crazy thing to do, so we don't support it 100%. If
- * this invalidation fails, tough, the write still worked...
- */
- if (iov_iter_rw(iter) == WRITE) {
- int err = invalidate_inode_pages2_range(mapping,
- start >> PAGE_SHIFT, end >> PAGE_SHIFT);
- WARN_ON_ONCE(err);
- }
-
return ret;
out_free_dio:
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index db692f554158..447a24d77b89 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -514,9 +514,11 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
if (sbi->s_fmode != ISOFS_INVALID_MODE)
seq_printf(m, ",fmode=%o", sbi->s_fmode);
+#ifdef CONFIG_JOLIET
if (sbi->s_nls_iocharset &&
strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+#endif
return 0;
}
diff --git a/fs/mpage.c b/fs/mpage.c
index 37bb77c1302c..c991faec70b9 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -468,6 +468,16 @@ static void clean_buffers(struct page *page, unsigned first_unmapped)
try_to_free_buffers(page);
}
+/*
+ * For situations where we want to clean all buffers attached to a page.
+ * We don't need to calculate how many buffers are attached to the page,
+ * we just need to specify a number larger than the maximum number of buffers.
+ */
+void clean_page_buffers(struct page *page)
+{
+ clean_buffers(page, ~0U);
+}
+
static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
@@ -605,10 +615,8 @@ alloc_new:
if (bio == NULL) {
if (first_unmapped == blocks_per_page) {
if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
- page, wbc)) {
- clean_buffers(page, first_unmapped);
+ page, wbc))
goto out;
- }
}
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
diff --git a/fs/namespace.c b/fs/namespace.c
index 54059b142d6b..3b601f115b6c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -468,7 +468,9 @@ static inline int may_write_real(struct file *file)
/* File refers to upper, writable layer? */
upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
- if (upperdentry && file_inode(file) == d_inode(upperdentry))
+ if (upperdentry &&
+ (file_inode(file) == d_inode(upperdentry) ||
+ file_inode(file) == d_inode(dentry)))
return 0;
/* Lower layer: can't write to real file, sorry... */
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index efebe6cf4378..22880ef6d8dd 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -218,7 +218,6 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
static void pnfs_init_server(struct nfs_server *server)
{
rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
- rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
}
#else
@@ -888,6 +887,7 @@ struct nfs_server *nfs_alloc_server(void)
ida_init(&server->openowner_id);
ida_init(&server->lockowner_id);
pnfs_init_server(server);
+ rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
return server;
}
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 44c638b7876c..508126eb49f9 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -745,7 +745,8 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
dprintk("--> %s\n", __func__);
- nfs4_fl_put_deviceid(fl->dsaddr);
+ if (fl->dsaddr != NULL)
+ nfs4_fl_put_deviceid(fl->dsaddr);
/* This assumes a single RW lseg */
if (lseg->pls_range.iomode == IOMODE_RW) {
struct nfs4_filelayout *flo;
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index dd5d27da8c0c..30426c1a1bbd 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -274,7 +274,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
ssize_t ret;
ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
- if (ret <= 0)
+ if (ret < 0)
return ERR_PTR(ret);
rkey = request_key(&key_type_id_resolver, desc, "");
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6c61e2b99635..f90090e8c959 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8399,8 +8399,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
lo = NFS_I(inode)->layout;
/* If the open stateid was bad, then recover it. */
if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
- nfs4_stateid_match_other(&lgp->args.stateid,
- &lgp->args.ctx->state->stateid)) {
+ !nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
spin_unlock(&inode->i_lock);
exception->state = lgp->args.ctx->state;
exception->stateid = &lgp->args.stateid;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 37c8af003275..14ed9791ec9c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1842,8 +1842,8 @@ static void encode_create_session(struct xdr_stream *xdr,
* Assumes OPEN is the biggest non-idempotent compound.
* 2 is the verifier.
*/
- max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE +
- RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT;
+ max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + 2)
+ * XDR_UNIT + RPC_MAX_AUTH_SIZE;
encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr);
p = reserve_space(xdr, 16 + 2*28 + 20 + clnt->cl_nodelen + 12);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3c69db7d4905..8487486ec496 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -927,6 +927,13 @@ nfsd4_secinfo_release(union nfsd4_op_u *u)
exp_put(u->secinfo.si_exp);
}
+static void
+nfsd4_secinfo_no_name_release(union nfsd4_op_u *u)
+{
+ if (u->secinfo_no_name.sin_exp)
+ exp_put(u->secinfo_no_name.sin_exp);
+}
+
static __be32
nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -2375,7 +2382,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_SECINFO_NO_NAME] = {
.op_func = nfsd4_secinfo_no_name,
- .op_release = nfsd4_secinfo_release,
+ .op_release = nfsd4_secinfo_no_name_release,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO_NO_NAME",
.op_rsize_bop = nfsd4_secinfo_rsize,
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index aad97b30d5e6..c441f9387a1b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -561,10 +561,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
c->tmpfile = true;
err = ovl_copy_up_locked(c);
} else {
- err = -EIO;
- if (lock_rename(c->workdir, c->destdir) != NULL) {
- pr_err("overlayfs: failed to lock workdir+upperdir\n");
- } else {
+ err = ovl_lock_rename_workdir(c->workdir, c->destdir);
+ if (!err) {
err = ovl_copy_up_locked(c);
unlock_rename(c->workdir, c->destdir);
}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 3309b1912241..cc961a3bd3bd 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -216,26 +216,6 @@ out_unlock:
return err;
}
-static int ovl_lock_rename_workdir(struct dentry *workdir,
- struct dentry *upperdir)
-{
- /* Workdir should not be the same as upperdir */
- if (workdir == upperdir)
- goto err;
-
- /* Workdir should not be subdir of upperdir and vice versa */
- if (lock_rename(workdir, upperdir) != NULL)
- goto err_unlock;
-
- return 0;
-
-err_unlock:
- unlock_rename(workdir, upperdir);
-err:
- pr_err("overlayfs: failed to lock workdir+upperdir\n");
- return -EIO;
-}
-
static struct dentry *ovl_clear_empty(struct dentry *dentry,
struct list_head *list)
{
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c3addd1114f1..654bea1a5ac9 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -506,6 +506,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
if (IS_ERR(index)) {
+ err = PTR_ERR(index);
pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
"overlayfs: mount with '-o index=off' to disable inodes index.\n",
d_inode(origin)->i_ino, name.len, name.name,
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index d4e8c1a08fb0..c706a6f99928 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -235,6 +235,7 @@ bool ovl_inuse_trylock(struct dentry *dentry);
void ovl_inuse_unlock(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry, bool *locked);
void ovl_nlink_end(struct dentry *dentry, bool locked);
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
static inline bool ovl_is_impuredir(struct dentry *dentry)
{
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 878a750986dd..25d9b5adcd42 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -37,6 +37,9 @@ struct ovl_fs {
bool noxattr;
/* sb common to all layers */
struct super_block *same_sb;
+ /* Did we take the inuse lock? */
+ bool upperdir_locked;
+ bool workdir_locked;
};
/* private information held for every overlayfs dentry */
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 62e9b22a2077..0f85ee9c3268 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -988,6 +988,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
struct path *lowerstack, unsigned int numlower)
{
int err;
+ struct dentry *index = NULL;
struct inode *dir = dentry->d_inode;
struct path path = { .mnt = mnt, .dentry = dentry };
LIST_HEAD(list);
@@ -1007,8 +1008,6 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
inode_lock_nested(dir, I_MUTEX_PARENT);
list_for_each_entry(p, &list, l_node) {
- struct dentry *index;
-
if (p->name[0] == '.') {
if (p->len == 1)
continue;
@@ -1018,6 +1017,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
index = lookup_one_len(p->name, dentry, p->len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
+ index = NULL;
break;
}
err = ovl_verify_index(index, lowerstack, numlower);
@@ -1029,7 +1029,9 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
break;
}
dput(index);
+ index = NULL;
}
+ dput(index);
inode_unlock(dir);
out:
ovl_cache_free(&list);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fd5ea4facc62..092d150643c1 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -211,9 +211,10 @@ static void ovl_put_super(struct super_block *sb)
dput(ufs->indexdir);
dput(ufs->workdir);
- ovl_inuse_unlock(ufs->workbasedir);
+ if (ufs->workdir_locked)
+ ovl_inuse_unlock(ufs->workbasedir);
dput(ufs->workbasedir);
- if (ufs->upper_mnt)
+ if (ufs->upper_mnt && ufs->upperdir_locked)
ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
mntput(ufs->upper_mnt);
for (i = 0; i < ufs->numlower; i++)
@@ -881,9 +882,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
goto out_put_upperpath;
err = -EBUSY;
- if (!ovl_inuse_trylock(upperpath.dentry)) {
- pr_err("overlayfs: upperdir is in-use by another mount\n");
+ if (ovl_inuse_trylock(upperpath.dentry)) {
+ ufs->upperdir_locked = true;
+ } else if (ufs->config.index) {
+ pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
goto out_put_upperpath;
+ } else {
+ pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
}
err = ovl_mount_dir(ufs->config.workdir, &workpath);
@@ -901,9 +906,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
}
err = -EBUSY;
- if (!ovl_inuse_trylock(workpath.dentry)) {
- pr_err("overlayfs: workdir is in-use by another mount\n");
+ if (ovl_inuse_trylock(workpath.dentry)) {
+ ufs->workdir_locked = true;
+ } else if (ufs->config.index) {
+ pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
goto out_put_workpath;
+ } else {
+ pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
}
ufs->workbasedir = workpath.dentry;
@@ -1156,11 +1165,13 @@ out_put_lowerpath:
out_free_lowertmp:
kfree(lowertmp);
out_unlock_workdentry:
- ovl_inuse_unlock(workpath.dentry);
+ if (ufs->workdir_locked)
+ ovl_inuse_unlock(workpath.dentry);
out_put_workpath:
path_put(&workpath);
out_unlock_upperdentry:
- ovl_inuse_unlock(upperpath.dentry);
+ if (ufs->upperdir_locked)
+ ovl_inuse_unlock(upperpath.dentry);
out_put_upperpath:
path_put(&upperpath);
out_free_config:
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 117794582f9f..b9b239fa5cfd 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -430,7 +430,7 @@ void ovl_inuse_unlock(struct dentry *dentry)
}
}
-/* Called must hold OVL_I(inode)->oi_lock */
+/* Caller must hold OVL_I(inode)->lock */
static void ovl_cleanup_index(struct dentry *dentry)
{
struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
@@ -469,6 +469,9 @@ static void ovl_cleanup_index(struct dentry *dentry)
err = PTR_ERR(index);
if (!IS_ERR(index))
err = ovl_cleanup(dir, index);
+ else
+ index = NULL;
+
inode_unlock(dir);
if (err)
goto fail;
@@ -557,3 +560,22 @@ void ovl_nlink_end(struct dentry *dentry, bool locked)
mutex_unlock(&OVL_I(d_inode(dentry))->lock);
}
}
+
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
+{
+ /* Workdir should not be the same as upperdir */
+ if (workdir == upperdir)
+ goto err;
+
+ /* Workdir should not be subdir of upperdir and vice versa */
+ if (lock_rename(workdir, upperdir) != NULL)
+ goto err_unlock;
+
+ return 0;
+
+err_unlock:
+ unlock_rename(workdir, upperdir);
+err:
+ pr_err("overlayfs: failed to lock workdir+upperdir\n");
+ return -EIO;
+}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 88c355574aa0..77a8eacbe032 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -62,6 +62,7 @@
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/sched/numa_balancing.h>
+#include <linux/sched/task_stack.h>
#include <linux/sched/task.h>
#include <linux/sched/cputime.h>
#include <linux/proc_fs.h>
@@ -118,30 +119,25 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
* simple bit tests.
*/
static const char * const task_state_array[] = {
- "R (running)", /* 0 */
- "S (sleeping)", /* 1 */
- "D (disk sleep)", /* 2 */
- "T (stopped)", /* 4 */
- "t (tracing stop)", /* 8 */
- "X (dead)", /* 16 */
- "Z (zombie)", /* 32 */
+
+ /* states in TASK_REPORT: */
+ "R (running)", /* 0x00 */
+ "S (sleeping)", /* 0x01 */
+ "D (disk sleep)", /* 0x02 */
+ "T (stopped)", /* 0x04 */
+ "t (tracing stop)", /* 0x08 */
+ "X (dead)", /* 0x10 */
+ "Z (zombie)", /* 0x20 */
+ "P (parked)", /* 0x40 */
+
+ /* states beyond TASK_REPORT: */
+ "I (idle)", /* 0x80 */
};
static inline const char *get_task_state(struct task_struct *tsk)
{
- unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT;
-
- /*
- * Parked tasks do not run; they sit in __kthread_parkme().
- * Without this check, we would report them as running, which is
- * clearly wrong, so we report them as sleeping instead.
- */
- if (tsk->state == TASK_PARKED)
- state = TASK_INTERRUPTIBLE;
-
- BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
-
- return task_state_array[fls(state)];
+ BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array));
+ return task_state_array[__get_task_state(tsk)];
}
static inline int get_task_umask(struct task_struct *tsk)
@@ -421,7 +417,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
* esp and eip are intentionally zeroed out. There is no
* non-racy way to read them without freezing the task.
* Programs that need reliable values can use ptrace(2).
+ *
+ * The only exception is if the task is core dumping because
+ * a program is not able to use ptrace(2) in that case. It is
+ * safe because the task has stopped executing permanently.
*/
+ if (permitted && (task->flags & PF_DUMPCORE)) {
+ eip = KSTK_EIP(task);
+ esp = KSTK_ESP(task);
+ }
}
get_task_comm(tcomm, task);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8381db9db6d9..52ad15192e72 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1297,21 +1297,18 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space,
spin_lock(&dquot->dq_dqb_lock);
if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags))
- goto add;
+ goto finish;
tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
+ space + rsv_space;
- if (flags & DQUOT_SPACE_NOFAIL)
- goto add;
-
if (dquot->dq_dqb.dqb_bhardlimit &&
tspace > dquot->dq_dqb.dqb_bhardlimit &&
!ignore_hardlimit(dquot)) {
if (flags & DQUOT_SPACE_WARN)
prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN);
ret = -EDQUOT;
- goto out;
+ goto finish;
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1322,7 +1319,7 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space,
if (flags & DQUOT_SPACE_WARN)
prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN);
ret = -EDQUOT;
- goto out;
+ goto finish;
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1338,13 +1335,21 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space,
* be always printed
*/
ret = -EDQUOT;
- goto out;
+ goto finish;
}
}
-add:
- dquot->dq_dqb.dqb_rsvspace += rsv_space;
- dquot->dq_dqb.dqb_curspace += space;
-out:
+finish:
+ /*
+ * We have to be careful and go through warning generation & grace time
+ * setting even if DQUOT_SPACE_NOFAIL is set. That's why we check it
+ * only here...
+ */
+ if (flags & DQUOT_SPACE_NOFAIL)
+ ret = 0;
+ if (!ret) {
+ dquot->dq_dqb.dqb_rsvspace += rsv_space;
+ dquot->dq_dqb.dqb_curspace += space;
+ }
spin_unlock(&dquot->dq_dqb_lock);
return ret;
}
@@ -1980,7 +1985,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
&warn_to[cnt]);
if (ret) {
+ spin_lock(&transfer_to[cnt]->dq_dqb_lock);
dquot_decr_inodes(transfer_to[cnt], inode_usage);
+ spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
goto over_quota;
}
}
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index c0187cda2c1e..a73e5b34db41 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -328,12 +328,16 @@ static int v2_write_dquot(struct dquot *dquot)
if (!dquot->dq_off) {
alloc = true;
down_write(&dqopt->dqio_sem);
+ } else {
+ down_read(&dqopt->dqio_sem);
}
ret = qtree_write_dquot(
sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
dquot);
if (alloc)
up_write(&dqopt->dqio_sem);
+ else
+ up_read(&dqopt->dqio_sem);
return ret;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index a2b9a47235c5..f0d4b16873e8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -112,7 +112,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
* In the generic case the entire file is data, so as long as
* offset isn't at the end of the file then the offset is data.
*/
- if (offset >= eof)
+ if ((unsigned long long)offset >= eof)
return -ENXIO;
break;
case SEEK_HOLE:
@@ -120,7 +120,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
* There is a virtual hole at the end of the file, so as long as
* offset isn't i_size or larger, return i_size.
*/
- if (offset >= eof)
+ if ((unsigned long long)offset >= eof)
return -ENXIO;
offset = eof;
break;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ef4b48d1ea42..1c713fd5b3e6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -588,6 +588,12 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
break;
if (ACCESS_ONCE(ctx->released) ||
fatal_signal_pending(current)) {
+ /*
+ * &ewq->wq may be queued in fork_event, but
+ * __remove_wait_queue ignores the head
+ * parameter. It would be a problem if it
+ * didn't.
+ */
__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
if (ewq->msg.event == UFFD_EVENT_FORK) {
struct userfaultfd_ctx *new;
@@ -1061,6 +1067,12 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
(unsigned long)
uwq->msg.arg.reserved.reserved1;
list_move(&uwq->wq.entry, &fork_event);
+ /*
+ * fork_nctx can be freed as soon as
+ * we drop the lock, unless we take a
+ * reference on it.
+ */
+ userfaultfd_ctx_get(fork_nctx);
spin_unlock(&ctx->event_wqh.lock);
ret = 0;
break;
@@ -1091,19 +1103,53 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (!ret && msg->event == UFFD_EVENT_FORK) {
ret = resolve_userfault_fork(ctx, fork_nctx, msg);
+ spin_lock(&ctx->event_wqh.lock);
+ if (!list_empty(&fork_event)) {
+ /*
+ * The fork thread didn't abort, so we can
+ * drop the temporary refcount.
+ */
+ userfaultfd_ctx_put(fork_nctx);
+
+ uwq = list_first_entry(&fork_event,
+ typeof(*uwq),
+ wq.entry);
+ /*
+ * If fork_event list wasn't empty and in turn
+ * the event wasn't already released by fork
+ * (the event is allocated on fork kernel
+ * stack), put the event back to its place in
+ * the event_wq. fork_event head will be freed
+ * as soon as we return so the event cannot
+ * stay queued there no matter the current
+ * "ret" value.
+ */
+ list_del(&uwq->wq.entry);
+ __add_wait_queue(&ctx->event_wqh, &uwq->wq);
- if (!ret) {
- spin_lock(&ctx->event_wqh.lock);
- if (!list_empty(&fork_event)) {
- uwq = list_first_entry(&fork_event,
- typeof(*uwq),
- wq.entry);
- list_del(&uwq->wq.entry);
- __add_wait_queue(&ctx->event_wqh, &uwq->wq);
+ /*
+ * Leave the event in the waitqueue and report
+ * error to userland if we failed to resolve
+ * the userfault fork.
+ */
+ if (likely(!ret))
userfaultfd_event_complete(ctx, uwq);
- }
- spin_unlock(&ctx->event_wqh.lock);
+ } else {
+ /*
+ * Here the fork thread aborted and the
+ * refcount from the fork thread on fork_nctx
+ * has already been released. We still hold
+ * the reference we took before releasing the
+ * lock above. If resolve_userfault_fork
+ * failed we've to drop it because the
+ * fork_nctx has to be freed in such case. If
+ * it succeeded we'll hold it because the new
+ * uffd references it.
+ */
+ if (ret)
+ userfaultfd_ctx_put(fork_nctx);
}
+ spin_unlock(&ctx->event_wqh.lock);
}
return ret;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4424f7fecf14..61cd28ba25f3 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -250,7 +250,7 @@ xattr_getsecurity(struct inode *inode, const char *name, void *value,
}
memcpy(value, buffer, len);
out:
- security_release_secctx(buffer, len);
+ kfree(buffer);
out_noalloc:
return len;
}
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index b008ff3250eb..df3e600835e8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -156,7 +156,8 @@ __xfs_ag_resv_free(
trace_xfs_ag_resv_free(pag, type, 0);
resv = xfs_perag_resv(pag, type);
- pag->pag_mount->m_ag_max_usable += resv->ar_asked;
+ if (pag->pag_agno == 0)
+ pag->pag_mount->m_ag_max_usable += resv->ar_asked;
/*
* AGFL blocks are always considered "free", so whatever
* was reserved at mount time must be given back at umount.
@@ -216,7 +217,14 @@ __xfs_ag_resv_init(
return error;
}
- mp->m_ag_max_usable -= ask;
+ /*
+ * Reduce the maximum per-AG allocation length by however much we're
+ * trying to reserve for an AG. Since this is a filesystem-wide
+ * counter, we only make the adjustment for AG 0. This assumes that
+ * there aren't any AGs hungrier for per-AG reservation than AG 0.
+ */
+ if (pag->pag_agno == 0)
+ mp->m_ag_max_usable -= ask;
resv = xfs_perag_resv(pag, type);
resv->ar_asked = ask;
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 744dcaec34cc..f965ce832bc0 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1584,6 +1584,10 @@ xfs_alloc_ag_vextent_small(
bp = xfs_btree_get_bufs(args->mp, args->tp,
args->agno, fbno, 0);
+ if (!bp) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
xfs_trans_binval(args->tp, bp);
}
args->len = 1;
@@ -2141,6 +2145,10 @@ xfs_alloc_fix_freelist(
if (error)
goto out_agbp_relse;
bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
+ if (!bp) {
+ error = -EFSCORRUPTED;
+ goto out_agbp_relse;
+ }
xfs_trans_binval(tp, bp);
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 459f4b4f08fe..def32fa1c225 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -49,7 +49,6 @@
#include "xfs_rmap.h"
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"
-#include "xfs_rmap_btree.h"
#include "xfs_icache.h"
@@ -192,12 +191,8 @@ xfs_bmap_worst_indlen(
int maxrecs; /* maximum record count at this level */
xfs_mount_t *mp; /* mount structure */
xfs_filblks_t rval; /* return value */
- xfs_filblks_t orig_len;
mp = ip->i_mount;
-
- /* Calculate the worst-case size of the bmbt. */
- orig_len = len;
maxrecs = mp->m_bmap_dmxr[0];
for (level = 0, rval = 0;
level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
@@ -205,20 +200,12 @@ xfs_bmap_worst_indlen(
len += maxrecs - 1;
do_div(len, maxrecs);
rval += len;
- if (len == 1) {
- rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+ if (len == 1)
+ return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
level - 1;
- break;
- }
if (level == 0)
maxrecs = mp->m_bmap_dmxr[1];
}
-
- /* Calculate the worst-case size of the rmapbt. */
- if (xfs_sb_version_hasrmapbt(&mp->m_sb))
- rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
- mp->m_rmap_maxlevels;
-
return rval;
}
@@ -1490,14 +1477,14 @@ xfs_bmap_isaeof(
int is_empty;
int error;
- bma->aeof = 0;
+ bma->aeof = false;
error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
&is_empty);
if (error)
return error;
if (is_empty) {
- bma->aeof = 1;
+ bma->aeof = true;
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 988bb3f31446..dfd643909f85 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1962,7 +1962,7 @@ xfs_difree_inobt(
if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
- xic->deleted = 1;
+ xic->deleted = true;
xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
@@ -1989,7 +1989,7 @@ xfs_difree_inobt(
xfs_difree_inode_chunk(mp, agno, &rec, dfops);
} else {
- xic->deleted = 0;
+ xic->deleted = false;
error = xfs_inobt_update(cur, &rec);
if (error) {
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 8372e9bcd7b6..71de185735e0 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -270,6 +270,7 @@ typedef struct xfs_inode_log_format {
uint32_t ilf_fields; /* flags for fields logged */
uint16_t ilf_asize; /* size of attr d/ext/root */
uint16_t ilf_dsize; /* size of data/ext/root */
+ uint32_t ilf_pad; /* pad for 64 bit boundary */
uint64_t ilf_ino; /* inode number */
union {
uint32_t ilfu_rdev; /* rdev value for dev inode*/
@@ -280,29 +281,17 @@ typedef struct xfs_inode_log_format {
int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_t;
-typedef struct xfs_inode_log_format_32 {
- uint16_t ilf_type; /* inode log item type */
- uint16_t ilf_size; /* size of this item */
- uint32_t ilf_fields; /* flags for fields logged */
- uint16_t ilf_asize; /* size of attr d/ext/root */
- uint16_t ilf_dsize; /* size of data/ext/root */
- uint64_t ilf_ino; /* inode number */
- union {
- uint32_t ilfu_rdev; /* rdev value for dev inode*/
- uuid_t ilfu_uuid; /* mount point value */
- } ilf_u;
- int64_t ilf_blkno; /* blkno of inode buffer */
- int32_t ilf_len; /* len of inode buffer */
- int32_t ilf_boffset; /* off of inode in buffer */
-} __attribute__((packed)) xfs_inode_log_format_32_t;
-
-typedef struct xfs_inode_log_format_64 {
+/*
+ * Old 32 bit systems will log in this format without the 64 bit
+ * alignment padding. Recovery will detect this and convert it to the
+ * correct format.
+ */
+struct xfs_inode_log_format_32 {
uint16_t ilf_type; /* inode log item type */
uint16_t ilf_size; /* size of this item */
uint32_t ilf_fields; /* flags for fields logged */
uint16_t ilf_asize; /* size of attr d/ext/root */
uint16_t ilf_dsize; /* size of data/ext/root */
- uint32_t ilf_pad; /* pad for 64 bit boundary */
uint64_t ilf_ino; /* inode number */
union {
uint32_t ilfu_rdev; /* rdev value for dev inode*/
@@ -311,7 +300,7 @@ typedef struct xfs_inode_log_format_64 {
int64_t ilf_blkno; /* blkno of inode buffer */
int32_t ilf_len; /* len of inode buffer */
int32_t ilf_boffset; /* off of inode in buffer */
-} xfs_inode_log_format_64_t;
+} __attribute__((packed));
/*
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 7034e17535de..3354140de07e 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -247,6 +247,8 @@ xfs_set_mode(struct inode *inode, umode_t mode)
int
xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
+ umode_t mode;
+ bool set_mode = false;
int error = 0;
if (!acl)
@@ -257,16 +259,24 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
return error;
if (type == ACL_TYPE_ACCESS) {
- umode_t mode;
-
error = posix_acl_update_mode(inode, &mode, &acl);
if (error)
return error;
- error = xfs_set_mode(inode, mode);
- if (error)
- return error;
+ set_mode = true;
}
set_acl:
- return __xfs_set_acl(inode, acl, type);
+ error = __xfs_set_acl(inode, acl, type);
+ if (error)
+ return error;
+
+ /*
+ * We set the mode after successfully updating the ACL xattr because the
+ * xattr update can fail at ENOSPC and we don't want to change the mode
+ * if the ACL update hasn't been applied.
+ */
+ if (set_mode)
+ error = xfs_set_mode(inode, mode);
+
+ return error;
}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 29172609f2a3..f18e5932aec4 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -343,7 +343,8 @@ xfs_end_io(
error = xfs_reflink_end_cow(ip, offset, size);
break;
case XFS_IO_UNWRITTEN:
- error = xfs_iomap_write_unwritten(ip, offset, size);
+ /* writeback should never update isize */
+ error = xfs_iomap_write_unwritten(ip, offset, size, false);
break;
default:
ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index ebd66b19fbfc..e3a950ed35a8 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -302,6 +302,8 @@ xfs_attr3_node_inactive(
&bp, XFS_ATTR_FORK);
if (error)
return error;
+ node = bp->b_addr;
+ btree = dp->d_ops->node_tree_p(node);
child_fsb = be32_to_cpu(btree[i + 1].before);
xfs_trans_brelse(*trans, bp);
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cd9a5400ba4f..6503cfa44262 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -84,6 +84,7 @@ xfs_zero_extent(
GFP_NOFS, 0);
}
+#ifdef CONFIG_XFS_RT
int
xfs_bmap_rtalloc(
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
@@ -190,6 +191,7 @@ xfs_bmap_rtalloc(
}
return 0;
}
+#endif /* CONFIG_XFS_RT */
/*
* Check if the endoff is outside the last extent. If so the caller will grow
@@ -1459,7 +1461,19 @@ xfs_shift_file_space(
return error;
/*
- * The extent shiting code works on extent granularity. So, if
+ * Clean out anything hanging around in the cow fork now that
+ * we've flushed all the dirty data out to disk to avoid having
+ * CoW extents at the wrong offsets.
+ */
+ if (xfs_is_reflink_inode(ip)) {
+ error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
+ true);
+ if (error)
+ return error;
+ }
+
+ /*
+ * The extent shifting code works on extent granularity. So, if
* stop_fsb is not the starting block of extent, we need to split
* the extent at stop_fsb.
*/
@@ -2110,11 +2124,31 @@ xfs_swap_extents(
ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK;
+ }
+
+ /* Swap the cow forks. */
+ if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ xfs_extnum_t extnum;
+
+ ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS);
+ ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS);
+
+ extnum = ip->i_cnextents;
+ ip->i_cnextents = tip->i_cnextents;
+ tip->i_cnextents = extnum;
+
cowfp = ip->i_cowfp;
ip->i_cowfp = tip->i_cowfp;
tip->i_cowfp = cowfp;
- xfs_inode_set_cowblocks_tag(ip);
- xfs_inode_set_cowblocks_tag(tip);
+
+ if (ip->i_cowfp && ip->i_cnextents)
+ xfs_inode_set_cowblocks_tag(ip);
+ else
+ xfs_inode_clear_cowblocks_tag(ip);
+ if (tip->i_cowfp && tip->i_cnextents)
+ xfs_inode_set_cowblocks_tag(tip);
+ else
+ xfs_inode_clear_cowblocks_tag(tip);
}
xfs_trans_log_inode(tp, ip, src_log_flags);
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 0eaa81dc49be..7d330b3c77c3 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -28,7 +28,20 @@ struct xfs_mount;
struct xfs_trans;
struct xfs_bmalloca;
+#ifdef CONFIG_XFS_RT
int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
+#else /* !CONFIG_XFS_RT */
+/*
+ * Attempts to allocate RT extents when RT is disable indicates corruption and
+ * should trigger a shutdown.
+ */
+static inline int
+xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
+{
+ return -EFSCORRUPTED;
+}
+#endif /* CONFIG_XFS_RT */
+
int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
int whichfork, int *eof);
int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index da14658da310..2f97c12ca75e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1258,8 +1258,6 @@ xfs_buf_ioapply_map(
int size;
int offset;
- total_nr_pages = bp->b_page_count;
-
/* skip the pages in the buffer before the start offset */
page_index = 0;
offset = *buf_offset;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index bd786a9ac2c3..eaf86f55b7f2 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -347,7 +347,7 @@ xfs_verifier_error(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
+ xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
__return_address, bp->b_ops->name, bp->b_bn);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ebdd0bd2b261..56d0e526870c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -58,7 +58,7 @@ xfs_zero_range(
xfs_off_t count,
bool *did_zero)
{
- return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
+ return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
}
int
@@ -377,8 +377,6 @@ restart:
*/
spin_lock(&ip->i_flags_lock);
if (iocb->ki_pos > i_size_read(inode)) {
- bool zero = false;
-
spin_unlock(&ip->i_flags_lock);
if (!drained_dio) {
if (*iolock == XFS_IOLOCK_SHARED) {
@@ -399,7 +397,7 @@ restart:
drained_dio = true;
goto restart;
}
- error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
+ error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
if (error)
return error;
} else
@@ -436,7 +434,6 @@ xfs_dio_write_end_io(
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode);
loff_t offset = iocb->ki_pos;
- bool update_size = false;
int error = 0;
trace_xfs_end_io_direct_write(ip, offset, size);
@@ -447,6 +444,21 @@ xfs_dio_write_end_io(
if (size <= 0)
return size;
+ if (flags & IOMAP_DIO_COW) {
+ error = xfs_reflink_end_cow(ip, offset, size);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Unwritten conversion updates the in-core isize after extent
+ * conversion but before updating the on-disk size. Updating isize any
+ * earlier allows a racing dio read to find unwritten extents before
+ * they are converted.
+ */
+ if (flags & IOMAP_DIO_UNWRITTEN)
+ return xfs_iomap_write_unwritten(ip, offset, size, true);
+
/*
* We need to update the in-core inode size here so that we don't end up
* with the on-disk inode size being outside the in-core inode size. We
@@ -461,20 +473,11 @@ xfs_dio_write_end_io(
spin_lock(&ip->i_flags_lock);
if (offset + size > i_size_read(inode)) {
i_size_write(inode, offset + size);
- update_size = true;
- }
- spin_unlock(&ip->i_flags_lock);
-
- if (flags & IOMAP_DIO_COW) {
- error = xfs_reflink_end_cow(ip, offset, size);
- if (error)
- return error;
- }
-
- if (flags & IOMAP_DIO_UNWRITTEN)
- error = xfs_iomap_write_unwritten(ip, offset, size);
- else if (update_size)
+ spin_unlock(&ip->i_flags_lock);
error = xfs_setfilesize(ip, offset, size);
+ } else {
+ spin_unlock(&ip->i_flags_lock);
+ }
return error;
}
@@ -761,7 +764,7 @@ xfs_file_fallocate(
enum xfs_prealloc_flags flags = 0;
uint iolock = XFS_IOLOCK_EXCL;
loff_t new_size = 0;
- bool do_file_insert = 0;
+ bool do_file_insert = false;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -822,7 +825,7 @@ xfs_file_fallocate(
error = -EINVAL;
goto out_unlock;
}
- do_file_insert = 1;
+ do_file_insert = true;
} else {
flags |= XFS_PREALLOC_SET;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 814ed729881d..560e0b40ac1b 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -521,6 +521,7 @@ __xfs_getfsmap_rtdev(
return query_fn(tp, info);
}
+#ifdef CONFIG_XFS_RT
/* Actually query the realtime bitmap. */
STATIC int
xfs_getfsmap_rtdev_rtbitmap_query(
@@ -561,6 +562,7 @@ xfs_getfsmap_rtdev_rtbitmap(
return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query,
info);
}
+#endif /* CONFIG_XFS_RT */
/* Execute a getfsmap query against the regular data device. */
STATIC int
@@ -795,7 +797,15 @@ xfs_getfsmap_check_keys(
return false;
}
+/*
+ * There are only two devices if we didn't configure RT devices at build time.
+ */
+#ifdef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS 3
+#else
+#define XFS_GETFSMAP_DEVS 2
+#endif /* CONFIG_XFS_RT */
+
/*
* Get filesystem's extents as described in head, and format for
* output. Calls formatter to fill the user's buffer until all
@@ -853,10 +863,12 @@ xfs_getfsmap(
handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
handlers[1].fn = xfs_getfsmap_logdev;
}
+#ifdef CONFIG_XFS_RT
if (mp->m_rtdev_targp) {
handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
}
+#endif /* CONFIG_XFS_RT */
xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
xfs_getfsmap_dev_compare);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5599dda4727a..4ec5b7f45401 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1624,10 +1624,12 @@ xfs_itruncate_extents(
goto out;
/*
- * Clear the reflink flag if we truncated everything.
+ * Clear the reflink flag if there are no data fork blocks and
+ * there are no extents staged in the cow fork.
*/
- if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
- ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
+ if (ip->i_d.di_nblocks == 0)
+ ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
xfs_inode_clear_cowblocks_tag(ip);
}
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6d0f74ec31e8..9bbc2d7cc8cb 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -364,6 +364,9 @@ xfs_inode_to_log_dinode(
to->di_dmstate = from->di_dmstate;
to->di_flags = from->di_flags;
+ /* log a dummy value to ensure log structure is fully initialised */
+ to->di_next_unlinked = NULLAGINO;
+
if (from->di_version == 3) {
to->di_changecount = inode->i_version;
to->di_crtime.t_sec = from->di_crtime.t_sec;
@@ -404,6 +407,11 @@ xfs_inode_item_format_core(
* the second with the on-disk inode structure, and a possible third and/or
* fourth with the inode data/extents/b-tree root and inode attributes
* data/extents/b-tree root.
+ *
+ * Note: Always use the 64 bit inode log format structure so we don't
+ * leave an uninitialised hole in the format item on 64 bit systems. Log
+ * recovery on 32 bit systems handles this just fine, so there's no reason
+ * for not using an initialising the properly padded structure all the time.
*/
STATIC void
xfs_inode_item_format(
@@ -412,8 +420,8 @@ xfs_inode_item_format(
{
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
- struct xfs_inode_log_format *ilf;
struct xfs_log_iovec *vecp = NULL;
+ struct xfs_inode_log_format *ilf;
ASSERT(ip->i_d.di_version > 1);
@@ -425,7 +433,17 @@ xfs_inode_item_format(
ilf->ilf_boffset = ip->i_imap.im_boffset;
ilf->ilf_fields = XFS_ILOG_CORE;
ilf->ilf_size = 2; /* format + core */
- xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
+
+ /*
+ * make sure we don't leak uninitialised data into the log in the case
+ * when we don't log every field in the inode.
+ */
+ ilf->ilf_dsize = 0;
+ ilf->ilf_asize = 0;
+ ilf->ilf_pad = 0;
+ uuid_copy(&ilf->ilf_u.ilfu_uuid, &uuid_null);
+
+ xlog_finish_iovec(lv, vecp, sizeof(*ilf));
xfs_inode_item_format_core(ip, lv, &vecp);
xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
@@ -745,7 +763,7 @@ xfs_iflush_done(
*/
iip = INODE_ITEM(blip);
if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
- lip->li_flags & XFS_LI_FAILED)
+ (blip->li_flags & XFS_LI_FAILED))
need_ail++;
blip = next;
@@ -855,44 +873,29 @@ xfs_istale_done(
}
/*
- * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
- * (which can have different field alignments) to the native version
+ * convert an xfs_inode_log_format struct from the old 32 bit version
+ * (which can have different field alignments) to the native 64 bit version
*/
int
xfs_inode_item_format_convert(
- xfs_log_iovec_t *buf,
- xfs_inode_log_format_t *in_f)
+ struct xfs_log_iovec *buf,
+ struct xfs_inode_log_format *in_f)
{
- if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
- xfs_inode_log_format_32_t *in_f32 = buf->i_addr;
-
- in_f->ilf_type = in_f32->ilf_type;
- in_f->ilf_size = in_f32->ilf_size;
- in_f->ilf_fields = in_f32->ilf_fields;
- in_f->ilf_asize = in_f32->ilf_asize;
- in_f->ilf_dsize = in_f32->ilf_dsize;
- in_f->ilf_ino = in_f32->ilf_ino;
- /* copy biggest field of ilf_u */
- uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
- in_f->ilf_blkno = in_f32->ilf_blkno;
- in_f->ilf_len = in_f32->ilf_len;
- in_f->ilf_boffset = in_f32->ilf_boffset;
- return 0;
- } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
- xfs_inode_log_format_64_t *in_f64 = buf->i_addr;
-
- in_f->ilf_type = in_f64->ilf_type;
- in_f->ilf_size = in_f64->ilf_size;
- in_f->ilf_fields = in_f64->ilf_fields;
- in_f->ilf_asize = in_f64->ilf_asize;
- in_f->ilf_dsize = in_f64->ilf_dsize;
- in_f->ilf_ino = in_f64->ilf_ino;
- /* copy biggest field of ilf_u */
- uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid);
- in_f->ilf_blkno = in_f64->ilf_blkno;
- in_f->ilf_len = in_f64->ilf_len;
- in_f->ilf_boffset = in_f64->ilf_boffset;
- return 0;
- }
- return -EFSCORRUPTED;
+ struct xfs_inode_log_format_32 *in_f32 = buf->i_addr;
+
+ if (buf->i_len != sizeof(*in_f32))
+ return -EFSCORRUPTED;
+
+ in_f->ilf_type = in_f32->ilf_type;
+ in_f->ilf_size = in_f32->ilf_size;
+ in_f->ilf_fields = in_f32->ilf_fields;
+ in_f->ilf_asize = in_f32->ilf_asize;
+ in_f->ilf_dsize = in_f32->ilf_dsize;
+ in_f->ilf_ino = in_f32->ilf_ino;
+ /* copy biggest field of ilf_u */
+ uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
+ in_f->ilf_blkno = in_f32->ilf_blkno;
+ in_f->ilf_len = in_f32->ilf_len;
+ in_f->ilf_boffset = in_f32->ilf_boffset;
+ return 0;
}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 5049e8ab6e30..aa75389be8cf 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1088,6 +1088,7 @@ xfs_ioctl_setattr_dax_invalidate(
int *join_flags)
{
struct inode *inode = VFS_I(ip);
+ struct super_block *sb = inode->i_sb;
int error;
*join_flags = 0;
@@ -1100,7 +1101,7 @@ xfs_ioctl_setattr_dax_invalidate(
if (fa->fsx_xflags & FS_XFLAG_DAX) {
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
return -EINVAL;
- if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
+ if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
return -EINVAL;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index a1909bc064e9..f179bdf1644d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -829,7 +829,8 @@ int
xfs_iomap_write_unwritten(
xfs_inode_t *ip,
xfs_off_t offset,
- xfs_off_t count)
+ xfs_off_t count,
+ bool update_isize)
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t offset_fsb;
@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten(
xfs_trans_t *tp;
xfs_bmbt_irec_t imap;
struct xfs_defer_ops dfops;
+ struct inode *inode = VFS_I(ip);
xfs_fsize_t i_size;
uint resblks;
int error;
@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten(
i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
if (i_size > offset + count)
i_size = offset + count;
-
+ if (update_isize && i_size > i_size_read(inode))
+ i_size_write(inode, i_size);
i_size = xfs_new_eof(ip, i_size);
if (i_size) {
ip->i_d.di_size = i_size;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 00db3ecea084..ee535065c5d0 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
struct xfs_bmbt_irec *, int);
int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
struct xfs_bmbt_irec *);
-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
+int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
struct xfs_bmbt_irec *);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c5107c7bc4bf..dc95a49d62e7 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2515,7 +2515,7 @@ next_lv:
if (lv)
vecp = lv->lv_iovecp;
}
- if (record_cnt == 0 && ordered == false) {
+ if (record_cnt == 0 && !ordered) {
if (!lv)
return 0;
break;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index ea7d4b4e50d0..e9727d0a541a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -704,7 +704,7 @@ xfs_mountfs(
xfs_set_maxicount(mp);
/* enable fail_at_unmount as default */
- mp->m_fail_unmount = 1;
+ mp->m_fail_unmount = true;
error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
if (error)
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 0c381d71b242..0492436a053f 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -134,7 +134,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28);
XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp, 8);
XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52);
- XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64, 56);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20);
XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16);
}
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 2f2dc3c09ad0..4246876df7b7 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -274,7 +274,7 @@ xfs_fs_commit_blocks(
(end - 1) >> PAGE_SHIFT);
WARN_ON_ONCE(error);
- error = xfs_iomap_write_unwritten(ip, start, length);
+ error = xfs_iomap_write_unwritten(ip, start, length, false);
if (error)
goto out_drop_iolock;
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3246815c24d6..37e603bf1591 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -736,7 +736,13 @@ xfs_reflink_end_cow(
/* If there is a hole at end_fsb - 1 go to the previous extent */
if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
got.br_startoff > end_fsb) {
- ASSERT(idx > 0);
+ /*
+ * In case of racing, overlapping AIO writes no COW extents
+ * might be left by the time I/O completes for the loser of
+ * the race. In that case we are done.
+ */
+ if (idx <= 0)
+ goto out_cancel;
xfs_iext_get_extent(ifp, --idx, &got);
}
@@ -809,6 +815,7 @@ next_extent:
out_defer:
xfs_defer_cancel(&dfops);
+out_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c996f4ae4a5f..584cf2d573ba 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1654,6 +1654,16 @@ xfs_fs_fill_super(
"DAX and reflink have not been tested together!");
}
+ if (mp->m_flags & XFS_MOUNT_DISCARD) {
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+
+ if (!blk_queue_discard(q)) {
+ xfs_warn(mp, "mounting with \"discard\" option, but "
+ "the device does not support discard");
+ mp->m_flags &= ~XFS_MOUNT_DISCARD;
+ }
+ }
+
if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
if (mp->m_sb.sb_rblocks) {
xfs_alert(mp,