summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorJoel Stanley <joel@jms.id.au>2020-07-22 12:42:41 +0300
committerJoel Stanley <joel@jms.id.au>2020-07-22 12:42:46 +0300
commit8a9b346382056b52cd7ff141ae9f15a0fcfeb13d (patch)
tree7b855ed138c412bc27713ea8d3feef8939c954a0 /fs
parent2b4829edfc1c225c717652153097470529d171db (diff)
parentd811d29517d1ea05bc159579231652d3ca1c2a01 (diff)
downloadlinux-8a9b346382056b52cd7ff141ae9f15a0fcfeb13d.tar.xz
Merge tag 'v5.4.53' into dev-5.4
This is the 5.4.53 stable release Signed-off-by: Joel Stanley <joel@jms.id.au>
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/cell.c9
-rw-r--r--fs/afs/dir.c9
-rw-r--r--fs/afs/fsclient.c88
-rw-r--r--fs/afs/inode.c12
-rw-r--r--fs/afs/internal.h3
-rw-r--r--fs/afs/misc.c1
-rw-r--r--fs/afs/proc.c1
-rw-r--r--fs/afs/rxrpc.c4
-rw-r--r--fs/afs/write.c5
-rw-r--r--fs/afs/yfsclient.c85
-rw-r--r--fs/block_dev.c12
-rw-r--r--fs/btrfs/block-group.c19
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/extent_io.c40
-rw-r--r--fs/btrfs/inode.c48
-rw-r--r--fs/btrfs/tree-log.c5
-rw-r--r--fs/ceph/export.c9
-rw-r--r--fs/cifs/connect.c28
-rw-r--r--fs/cifs/inode.c19
-rw-r--r--fs/cifs/smb2ops.c12
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/erofs/zdata.h20
-rw-r--r--fs/ext4/dir.c16
-rw-r--r--fs/ext4/extents.c2
-rw-r--r--fs/ext4/super.c23
-rw-r--r--fs/f2fs/checkpoint.c4
-rw-r--r--fs/f2fs/dir.c80
-rw-r--r--fs/f2fs/f2fs.h13
-rw-r--r--fs/f2fs/file.c9
-rw-r--r--fs/f2fs/node.c8
-rw-r--r--fs/f2fs/super.c5
-rw-r--r--fs/fuse/file.c57
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/fuse/inode.c15
-rw-r--r--fs/fuse/virtio_fs.c106
-rw-r--r--fs/gfs2/log.c11
-rw-r--r--fs/gfs2/ops_fstype.c14
-rw-r--r--fs/io_uring.c63
-rw-r--r--fs/jbd2/journal.c103
-rw-r--r--fs/nfs/direct.c13
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c11
-rw-r--r--fs/nfs/inode.c14
-rw-r--r--fs/nfs/nfs4proc.c22
-rw-r--r--fs/nfsd/cache.h2
-rw-r--r--fs/nfsd/netns.h1
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c8
-rw-r--r--fs/nfsd/nfscache.c32
-rw-r--r--fs/nfsd/nfsctl.c29
-rw-r--r--fs/nfsd/nfsd.h3
-rw-r--r--fs/nfsd/vfs.c6
-rw-r--r--fs/ocfs2/dlmglue.c17
-rw-r--r--fs/ocfs2/ocfs2.h1
-rw-r--r--fs/ocfs2/ocfs2_fs.h4
-rw-r--r--fs/ocfs2/suballoc.c9
-rw-r--r--fs/overlayfs/export.c2
-rw-r--r--fs/overlayfs/file.c10
-rw-r--r--fs/overlayfs/super.c23
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c16
61 files changed, 749 insertions, 441 deletions
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 78ba5f932287..296b489861a9 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -154,10 +154,17 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
return ERR_PTR(-ENOMEM);
}
+ cell->name = kmalloc(namelen + 1, GFP_KERNEL);
+ if (!cell->name) {
+ kfree(cell);
+ return ERR_PTR(-ENOMEM);
+ }
+
cell->net = net;
cell->name_len = namelen;
for (i = 0; i < namelen; i++)
cell->name[i] = tolower(name[i]);
+ cell->name[i] = 0;
atomic_set(&cell->usage, 2);
INIT_WORK(&cell->manager, afs_manage_cell);
@@ -203,6 +210,7 @@ parse_failed:
if (ret == -EINVAL)
printk(KERN_ERR "kAFS: bad VL server IP address\n");
error:
+ kfree(cell->name);
kfree(cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
@@ -483,6 +491,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)
afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
key_put(cell->anonymous_key);
+ kfree(cell->name);
kfree(cell);
_leave(" [destroyed]");
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index d1e1caa23c8b..3c486340b220 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -658,7 +658,8 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
cookie->ctx.actor = afs_lookup_filldir;
cookie->name = dentry->d_name;
- cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */
+ cookie->nr_fids = 2; /* slot 0 is saved for the fid we actually want
+ * and slot 1 for the directory */
read_seqlock_excl(&dvnode->cb_lock);
dcbi = rcu_dereference_protected(dvnode->cb_interest,
@@ -709,7 +710,11 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
if (!cookie->inodes)
goto out_s;
- for (i = 1; i < cookie->nr_fids; i++) {
+ cookie->fids[1] = dvnode->fid;
+ cookie->statuses[1].cb_break = afs_calc_vnode_cb_break(dvnode);
+ cookie->inodes[1] = igrab(&dvnode->vfs_inode);
+
+ for (i = 2; i < cookie->nr_fids; i++) {
scb = &cookie->statuses[i];
/* Find any inodes that already exist and get their
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 0a4fed9e706b..5c2729fc07e5 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -56,16 +56,15 @@ static void xdr_dump_bad(const __be32 *bp)
/*
* decode an AFSFetchStatus block
*/
-static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
- struct afs_call *call,
- struct afs_status_cb *scb)
+static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
{
const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
struct afs_file_status *status = &scb->status;
bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
u64 data_version, size;
u32 type, abort_code;
- int ret;
abort_code = ntohl(xdr->abort_code);
@@ -79,7 +78,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
*/
status->abort_code = abort_code;
scb->have_error = true;
- goto good;
+ goto advance;
}
pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
@@ -89,7 +88,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
if (abort_code != 0 && inline_error) {
status->abort_code = abort_code;
scb->have_error = true;
- goto good;
+ goto advance;
}
type = ntohl(xdr->type);
@@ -125,15 +124,13 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
data_version |= (u64)ntohl(xdr->data_version_hi) << 32;
status->data_version = data_version;
scb->have_status = true;
-good:
- ret = 0;
advance:
*_bp = (const void *)*_bp + sizeof(*xdr);
- return ret;
+ return;
bad:
xdr_dump_bad(*_bp);
- ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+ afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
goto advance;
}
@@ -254,9 +251,7 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_AFSCallBack(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
@@ -419,9 +414,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_AFSCallBack(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
@@ -579,12 +572,8 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_AFSFid(&bp, call->out_fid);
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_AFSCallBack(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
@@ -693,9 +682,7 @@ static int afs_deliver_fs_dir_status_and_vol(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
@@ -786,12 +773,8 @@ static int afs_deliver_fs_link(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
@@ -880,12 +863,8 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_AFSFid(&bp, call->out_fid);
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
@@ -988,16 +967,12 @@ static int afs_deliver_fs_rename(struct afs_call *call)
if (ret < 0)
return ret;
+ bp = call->buffer;
/* If the two dirs are the same, we have two copies of the same status
* report, so we just decode it twice.
*/
- bp = call->buffer;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
@@ -1105,9 +1080,7 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
@@ -1285,9 +1258,7 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
@@ -1956,9 +1927,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_AFSCallBack(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
@@ -2064,10 +2033,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
bp = call->buffer;
scb = &call->out_scb[call->count];
- ret = xdr_decode_AFSFetchStatus(&bp, call, scb);
- if (ret < 0)
- return ret;
-
+ xdr_decode_AFSFetchStatus(&bp, call, scb);
call->count++;
if (call->count < call->count2)
goto more_counts;
@@ -2245,9 +2211,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
@@ -2328,9 +2292,7 @@ static int afs_deliver_fs_file_status_and_vol(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 46d2d7cb461d..a74e8e209454 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -171,6 +171,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc,
struct timespec64 t;
umode_t mode;
bool data_changed = false;
+ bool change_size = false;
BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags));
@@ -226,6 +227,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc,
} else {
set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
}
+ change_size = true;
} else if (vnode->status.type == AFS_FTYPE_DIR) {
/* Expected directory change is handled elsewhere so
* that we can locally edit the directory and save on a
@@ -233,11 +235,19 @@ static void afs_apply_status(struct afs_fs_cursor *fc,
*/
if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
data_changed = false;
+ change_size = true;
}
if (data_changed) {
inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
- afs_set_i_size(vnode, status->size);
+
+ /* Only update the size if the data version jumped. If the
+ * file is being modified locally, then we might have our own
+ * idea of what the size should be that's not the same as
+ * what's on the server.
+ */
+ if (change_size)
+ afs_set_i_size(vnode, status->size);
}
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 485cc3b2aaa8..7fe88d918b23 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -161,6 +161,7 @@ struct afs_call {
bool upgrade; /* T to request service upgrade */
bool have_reply_time; /* T if have got reply_time */
bool intr; /* T if interruptible */
+ bool unmarshalling_error; /* T if an unmarshalling error occurred */
u16 service_id; /* Actual service ID (after upgrade) */
unsigned int debug_id; /* Trace ID */
u32 operation_ID; /* operation ID for an incoming call */
@@ -396,7 +397,7 @@ struct afs_cell {
struct afs_vlserver_list __rcu *vl_servers;
u8 name_len; /* Length of name */
- char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */
+ char *name; /* Cell name, case-flattened and NUL-padded */
};
/*
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 52b19e9c1535..5334f1bd2bca 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -83,6 +83,7 @@ int afs_abort_to_error(u32 abort_code)
case UAENOLCK: return -ENOLCK;
case UAENOTEMPTY: return -ENOTEMPTY;
case UAELOOP: return -ELOOP;
+ case UAEOVERFLOW: return -EOVERFLOW;
case UAENOMEDIUM: return -ENOMEDIUM;
case UAEDQUOT: return -EDQUOT;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index fba2ec3a3a9c..106b27011f6d 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -562,6 +562,7 @@ void afs_put_sysnames(struct afs_sysnames *sysnames)
if (sysnames->subs[i] != afs_init_sysname &&
sysnames->subs[i] != sysnames->blank)
kfree(sysnames->subs[i]);
+ kfree(sysnames);
}
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 52aa90fb4fbd..6adab30a8399 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -540,6 +540,8 @@ static void afs_deliver_to_call(struct afs_call *call)
ret = call->type->deliver(call);
state = READ_ONCE(call->state);
+ if (ret == 0 && call->unmarshalling_error)
+ ret = -EBADMSG;
switch (ret) {
case 0:
afs_queue_call_work(call);
@@ -963,5 +965,7 @@ noinline int afs_protocol_error(struct afs_call *call, int error,
enum afs_eproto_cause cause)
{
trace_afs_protocol_error(call, error, cause);
+ if (call)
+ call->unmarshalling_error = true;
return error;
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index cb76566763db..96b042af6248 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -194,11 +194,11 @@ int afs_write_end(struct file *file, struct address_space *mapping,
i_size = i_size_read(&vnode->vfs_inode);
if (maybe_i_size > i_size) {
- spin_lock(&vnode->wb_lock);
+ write_seqlock(&vnode->cb_lock);
i_size = i_size_read(&vnode->vfs_inode);
if (maybe_i_size > i_size)
i_size_write(&vnode->vfs_inode, maybe_i_size);
- spin_unlock(&vnode->wb_lock);
+ write_sequnlock(&vnode->cb_lock);
}
if (!PageUptodate(page)) {
@@ -811,6 +811,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
vmf->page->index, priv);
SetPagePrivate(vmf->page);
set_page_private(vmf->page, priv);
+ file_update_time(file);
sb_end_pagefault(inode->i_sb);
return VM_FAULT_LOCKED;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 8af7f093305d..d21cf61d86b9 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -179,21 +179,20 @@ static void xdr_dump_bad(const __be32 *bp)
/*
* Decode a YFSFetchStatus block
*/
-static int xdr_decode_YFSFetchStatus(const __be32 **_bp,
- struct afs_call *call,
- struct afs_status_cb *scb)
+static void xdr_decode_YFSFetchStatus(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
{
const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp;
struct afs_file_status *status = &scb->status;
u32 type;
- int ret;
status->abort_code = ntohl(xdr->abort_code);
if (status->abort_code != 0) {
if (status->abort_code == VNOVNODE)
status->nlink = 0;
scb->have_error = true;
- goto good;
+ goto advance;
}
type = ntohl(xdr->type);
@@ -221,15 +220,13 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp,
status->size = xdr_to_u64(xdr->size);
status->data_version = xdr_to_u64(xdr->data_version);
scb->have_status = true;
-good:
- ret = 0;
advance:
*_bp += xdr_size(xdr);
- return ret;
+ return;
bad:
xdr_dump_bad(*_bp);
- ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+ afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
goto advance;
}
@@ -348,9 +345,7 @@ static int yfs_deliver_fs_status_cb_and_volsync(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_YFSCallBack(&bp, call, call->out_scb);
xdr_decode_YFSVolSync(&bp, call->out_volsync);
@@ -372,9 +367,7 @@ static int yfs_deliver_status_and_volsync(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
@@ -534,9 +527,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_YFSCallBack(&bp, call, call->out_scb);
xdr_decode_YFSVolSync(&bp, call->out_volsync);
@@ -645,12 +636,8 @@ static int yfs_deliver_fs_create_vnode(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_YFSFid(&bp, call->out_fid);
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_YFSCallBack(&bp, call, call->out_scb);
xdr_decode_YFSVolSync(&bp, call->out_volsync);
@@ -803,14 +790,9 @@ static int yfs_deliver_fs_remove_file2(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
-
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_YFSFid(&bp, &fid);
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
/* Was deleted if vnode->status.abort_code == VNOVNODE. */
xdr_decode_YFSVolSync(&bp, call->out_volsync);
@@ -890,10 +872,7 @@ static int yfs_deliver_fs_remove(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
-
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_YFSVolSync(&bp, call->out_volsync);
return 0;
}
@@ -975,12 +954,8 @@ static int yfs_deliver_fs_link(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -1062,12 +1037,8 @@ static int yfs_deliver_fs_symlink(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_YFSFid(&bp, call->out_fid);
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
@@ -1155,13 +1126,11 @@ static int yfs_deliver_fs_rename(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
- if (ret < 0)
- return ret;
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
-
+ /* If the two dirs are the same, we have two copies of the same status
+ * report, so we just decode it twice.
+ */
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -1846,9 +1815,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
bp = call->buffer;
scb = &call->out_scb[call->count];
- ret = xdr_decode_YFSFetchStatus(&bp, call, scb);
- if (ret < 0)
- return ret;
+ xdr_decode_YFSFetchStatus(&bp, call, scb);
call->count++;
if (call->count < call->count2)
@@ -2068,9 +2035,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
bp = call->buffer;
yacl->inherit_flag = ntohl(*bp++);
yacl->num_cleaned = ntohl(*bp++);
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
+ xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
xdr_decode_YFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 34644ce4b502..2dc9c73a4cb2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1546,10 +1546,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
*/
if (!for_part) {
ret = devcgroup_inode_permission(bdev->bd_inode, perm);
- if (ret != 0) {
- bdput(bdev);
+ if (ret != 0)
return ret;
- }
}
restart:
@@ -1618,8 +1616,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
BUG_ON(for_part);
ret = __blkdev_get(whole, mode, 1);
- if (ret)
+ if (ret) {
+ bdput(whole);
goto out_clear;
+ }
bdev->bd_contains = whole;
bdev->bd_part = disk_get_part(disk, partno);
if (!(disk->flags & GENHD_FL_UP) ||
@@ -1669,7 +1669,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
disk_unblock_events(disk);
put_disk_and_module(disk);
out:
- bdput(bdev);
return ret;
}
@@ -1736,6 +1735,9 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
bdput(whole);
}
+ if (res)
+ bdput(bdev);
+
return res;
}
EXPORT_SYMBOL(blkdev_get);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c2dd94e1b274..42d69e77f89d 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -910,7 +910,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto out_put_group;
+ goto out;
}
/*
@@ -948,7 +948,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
- goto out_put_group;
+ goto out;
}
clear_nlink(inode);
/* One for the block groups ref */
@@ -971,13 +971,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0)
- goto out_put_group;
+ goto out;
if (ret > 0)
btrfs_release_path(path);
if (ret == 0) {
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
- goto out_put_group;
+ goto out;
btrfs_release_path(path);
}
@@ -986,6 +986,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
+ /* Once for the block groups rbtree */
+ btrfs_put_block_group(block_group);
+
if (fs_info->first_logical_byte == block_group->key.objectid)
fs_info->first_logical_byte = (u64)-1;
spin_unlock(&fs_info->block_group_cache_lock);
@@ -1094,10 +1097,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = remove_block_group_free_space(trans, block_group);
if (ret)
- goto out_put_group;
-
- /* Once for the block groups rbtree */
- btrfs_put_block_group(block_group);
+ goto out;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)
@@ -1120,10 +1120,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
free_extent_map(em);
}
-out_put_group:
+out:
/* Once for the lookup reference */
btrfs_put_block_group(block_group);
-out:
if (remove_rsv)
btrfs_delayed_refs_rsv_release(fs_info, 1);
btrfs_free_path(path);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6d2c277c6e0a..36cd210ee2ef 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -940,6 +940,8 @@ enum {
BTRFS_ROOT_DEAD_RELOC_TREE,
/* Mark dead root stored on device whose cleanup needs to be resumed */
BTRFS_ROOT_DEAD_TREE,
+ /* The root has a log tree. Used only for subvolume roots. */
+ BTRFS_ROOT_HAS_LOG_TREE,
};
/*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8aab286f2028..9b214b14a3aa 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5025,25 +5025,28 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
- /* the ref bit is tricky. We have to make sure it is set
- * if we have the buffer dirty. Otherwise the
- * code to free a buffer can end up dropping a dirty
- * page
+ /*
+ * The TREE_REF bit is first set when the extent_buffer is added
+ * to the radix tree. It is also reset, if unset, when a new reference
+ * is created by find_extent_buffer.
*
- * Once the ref bit is set, it won't go away while the
- * buffer is dirty or in writeback, and it also won't
- * go away while we have the reference count on the
- * eb bumped.
+ * It is only cleared in two cases: freeing the last non-tree
+ * reference to the extent_buffer when its STALE bit is set or
+ * calling releasepage when the tree reference is the only reference.
*
- * We can't just set the ref bit without bumping the
- * ref on the eb because free_extent_buffer might
- * see the ref bit and try to clear it. If this happens
- * free_extent_buffer might end up dropping our original
- * ref by mistake and freeing the page before we are able
- * to add one more ref.
+ * In both cases, care is taken to ensure that the extent_buffer's
+ * pages are not under io. However, releasepage can be concurrently
+ * called with creating new references, which is prone to race
+ * conditions between the calls to check_buffer_tree_ref in those
+ * codepaths and clearing TREE_REF in try_release_extent_buffer.
*
- * So bump the ref count first, then set the bit. If someone
- * beat us to it, drop the ref we added.
+ * The actual lifetime of the extent_buffer in the radix tree is
+ * adequately protected by the refcount, but the TREE_REF bit and
+ * its corresponding reference are not. To protect against this
+ * class of races, we call check_buffer_tree_ref from the codepaths
+ * which trigger io after they set eb->io_pages. Note that once io is
+ * initiated, TREE_REF can no longer be cleared, so that is the
+ * moment at which any such race is best fixed.
*/
refs = atomic_read(&eb->refs);
if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -5493,6 +5496,11 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
atomic_set(&eb->io_pages, num_reads);
+ /*
+ * It is possible for releasepage to clear the TREE_REF bit before we
+ * set io_pages. See check_buffer_tree_ref for a more detailed comment.
+ */
+ check_buffer_tree_ref(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 127cdecbe872..e408181a5eba 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -975,6 +975,7 @@ static noinline int cow_file_range(struct inode *inode,
u64 num_bytes;
unsigned long ram_size;
u64 cur_alloc_size = 0;
+ u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
@@ -1025,10 +1026,26 @@ static noinline int cow_file_range(struct inode *inode,
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
+
while (num_bytes > 0) {
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- fs_info->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
@@ -1328,6 +1345,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
int *page_started, unsigned long *nr_written)
{
const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid ==
+ BTRFS_DATA_RELOC_TREE_OBJECTID);
const u64 range_bytes = end + 1 - start;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 range_start = start;
@@ -1358,18 +1377,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
* data space info, which we incremented in the step above.
*
* If we need to fallback to cow and the inode corresponds to a free
- * space cache inode, we must also increment bytes_may_use of the data
- * space_info for the same reason. Space caches always get a prealloc
+ * space cache inode or an inode of the data relocation tree, we must
+ * also increment bytes_may_use of the data space_info for the same
+ * reason. Space caches and relocated data extents always get a prealloc
* extent for them, however scrub or balance may have set the block
- * group that contains that extent to RO mode.
+ * group that contains that extent to RO mode and therefore force COW
+ * when starting writeback.
*/
count = count_range_bits(io_tree, &range_start, end, range_bytes,
EXTENT_NORESERVE, 0);
- if (count > 0 || is_space_ino) {
- const u64 bytes = is_space_ino ? range_bytes : count;
+ if (count > 0 || is_space_ino || is_reloc_ino) {
+ u64 bytes = count;
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+ if (is_space_ino || is_reloc_ino)
+ bytes = range_bytes;
+
spin_lock(&sinfo->lock);
btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
spin_unlock(&sinfo->lock);
@@ -1633,12 +1657,8 @@ out_check:
ret = fallback_to_cow(inode, locked_page, cow_start,
found_key.offset - 1,
page_started, nr_written);
- if (ret) {
- if (nocow)
- btrfs_dec_nocow_writers(fs_info,
- disk_bytenr);
+ if (ret)
goto error;
- }
cow_start = (u64)-1;
}
@@ -1654,9 +1674,6 @@ out_check:
ram_bytes, BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
- if (nocow)
- btrfs_dec_nocow_writers(fs_info,
- disk_bytenr);
ret = PTR_ERR(em);
goto error;
}
@@ -8833,9 +8850,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
- } else if (iocb->ki_flags & IOCB_NOWAIT) {
- ret = -EAGAIN;
- goto out;
}
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
offset, count);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7d464b049507..f46afbff668e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -167,6 +167,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
if (ret)
goto out;
+ set_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state);
clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
root->log_start_pid = current->pid;
}
@@ -193,6 +194,9 @@ static int join_running_log_trans(struct btrfs_root *root)
{
int ret = -ENOENT;
+ if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state))
+ return ret;
+
mutex_lock(&root->log_mutex);
if (root->log_root) {
ret = 0;
@@ -3327,6 +3331,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
if (root->log_root) {
free_log_tree(trans, root->log_root);
root->log_root = NULL;
+ clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state);
}
return 0;
}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 79dc06881e78..e088843a7734 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -172,9 +172,16 @@ struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
{
struct inode *inode = __lookup_inode(sb, ino);
+ int err;
+
if (IS_ERR(inode))
return ERR_CAST(inode);
- if (inode->i_nlink == 0) {
+ /* We need LINK caps to reliably check i_nlink */
+ err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
+ if (err)
+ return ERR_PTR(err);
+ /* -ESTALE if inode as been unlinked and no file is open */
+ if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) {
iput(inode);
return ERR_PTR(-ESTALE);
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 721b2560caa7..f5df2a4195c2 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -614,26 +614,26 @@ cifs_reconnect(struct TCP_Server_Info *server)
try_to_freeze();
mutex_lock(&server->srv_mutex);
+#ifdef CONFIG_CIFS_DFS_UPCALL
/*
* Set up next DFS target server (if any) for reconnect. If DFS
* feature is disabled, then we will retry last server we
* connected to before.
*/
+ reconn_inval_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
+#endif
+ rc = reconn_set_ipaddr(server);
+ if (rc) {
+ cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
+ __func__, rc);
+ }
+
if (cifs_rdma_enabled(server))
rc = smbd_reconnect(server);
else
rc = generic_ip_connect(server);
if (rc) {
cifs_dbg(FYI, "reconnect error %d\n", rc);
-#ifdef CONFIG_CIFS_DFS_UPCALL
- reconn_inval_dfs_target(server, cifs_sb, &tgt_list,
- &tgt_it);
-#endif
- rc = reconn_set_ipaddr(server);
- if (rc) {
- cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
- __func__, rc);
- }
mutex_unlock(&server->srv_mutex);
msleep(3000);
} else {
@@ -5281,9 +5281,15 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
vol_info->nocase = master_tcon->nocase;
vol_info->nohandlecache = master_tcon->nohandlecache;
vol_info->local_lease = master_tcon->local_lease;
+ vol_info->no_lease = master_tcon->no_lease;
+ vol_info->resilient = master_tcon->use_resilient;
+ vol_info->persistent = master_tcon->use_persistent;
+ vol_info->handle_timeout = master_tcon->handle_timeout;
vol_info->no_linux_ext = !master_tcon->unix_ext;
+ vol_info->linux_ext = master_tcon->posix_extensions;
vol_info->sectype = master_tcon->ses->sectype;
vol_info->sign = master_tcon->ses->sign;
+ vol_info->seal = master_tcon->seal;
rc = cifs_set_vol_auth(vol_info, master_tcon->ses);
if (rc) {
@@ -5309,10 +5315,6 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
goto out;
}
- /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
- if (tcon->posix_extensions)
- cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
-
if (cap_unix(ses))
reset_cifs_unix_caps(0, tcon, NULL, vol_info);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 5e6bc8fa4e46..5ae458505f63 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1791,6 +1791,7 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
FILE_UNIX_BASIC_INFO *info_buf_target;
unsigned int xid;
int rc, tmprc;
+ bool new_target = d_really_is_negative(target_dentry);
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
@@ -1867,8 +1868,13 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
*/
unlink_target:
- /* Try unlinking the target dentry if it's not negative */
- if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) {
+ /*
+ * If the target dentry was created during the rename, try
+ * unlinking it if it's not negative
+ */
+ if (new_target &&
+ d_really_is_positive(target_dentry) &&
+ (rc == -EACCES || rc == -EEXIST)) {
if (d_is_dir(target_dentry))
tmprc = cifs_rmdir(target_dir, target_dentry);
else
@@ -2264,6 +2270,15 @@ set_size_out:
if (rc == 0) {
cifsInode->server_eof = attrs->ia_size;
cifs_setsize(inode, attrs->ia_size);
+
+ /*
+ * The man page of truncate says if the size changed,
+ * then the st_ctime and st_mtime fields for the file
+ * are updated.
+ */
+ attrs->ia_ctime = attrs->ia_mtime = current_time(inode);
+ attrs->ia_valid |= ATTR_CTIME | ATTR_MTIME;
+
cifs_truncate_page(inode->i_mapping, inode->i_size);
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 58915d882285..7ccbfc656478 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -736,6 +736,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
/* close extra handle outside of crit sec */
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
}
+ rc = 0;
goto oshr_free;
}
@@ -2969,6 +2970,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid,
ses->Suid, offset, len);
+ /*
+ * We zero the range through ioctl, so we need remove the page caches
+ * first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
@@ -3035,6 +3041,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
return rc;
}
+ /*
+ * We implement the punch hole through ioctl, so we need remove the page
+ * caches first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len);
fsctl_buf.FileOffset = cpu_to_le64(offset);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index fe1552cc8a0a..eafc49de4d7f 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -528,7 +528,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
const int timeout, const int flags,
unsigned int *instance)
{
- int rc;
+ long rc;
int *credits;
int optype;
long int t;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 416d9de35679..4311d01b02a8 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -97,7 +97,6 @@ do { \
__LINE__, __FILE__, #x, jiffies); \
{do} \
printk("\n"); \
- BUG(); \
panic("DLM: Record message above and reboot.\n"); \
} \
}
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index faf950189bd7..568d5a493876 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -148,22 +148,22 @@ static inline void z_erofs_onlinepage_init(struct page *page)
static inline void z_erofs_onlinepage_fixup(struct page *page,
uintptr_t index, bool down)
{
- unsigned long *p, o, v, id;
-repeat:
- p = &page_private(page);
- o = READ_ONCE(*p);
+ union z_erofs_onlinepage_converter u = { .v = &page_private(page) };
+ int orig, orig_index, val;
- id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
- if (id) {
+repeat:
+ orig = atomic_read(u.o);
+ orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+ if (orig_index) {
if (!index)
return;
- DBG_BUGON(id != index);
+ DBG_BUGON(orig_index != index);
}
- v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
- ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
- if (cmpxchg(p, o, v) != o)
+ val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
+ ((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
+ if (atomic_cmpxchg(u.o, orig, val) != orig)
goto repeat;
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2743c6f8a457..0589e914663f 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -677,6 +677,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
struct qstr qstr = {.name = str, .len = len };
const struct dentry *parent = READ_ONCE(dentry->d_parent);
const struct inode *inode = READ_ONCE(parent->d_inode);
+ char strbuf[DNAME_INLINE_LEN];
if (!inode || !IS_CASEFOLDED(inode) ||
!EXT4_SB(inode->i_sb)->s_encoding) {
@@ -685,6 +686,21 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
return memcmp(str, name->name, len);
}
+ /*
+ * If the dentry name is stored in-line, then it may be concurrently
+ * modified by a rename. If this happens, the VFS will eventually retry
+ * the lookup, so it doesn't matter what ->d_compare() returns.
+ * However, it's unsafe to call utf8_strncasecmp() with an unstable
+ * string. Therefore, we have to copy the name into a temporary buffer.
+ */
+ if (len <= DNAME_INLINE_LEN - 1) {
+ memcpy(strbuf, str, len);
+ strbuf[len] = 0;
+ qstr.name = strbuf;
+ /* prevent compiler from optimizing out the temporary buffer */
+ barrier();
+ }
+
return ext4_ci_compare(inode, name, &qstr, false);
}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9bd44588eb77..3193f0b4a02d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3010,7 +3010,7 @@ again:
* in use to avoid freeing it when removing blocks.
*/
if (sbi->s_cluster_ratio > 1) {
- pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
+ pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
partial.pclu = EXT4_B2C(sbi, pblk);
partial.state = nofree;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d3500eaf900e..f7c20bb20da3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2034,6 +2034,16 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
#endif
} else if (token == Opt_dax) {
#ifdef CONFIG_FS_DAX
+ if (is_remount && test_opt(sb, DAX)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and dax");
+ return -1;
+ }
+ if (is_remount && !(sbi->s_mount_opt & EXT4_MOUNT_DAX)) {
+ ext4_msg(sb, KERN_ERR, "can't change "
+ "dax mount option while remounting");
+ return -1;
+ }
ext4_msg(sb, KERN_WARNING,
"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
sbi->s_mount_opt |= m->mount_opt;
@@ -2294,6 +2304,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
ext4_msg(sb, KERN_ERR, "revision level too high, "
"forcing read-only mode");
err = -EROFS;
+ goto done;
}
if (read_only)
goto done;
@@ -5366,12 +5377,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
err = -EINVAL;
goto restore_opts;
}
- if (test_opt(sb, DAX)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and dax");
- err = -EINVAL;
- goto restore_opts;
- }
} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
@@ -5387,12 +5392,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
- ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
- "dax flag with busy inodes while remounting");
- sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
- }
-
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ext4_abort(sb, "Abort forced by user");
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index a28ffecc0f95..bbd07fe8a492 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -892,8 +892,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
int i;
int err;
- sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks),
- GFP_KERNEL);
+ sbi->ckpt = f2fs_kvzalloc(sbi, array_size(blk_size, cp_blks),
+ GFP_KERNEL);
if (!sbi->ckpt)
return -ENOMEM;
/*
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 84280ad3786c..e9af46dc06f7 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -107,36 +107,28 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
/*
* Test whether a case-insensitive directory entry matches the filename
* being searched for.
- *
- * Returns: 0 if the directory entry matches, more than 0 if it
- * doesn't match or less than zero on error.
*/
-int f2fs_ci_compare(const struct inode *parent, const struct qstr *name,
- const struct qstr *entry, bool quick)
+static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
+ const struct qstr *entry, bool quick)
{
- const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb);
+ const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
const struct unicode_map *um = sbi->s_encoding;
- int ret;
+ int res;
if (quick)
- ret = utf8_strncasecmp_folded(um, name, entry);
+ res = utf8_strncasecmp_folded(um, name, entry);
else
- ret = utf8_strncasecmp(um, name, entry);
-
- if (ret < 0) {
- /* Handle invalid character sequence as either an error
- * or as an opaque byte sequence.
+ res = utf8_strncasecmp(um, name, entry);
+ if (res < 0) {
+ /*
+ * In strict mode, ignore invalid names. In non-strict mode,
+ * fall back to treating them as opaque byte sequences.
*/
- if (f2fs_has_strict_mode(sbi))
- return -EINVAL;
-
- if (name->len != entry->len)
- return 1;
-
- return !!memcmp(name->name, entry->name, name->len);
+ if (f2fs_has_strict_mode(sbi) || name->len != entry->len)
+ return false;
+ return !memcmp(name->name, entry->name, name->len);
}
-
- return ret;
+ return res == 0;
}
static void f2fs_fname_setup_ci_filename(struct inode *dir,
@@ -188,10 +180,10 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d,
if (cf_str->name) {
struct qstr cf = {.name = cf_str->name,
.len = cf_str->len};
- return !f2fs_ci_compare(parent, &cf, &entry, true);
+ return f2fs_match_ci_name(parent, &cf, &entry, true);
}
- return !f2fs_ci_compare(parent, fname->usr_fname, &entry,
- false);
+ return f2fs_match_ci_name(parent, fname->usr_fname, &entry,
+ false);
}
#endif
if (fscrypt_match_name(fname, d->filename[bit_pos],
@@ -1067,17 +1059,41 @@ const struct file_operations f2fs_dir_operations = {
static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name)
{
- struct qstr qstr = {.name = str, .len = len };
const struct dentry *parent = READ_ONCE(dentry->d_parent);
- const struct inode *inode = READ_ONCE(parent->d_inode);
+ const struct inode *dir = READ_ONCE(parent->d_inode);
+ const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+ struct qstr entry = QSTR_INIT(str, len);
+ char strbuf[DNAME_INLINE_LEN];
+ int res;
+
+ if (!dir || !IS_CASEFOLDED(dir))
+ goto fallback;
- if (!inode || !IS_CASEFOLDED(inode)) {
- if (len != name->len)
- return -1;
- return memcmp(str, name->name, len);
+ /*
+ * If the dentry name is stored in-line, then it may be concurrently
+ * modified by a rename. If this happens, the VFS will eventually retry
+ * the lookup, so it doesn't matter what ->d_compare() returns.
+ * However, it's unsafe to call utf8_strncasecmp() with an unstable
+ * string. Therefore, we have to copy the name into a temporary buffer.
+ */
+ if (len <= DNAME_INLINE_LEN - 1) {
+ memcpy(strbuf, str, len);
+ strbuf[len] = 0;
+ entry.name = strbuf;
+ /* prevent compiler from optimizing out the temporary buffer */
+ barrier();
}
- return f2fs_ci_compare(inode, name, &qstr, false);
+ res = utf8_strncasecmp(sbi->s_encoding, name, &entry);
+ if (res >= 0)
+ return res;
+
+ if (f2fs_has_strict_mode(sbi))
+ return -EINVAL;
+fallback:
+ if (len != name->len)
+ return 1;
+ return !!memcmp(str, name->name, len);
}
static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a26ea1e6ba88..03693d6b1c10 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2790,18 +2790,12 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
- void *ret;
-
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
}
- ret = kmalloc(size, flags);
- if (ret)
- return ret;
-
- return kvmalloc(size, flags);
+ return kmalloc(size, flags);
}
static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
@@ -2960,11 +2954,6 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set);
struct dentry *f2fs_get_parent(struct dentry *child);
-extern int f2fs_ci_compare(const struct inode *parent,
- const struct qstr *name,
- const struct qstr *entry,
- bool quick);
-
/*
* dir.c
*/
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c3a9da79ac99..5d94abe467a4 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2056,8 +2056,15 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (in != F2FS_GOING_DOWN_FULLSYNC) {
ret = mnt_want_write_file(filp);
- if (ret)
+ if (ret) {
+ if (ret == -EROFS) {
+ ret = 0;
+ f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
+ trace_f2fs_shutdown(sbi, in, ret);
+ }
return ret;
+ }
}
switch (in) {
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index f14401a77d60..90a20bd12961 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2933,7 +2933,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
return 0;
nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
- nm_i->nat_bits = f2fs_kzalloc(sbi,
+ nm_i->nat_bits = f2fs_kvzalloc(sbi,
nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
if (!nm_i->nat_bits)
return -ENOMEM;
@@ -3066,9 +3066,9 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
int i;
nm_i->free_nid_bitmap =
- f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *),
- nm_i->nat_blocks),
- GFP_KERNEL);
+ f2fs_kvzalloc(sbi, array_size(sizeof(unsigned char *),
+ nm_i->nat_blocks),
+ GFP_KERNEL);
if (!nm_i->free_nid_bitmap)
return -ENOMEM;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e36543c9f2b7..f4b882ee48dd 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1230,7 +1230,8 @@ static int f2fs_statfs_project(struct super_block *sb,
limit >>= sb->s_blocksize_bits;
if (limit && buf->f_blocks > limit) {
- curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+ curblock = (dquot->dq_dqb.dqb_curspace +
+ dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
buf->f_blocks = limit;
buf->f_bfree = buf->f_bavail =
(buf->f_blocks > curblock) ?
@@ -2900,7 +2901,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
- FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
+ FDEV(devi).blkz_seq = f2fs_kvzalloc(sbi,
BITS_TO_LONGS(FDEV(devi).nr_blkz)
* sizeof(unsigned long),
GFP_KERNEL);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 3dd37a998ea9..f8d8a8e34b80 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -18,6 +18,7 @@
#include <linux/swap.h>
#include <linux/falloc.h>
#include <linux/uio.h>
+#include <linux/fs.h>
static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
struct fuse_page_desc **desc)
@@ -712,6 +713,7 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc,
spin_unlock(&io->lock);
ia->ap.args.end = fuse_aio_complete_req;
+ ia->ap.args.may_block = io->should_dirty;
err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
if (err)
fuse_aio_complete_req(fc, &ia->ap.args, err);
@@ -2147,10 +2149,8 @@ static int fuse_writepages(struct address_space *mapping,
err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
if (data.wpa) {
- /* Ignore errors if we can write at least one page */
WARN_ON(!data.wpa->ia.ap.num_pages);
fuse_writepages_send(&data);
- err = 0;
}
if (data.ff)
fuse_file_put(data.ff, false, false);
@@ -2759,7 +2759,16 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
struct iovec *iov = iov_page;
iov->iov_base = (void __user *)arg;
- iov->iov_len = _IOC_SIZE(cmd);
+
+ switch (cmd) {
+ case FS_IOC_GETFLAGS:
+ case FS_IOC_SETFLAGS:
+ iov->iov_len = sizeof(int);
+ break;
+ default:
+ iov->iov_len = _IOC_SIZE(cmd);
+ break;
+ }
if (_IOC_DIR(cmd) & _IOC_WRITE) {
in_iov = iov;
@@ -3279,13 +3288,11 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;
- if (fc->writeback_cache) {
- inode_lock(inode_in);
- err = fuse_writeback_range(inode_in, pos_in, pos_in + len);
- inode_unlock(inode_in);
- if (err)
- return err;
- }
+ inode_lock(inode_in);
+ err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1);
+ inode_unlock(inode_in);
+ if (err)
+ return err;
inode_lock(inode_out);
@@ -3293,11 +3300,27 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (err)
goto out;
- if (fc->writeback_cache) {
- err = fuse_writeback_range(inode_out, pos_out, pos_out + len);
- if (err)
- goto out;
- }
+ /*
+ * Write out dirty pages in the destination file before sending the COPY
+ * request to userspace. After the request is completed, truncate off
+ * pages (including partial ones) from the cache that have been copied,
+ * since these contain stale data at that point.
+ *
+ * This should be mostly correct, but if the COPY writes to partial
+ * pages (at the start or end) and the parts not covered by the COPY are
+ * written through a memory map after calling fuse_writeback_range(),
+ * then these partial page modifications will be lost on truncation.
+ *
+ * It is unlikely that someone would rely on such mixed style
+ * modifications. Yet this does give less guarantees than if the
+ * copying was performed with write(2).
+ *
+ * To fix this a i_mmap_sem style lock could be used to prevent new
+ * faults while the copy is ongoing.
+ */
+ err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
+ if (err)
+ goto out;
if (is_unstable)
set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
@@ -3318,6 +3341,10 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (err)
goto out;
+ truncate_inode_pages_range(inode_out->i_mapping,
+ ALIGN_DOWN(pos_out, PAGE_SIZE),
+ ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
+
if (fc->writeback_cache) {
fuse_write_update_size(inode_out, pos_out + outarg.size);
file_update_time(file_out);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ca344bf71404..d7cde216fc87 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -249,6 +249,7 @@ struct fuse_args {
bool out_argvar:1;
bool page_zeroing:1;
bool page_replace:1;
+ bool may_block:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 16aec32f7f3d..5dca643a257c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -121,10 +121,12 @@ static void fuse_evict_inode(struct inode *inode)
}
}
-static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
+static int fuse_reconfigure(struct fs_context *fc)
{
+ struct super_block *sb = fc->root->d_sb;
+
sync_filesystem(sb);
- if (*flags & SB_MANDLOCK)
+ if (fc->sb_flags & SB_MANDLOCK)
return -EINVAL;
return 0;
@@ -473,6 +475,13 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
struct fuse_fs_context *ctx = fc->fs_private;
int opt;
+ /*
+ * Ignore options coming from mount(MS_REMOUNT) for backward
+ * compatibility.
+ */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)
+ return 0;
+
opt = fs_parse(fc, &fuse_fs_parameters, param, &result);
if (opt < 0)
return opt;
@@ -815,7 +824,6 @@ static const struct super_operations fuse_super_operations = {
.evict_inode = fuse_evict_inode,
.write_inode = fuse_write_inode,
.drop_inode = generic_delete_inode,
- .remount_fs = fuse_remount_fs,
.put_super = fuse_put_super,
.umount_begin = fuse_umount_begin,
.statfs = fuse_statfs,
@@ -1289,6 +1297,7 @@ static int fuse_get_tree(struct fs_context *fc)
static const struct fs_context_operations fuse_context_ops = {
.free = fuse_free_fc,
.parse_param = fuse_parse_param,
+ .reconfigure = fuse_reconfigure,
.get_tree = fuse_get_tree,
};
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index a5c86048b96e..7505f8102762 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -55,6 +55,12 @@ struct virtio_fs_forget {
struct list_head list;
};
+struct virtio_fs_req_work {
+ struct fuse_req *req;
+ struct virtio_fs_vq *fsvq;
+ struct work_struct done_work;
+};
+
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
struct fuse_req *req, bool in_flight);
@@ -443,19 +449,67 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
}
/* Work function for request completion */
+static void virtio_fs_request_complete(struct fuse_req *req,
+ struct virtio_fs_vq *fsvq)
+{
+ struct fuse_pqueue *fpq = &fsvq->fud->pq;
+ struct fuse_conn *fc = fsvq->fud->fc;
+ struct fuse_args *args;
+ struct fuse_args_pages *ap;
+ unsigned int len, i, thislen;
+ struct page *page;
+
+ /*
+ * TODO verify that server properly follows FUSE protocol
+ * (oh.uniq, oh.len)
+ */
+ args = req->args;
+ copy_args_from_argbuf(args, req);
+
+ if (args->out_pages && args->page_zeroing) {
+ len = args->out_args[args->out_numargs - 1].size;
+ ap = container_of(args, typeof(*ap), args);
+ for (i = 0; i < ap->num_pages; i++) {
+ thislen = ap->descs[i].length;
+ if (len < thislen) {
+ WARN_ON(ap->descs[i].offset);
+ page = ap->pages[i];
+ zero_user_segment(page, len, thislen);
+ len = 0;
+ } else {
+ len -= thislen;
+ }
+ }
+ }
+
+ spin_lock(&fpq->lock);
+ clear_bit(FR_SENT, &req->flags);
+ spin_unlock(&fpq->lock);
+
+ fuse_request_end(fc, req);
+ spin_lock(&fsvq->lock);
+ dec_in_flight_req(fsvq);
+ spin_unlock(&fsvq->lock);
+}
+
+static void virtio_fs_complete_req_work(struct work_struct *work)
+{
+ struct virtio_fs_req_work *w =
+ container_of(work, typeof(*w), done_work);
+
+ virtio_fs_request_complete(w->req, w->fsvq);
+ kfree(w);
+}
+
static void virtio_fs_requests_done_work(struct work_struct *work)
{
struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
done_work);
struct fuse_pqueue *fpq = &fsvq->fud->pq;
- struct fuse_conn *fc = fsvq->fud->fc;
struct virtqueue *vq = fsvq->vq;
struct fuse_req *req;
- struct fuse_args_pages *ap;
struct fuse_req *next;
- struct fuse_args *args;
- unsigned int len, i, thislen;
- struct page *page;
+ unsigned int len;
LIST_HEAD(reqs);
/* Collect completed requests off the virtqueue */
@@ -473,38 +527,20 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
/* End requests */
list_for_each_entry_safe(req, next, &reqs, list) {
- /*
- * TODO verify that server properly follows FUSE protocol
- * (oh.uniq, oh.len)
- */
- args = req->args;
- copy_args_from_argbuf(args, req);
-
- if (args->out_pages && args->page_zeroing) {
- len = args->out_args[args->out_numargs - 1].size;
- ap = container_of(args, typeof(*ap), args);
- for (i = 0; i < ap->num_pages; i++) {
- thislen = ap->descs[i].length;
- if (len < thislen) {
- WARN_ON(ap->descs[i].offset);
- page = ap->pages[i];
- zero_user_segment(page, len, thislen);
- len = 0;
- } else {
- len -= thislen;
- }
- }
- }
-
- spin_lock(&fpq->lock);
- clear_bit(FR_SENT, &req->flags);
list_del_init(&req->list);
- spin_unlock(&fpq->lock);
- fuse_request_end(fc, req);
- spin_lock(&fsvq->lock);
- dec_in_flight_req(fsvq);
- spin_unlock(&fsvq->lock);
+ /* blocking async request completes in a worker context */
+ if (req->args->may_block) {
+ struct virtio_fs_req_work *w;
+
+ w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
+ INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
+ w->fsvq = fsvq;
+ w->req = req;
+ schedule_work(&w->done_work);
+ } else {
+ virtio_fs_request_complete(req, fsvq);
+ }
}
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 110e5c4db819..a4b6a49462a4 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -881,8 +881,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
* @new: New transaction to be merged
*/
-static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
+static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new)
{
+ struct gfs2_trans *old = sdp->sd_log_tr;
+
WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));
old->tr_num_buf_new += new->tr_num_buf_new;
@@ -893,6 +895,11 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
list_splice_tail_init(&new->tr_buf, &old->tr_buf);
+
+ spin_lock(&sdp->sd_ail_lock);
+ list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list);
+ list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list);
+ spin_unlock(&sdp->sd_ail_lock);
}
static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
@@ -904,7 +911,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
gfs2_log_lock(sdp);
if (sdp->sd_log_tr) {
- gfs2_merge_trans(sdp->sd_log_tr, tr);
+ gfs2_merge_trans(sdp, tr);
} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
sdp->sd_log_tr = tr;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 18daf494abab..e0c55765b06d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -911,7 +911,7 @@ fail:
}
static const match_table_t nolock_tokens = {
- { Opt_jid, "jid=%d\n", },
+ { Opt_jid, "jid=%d", },
{ Opt_err, NULL },
};
@@ -1168,7 +1168,17 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
goto fail_per_node;
}
- if (!sb_rdonly(sb)) {
+ if (sb_rdonly(sb)) {
+ struct gfs2_holder freeze_gh;
+
+ error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
+ GL_EXACT, &freeze_gh);
+ if (error) {
+ fs_err(sdp, "can't make FS RO: %d\n", error);
+ goto fail_per_node;
+ }
+ gfs2_glock_dq_uninit(&freeze_gh);
+ } else {
error = gfs2_make_fs_rw(sdp);
if (error) {
fs_err(sdp, "can't make FS RW: %d\n", error);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7fa3cd3fff4d..e0200406765c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -267,6 +267,9 @@ struct io_ring_ctx {
#if defined(CONFIG_UNIX)
struct socket *ring_sock;
#endif
+
+ struct list_head task_list;
+ spinlock_t task_lock;
};
struct sqe_submit {
@@ -331,14 +334,18 @@ struct io_kiocb {
#define REQ_F_ISREG 2048 /* regular file */
#define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */
#define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */
+#define REQ_F_CANCEL 16384 /* cancel request */
unsigned long fsize;
u64 user_data;
u32 result;
u32 sequence;
+ struct task_struct *task;
struct fs_struct *fs;
struct work_struct work;
+ struct task_struct *work_task;
+ struct list_head task_list;
};
#define IO_PLUG_THRESHOLD 2
@@ -425,6 +432,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->cancel_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
+ INIT_LIST_HEAD(&ctx->task_list);
+ spin_lock_init(&ctx->task_lock);
return ctx;
}
@@ -492,6 +501,7 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
static inline void io_queue_async_work(struct io_ring_ctx *ctx,
struct io_kiocb *req)
{
+ unsigned long flags;
int rw = 0;
if (req->submit.sqe) {
@@ -503,6 +513,13 @@ static inline void io_queue_async_work(struct io_ring_ctx *ctx,
}
}
+ req->task = current;
+
+ spin_lock_irqsave(&ctx->task_lock, flags);
+ list_add(&req->task_list, &ctx->task_list);
+ req->work_task = NULL;
+ spin_unlock_irqrestore(&ctx->task_lock, flags);
+
queue_work(ctx->sqo_wq[rw], &req->work);
}
@@ -2201,6 +2218,8 @@ static void io_sq_wq_submit_work(struct work_struct *work)
old_cred = override_creds(ctx->creds);
async_list = io_async_list_from_sqe(ctx, req->submit.sqe);
+
+ allow_kernel_signal(SIGINT);
restart:
do {
struct sqe_submit *s = &req->submit;
@@ -2232,6 +2251,12 @@ restart:
}
if (!ret) {
+ req->work_task = current;
+ if (req->flags & REQ_F_CANCEL) {
+ ret = -ECANCELED;
+ goto end_req;
+ }
+
s->has_user = cur_mm != NULL;
s->needs_lock = true;
do {
@@ -2246,6 +2271,12 @@ restart:
break;
cond_resched();
} while (1);
+end_req:
+ if (!list_empty(&req->task_list)) {
+ spin_lock_irq(&ctx->task_lock);
+ list_del_init(&req->task_list);
+ spin_unlock_irq(&ctx->task_lock);
+ }
}
/* drop submission reference */
@@ -2311,6 +2342,7 @@ restart:
}
out:
+ disallow_signal(SIGINT);
if (cur_mm) {
set_fs(old_fs);
unuse_mm(cur_mm);
@@ -3675,12 +3707,32 @@ static int io_uring_fasync(int fd, struct file *file, int on)
return fasync_helper(fd, file, on, &ctx->cq_fasync);
}
+static void io_cancel_async_work(struct io_ring_ctx *ctx,
+ struct task_struct *task)
+{
+ if (list_empty(&ctx->task_list))
+ return;
+
+ spin_lock_irq(&ctx->task_lock);
+ while (!list_empty(&ctx->task_list)) {
+ struct io_kiocb *req;
+
+ req = list_first_entry(&ctx->task_list, struct io_kiocb, task_list);
+ list_del_init(&req->task_list);
+ req->flags |= REQ_F_CANCEL;
+ if (req->work_task && (!task || req->task == task))
+ send_sig(SIGINT, req->work_task, 1);
+ }
+ spin_unlock_irq(&ctx->task_lock);
+}
+
static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
{
mutex_lock(&ctx->uring_lock);
percpu_ref_kill(&ctx->refs);
mutex_unlock(&ctx->uring_lock);
+ io_cancel_async_work(ctx, NULL);
io_kill_timeouts(ctx);
io_poll_remove_all(ctx);
io_iopoll_reap_events(ctx);
@@ -3688,6 +3740,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
io_ring_ctx_free(ctx);
}
+static int io_uring_flush(struct file *file, void *data)
+{
+ struct io_ring_ctx *ctx = file->private_data;
+
+ if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
+ io_cancel_async_work(ctx, current);
+
+ return 0;
+}
+
static int io_uring_release(struct inode *inode, struct file *file)
{
struct io_ring_ctx *ctx = file->private_data;
@@ -3792,6 +3854,7 @@ out_fput:
static const struct file_operations io_uring_fops = {
.release = io_uring_release,
+ .flush = io_uring_flush,
.mmap = io_uring_mmap,
.poll = io_uring_poll,
.fasync = io_uring_fasync,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c1ce2805c563..fa58835668a6 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -96,7 +96,6 @@ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
EXPORT_SYMBOL(jbd2_inode_cache);
-static void __journal_abort_soft (journal_t *journal, int errno);
static int jbd2_journal_create_slab(size_t slab_size);
#ifdef CONFIG_JBD2_DEBUG
@@ -805,7 +804,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
"at offset %lu on %s\n",
__func__, blocknr, journal->j_devname);
err = -EIO;
- __journal_abort_soft(journal, err);
+ jbd2_journal_abort(journal, err);
}
} else {
*retp = blocknr; /* +journal->j_blk_offset */
@@ -2070,64 +2069,6 @@ int jbd2_journal_wipe(journal_t *journal, int write)
return err;
}
-/*
- * Journal abort has very specific semantics, which we describe
- * for journal abort.
- *
- * Two internal functions, which provide abort to the jbd layer
- * itself are here.
- */
-
-/*
- * Quick version for internal journal use (doesn't lock the journal).
- * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
- * and don't attempt to make any other journal updates.
- */
-void __jbd2_journal_abort_hard(journal_t *journal)
-{
- transaction_t *transaction;
-
- if (journal->j_flags & JBD2_ABORT)
- return;
-
- printk(KERN_ERR "Aborting journal on device %s.\n",
- journal->j_devname);
-
- write_lock(&journal->j_state_lock);
- journal->j_flags |= JBD2_ABORT;
- transaction = journal->j_running_transaction;
- if (transaction)
- __jbd2_log_start_commit(journal, transaction->t_tid);
- write_unlock(&journal->j_state_lock);
-}
-
-/* Soft abort: record the abort error status in the journal superblock,
- * but don't do any other IO. */
-static void __journal_abort_soft (journal_t *journal, int errno)
-{
- int old_errno;
-
- write_lock(&journal->j_state_lock);
- old_errno = journal->j_errno;
- if (!journal->j_errno || errno == -ESHUTDOWN)
- journal->j_errno = errno;
-
- if (journal->j_flags & JBD2_ABORT) {
- write_unlock(&journal->j_state_lock);
- if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN)
- jbd2_journal_update_sb_errno(journal);
- return;
- }
- write_unlock(&journal->j_state_lock);
-
- __jbd2_journal_abort_hard(journal);
-
- jbd2_journal_update_sb_errno(journal);
- write_lock(&journal->j_state_lock);
- journal->j_flags |= JBD2_REC_ERR;
- write_unlock(&journal->j_state_lock);
-}
-
/**
* void jbd2_journal_abort () - Shutdown the journal immediately.
* @journal: the journal to shutdown.
@@ -2171,7 +2112,47 @@ static void __journal_abort_soft (journal_t *journal, int errno)
void jbd2_journal_abort(journal_t *journal, int errno)
{
- __journal_abort_soft(journal, errno);
+ transaction_t *transaction;
+
+ /*
+ * ESHUTDOWN always takes precedence because a file system check
+ * caused by any other journal abort error is not required after
+ * a shutdown triggered.
+ */
+ write_lock(&journal->j_state_lock);
+ if (journal->j_flags & JBD2_ABORT) {
+ int old_errno = journal->j_errno;
+
+ write_unlock(&journal->j_state_lock);
+ if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) {
+ journal->j_errno = errno;
+ jbd2_journal_update_sb_errno(journal);
+ }
+ return;
+ }
+
+ /*
+ * Mark the abort as occurred and start current running transaction
+ * to release all journaled buffer.
+ */
+ pr_err("Aborting journal on device %s.\n", journal->j_devname);
+
+ journal->j_flags |= JBD2_ABORT;
+ journal->j_errno = errno;
+ transaction = journal->j_running_transaction;
+ if (transaction)
+ __jbd2_log_start_commit(journal, transaction->t_tid);
+ write_unlock(&journal->j_state_lock);
+
+ /*
+ * Record errno to the journal super block, so that fsck and jbd2
+ * layer could realise that a filesystem check is needed.
+ */
+ jbd2_journal_update_sb_errno(journal);
+
+ write_lock(&journal->j_state_lock);
+ journal->j_flags |= JBD2_REC_ERR;
+ write_unlock(&journal->j_state_lock);
}
/**
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 6b0bf4ebd812..70cf8c5760c7 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -367,8 +367,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
{
struct inode *inode = dreq->inode;
- inode_dio_end(inode);
-
if (dreq->iocb) {
long res = (long) dreq->error;
if (dreq->count != 0) {
@@ -380,7 +378,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
complete(&dreq->completion);
+ igrab(inode);
nfs_direct_req_release(dreq);
+ inode_dio_end(inode);
+ iput(inode);
}
static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
@@ -510,8 +511,10 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
- inode_dio_end(inode);
+ igrab(inode);
nfs_direct_req_release(dreq);
+ inode_dio_end(inode);
+ iput(inode);
return result < 0 ? result : -EIO;
}
@@ -923,8 +926,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
- inode_dio_end(inode);
+ igrab(inode);
nfs_direct_req_release(dreq);
+ inode_dio_end(inode);
+ iput(inode);
return result < 0 ? result : -EIO;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 95dc90570786..7b3136753205 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -83,6 +83,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
dprintk("NFS: release(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
+ inode_dio_wait(inode);
nfs_file_clear_open_context(filp);
return 0;
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 5657b7f2611f..1741d902b0d8 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -984,9 +984,8 @@ retry:
goto out_mds;
/* Use a direct mapping of ds_idx to pgio mirror_idx */
- if (WARN_ON_ONCE(pgio->pg_mirror_count !=
- FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
- goto out_mds;
+ if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))
+ goto out_eagain;
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
@@ -1008,7 +1007,10 @@ retry:
(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
pgio->pg_maxretrans = io_maxretrans;
return;
-
+out_eagain:
+ pnfs_generic_pg_cleanup(pgio);
+ pgio->pg_error = -EAGAIN;
+ return;
out_mds:
trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode,
0, NFS4_MAX_UINT64, IOMODE_RW,
@@ -1018,6 +1020,7 @@ out_mds:
pgio->pg_lseg = NULL;
pgio->pg_maxretrans = 0;
nfs_pageio_reset_write_mds(pgio);
+ pgio->pg_error = -EAGAIN;
}
static unsigned int
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3802c88e8372..6de41f741280 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -826,6 +826,8 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
do_update |= cache_validity & NFS_INO_INVALID_ATIME;
if (request_mask & (STATX_CTIME|STATX_MTIME))
do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE;
+ if (request_mask & STATX_BLOCKS)
+ do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
if (do_update) {
/* Update the attribute cache */
if (!(server->flags & NFS_MOUNT_NOAC))
@@ -1750,7 +1752,8 @@ out_noforce:
status = nfs_post_op_update_inode_locked(inode, fattr,
NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME
- | NFS_INO_INVALID_MTIME);
+ | NFS_INO_INVALID_MTIME
+ | NFS_INO_INVALID_BLOCKS);
return status;
}
@@ -1857,7 +1860,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_FORCED
- | NFS_INO_REVAL_PAGECACHE);
+ | NFS_INO_REVAL_PAGECACHE
+ | NFS_INO_INVALID_BLOCKS);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
@@ -2019,8 +2023,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
} else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
- else
+ else {
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_BLOCKS
+ | NFS_INO_REVAL_FORCED);
cache_revalidated = false;
+ }
/* Update attrtimeo value if we're out of the unstable period */
if (attr_changed) {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e257653f25ab..1a1bd2fe6e98 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -774,6 +774,14 @@ static void nfs4_slot_sequence_acked(struct nfs4_slot *slot,
slot->seq_nr_last_acked = seqnr;
}
+static void nfs4_probe_sequence(struct nfs_client *client, const struct cred *cred,
+ struct nfs4_slot *slot)
+{
+ struct rpc_task *task = _nfs41_proc_sequence(client, cred, slot, true);
+ if (!IS_ERR(task))
+ rpc_put_task_async(task);
+}
+
static int nfs41_sequence_process(struct rpc_task *task,
struct nfs4_sequence_res *res)
{
@@ -790,6 +798,7 @@ static int nfs41_sequence_process(struct rpc_task *task,
goto out;
session = slot->table->session;
+ clp = session->clp;
trace_nfs4_sequence_done(session, res);
@@ -804,7 +813,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
nfs4_slot_sequence_acked(slot, slot->seq_nr);
/* Update the slot's sequence and clientid lease timer */
slot->seq_done = 1;
- clp = session->clp;
do_renew_lease(clp, res->sr_timestamp);
/* Check sequence flags */
nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags,
@@ -852,10 +860,18 @@ static int nfs41_sequence_process(struct rpc_task *task,
/*
* Were one or more calls using this slot interrupted?
* If the server never received the request, then our
- * transmitted slot sequence number may be too high.
+ * transmitted slot sequence number may be too high. However,
+ * if the server did receive the request then it might
+ * accidentally give us a reply with a mismatched operation.
+ * We can sort this out by sending a lone sequence operation
+ * to the server on the same slot.
*/
if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) {
slot->seq_nr--;
+ if (task->tk_msg.rpc_proc != &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE]) {
+ nfs4_probe_sequence(clp, task->tk_msg.rpc_cred, slot);
+ res->sr_slot = NULL;
+ }
goto retry_nowait;
}
/*
@@ -7870,7 +7886,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
}
static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = {
- .rpc_call_done = &nfs4_bind_one_conn_to_session_done,
+ .rpc_call_done = nfs4_bind_one_conn_to_session_done,
};
/*
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 10ec5ecdf117..65c331f75e9c 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -78,6 +78,8 @@ enum {
/* Checksum this amount of the request */
#define RC_CSUMLEN (256U)
+int nfsd_drc_slab_create(void);
+void nfsd_drc_slab_free(void);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 9a4ef815fb8c..ed53e206a299 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -139,7 +139,6 @@ struct nfsd_net {
* Duplicate reply cache
*/
struct nfsd_drc_bucket *drc_hashtbl;
- struct kmem_cache *drc_slab;
/* max number of entries allowed in the cache */
unsigned int max_drc_entries;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index afca3287184b..efe55d101b0e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1230,6 +1230,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses);
if (err) {
nfsd4_mark_cb_down(clp, err);
+ if (c)
+ svc_xprt_put(c->cn_xprt);
return;
}
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8650a97e2ba9..9af9b673f292 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7705,9 +7705,14 @@ nfs4_state_start_net(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int ret;
- ret = nfs4_state_create_net(net);
+ ret = get_nfsdfs(net);
if (ret)
return ret;
+ ret = nfs4_state_create_net(net);
+ if (ret) {
+ mntput(nn->nfsd_mnt);
+ return ret;
+ }
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
@@ -7776,6 +7781,7 @@ nfs4_state_shutdown_net(struct net *net)
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
+ mntput(nn->nfsd_mnt);
}
void
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 96352ab7bd81..4a258065188e 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -36,6 +36,8 @@ struct nfsd_drc_bucket {
spinlock_t cache_lock;
};
+static struct kmem_cache *drc_slab;
+
static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
@@ -95,7 +97,7 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
{
struct svc_cacherep *rp;
- rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL);
+ rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
if (rp) {
rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE;
@@ -129,7 +131,7 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
atomic_dec(&nn->num_drc_entries);
nn->drc_mem_usage -= sizeof(*rp);
}
- kmem_cache_free(nn->drc_slab, rp);
+ kmem_cache_free(drc_slab, rp);
}
static void
@@ -141,6 +143,18 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
spin_unlock(&b->cache_lock);
}
+int nfsd_drc_slab_create(void)
+{
+ drc_slab = kmem_cache_create("nfsd_drc",
+ sizeof(struct svc_cacherep), 0, 0, NULL);
+ return drc_slab ? 0: -ENOMEM;
+}
+
+void nfsd_drc_slab_free(void)
+{
+ kmem_cache_destroy(drc_slab);
+}
+
int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
@@ -159,18 +173,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
if (status)
goto out_nomem;
- nn->drc_slab = kmem_cache_create("nfsd_drc",
- sizeof(struct svc_cacherep), 0, 0, NULL);
- if (!nn->drc_slab)
- goto out_shrinker;
-
nn->drc_hashtbl = kcalloc(hashsize,
sizeof(*nn->drc_hashtbl), GFP_KERNEL);
if (!nn->drc_hashtbl) {
nn->drc_hashtbl = vzalloc(array_size(hashsize,
sizeof(*nn->drc_hashtbl)));
if (!nn->drc_hashtbl)
- goto out_slab;
+ goto out_shrinker;
}
for (i = 0; i < hashsize; i++) {
@@ -180,8 +189,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
nn->drc_hashsize = hashsize;
return 0;
-out_slab:
- kmem_cache_destroy(nn->drc_slab);
out_shrinker:
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
out_nomem:
@@ -209,8 +216,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
nn->drc_hashtbl = NULL;
nn->drc_hashsize = 0;
- kmem_cache_destroy(nn->drc_slab);
- nn->drc_slab = NULL;
}
/*
@@ -464,8 +469,7 @@ found_entry:
rtn = RC_REPLY;
break;
default:
- printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
- nfsd_reply_cache_free_locked(b, rp, nn);
+ WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
}
goto out;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d77c5261c03c..be418fccc9d8 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1335,6 +1335,7 @@ void nfsd_client_rmdir(struct dentry *dentry)
WARN_ON_ONCE(ret);
fsnotify_rmdir(dir, dentry);
d_delete(dentry);
+ dput(dentry);
inode_unlock(dir);
}
@@ -1424,6 +1425,18 @@ static struct file_system_type nfsd_fs_type = {
};
MODULE_ALIAS_FS("nfsd");
+int get_nfsdfs(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct vfsmount *mnt;
+
+ mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+ nn->nfsd_mnt = mnt;
+ return 0;
+}
+
#ifdef CONFIG_PROC_FS
static int create_proc_exports_entry(void)
{
@@ -1452,7 +1465,6 @@ unsigned int nfsd_net_id;
static __net_init int nfsd_init_net(struct net *net)
{
int retval;
- struct vfsmount *mnt;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
retval = nfsd_export_init(net);
@@ -1479,16 +1491,8 @@ static __net_init int nfsd_init_net(struct net *net)
init_waitqueue_head(&nn->ntf_wq);
seqlock_init(&nn->boot_lock);
- mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
- if (IS_ERR(mnt)) {
- retval = PTR_ERR(mnt);
- goto out_mount_err;
- }
- nn->nfsd_mnt = mnt;
return 0;
-out_mount_err:
- nfsd_reply_cache_shutdown(nn);
out_drc_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
@@ -1501,7 +1505,6 @@ static __net_exit void nfsd_exit_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- mntput(nn->nfsd_mnt);
nfsd_reply_cache_shutdown(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
@@ -1534,6 +1537,9 @@ static int __init init_nfsd(void)
goto out_free_slabs;
nfsd_fault_inject_init(); /* nfsd fault injection controls */
nfsd_stat_init(); /* Statistics */
+ retval = nfsd_drc_slab_create();
+ if (retval)
+ goto out_free_stat;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
@@ -1547,6 +1553,8 @@ out_free_all:
remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
nfsd_lockd_shutdown();
+ nfsd_drc_slab_free();
+out_free_stat:
nfsd_stat_shutdown();
nfsd_fault_inject_cleanup();
nfsd4_exit_pnfs();
@@ -1561,6 +1569,7 @@ out_unregister_pernet:
static void __exit exit_nfsd(void)
{
+ nfsd_drc_slab_free();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index af2947551e9c..4ff0c5318a02 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -87,6 +87,8 @@ int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_destroy(struct net *net);
+int get_nfsdfs(struct net *);
+
struct nfsdfs_client {
struct kref cl_ref;
void (*cl_release)(struct kref *kref);
@@ -97,6 +99,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
void nfsd_client_rmdir(struct dentry *dentry);
+
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
#ifdef CONFIG_NFSD_V2_ACL
extern const struct svc_version nfsd_acl_version2;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 005d1802ab40..b6f4b552c9af 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1184,6 +1184,9 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_mode = 0;
iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
+ if (!IS_POSIXACL(dirp))
+ iap->ia_mode &= ~current_umask();
+
err = 0;
host_err = 0;
switch (type) {
@@ -1416,6 +1419,9 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
}
+ if (!IS_POSIXACL(dirp))
+ iap->ia_mode &= ~current_umask();
+
host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
if (host_err < 0) {
fh_drop_write(fhp);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 8a2e284ccfcd..e2c34c704185 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -689,6 +689,12 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
&ocfs2_nfs_sync_lops, osb);
}
+static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb)
+{
+ ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
+ init_rwsem(&osb->nfs_sync_rwlock);
+}
+
void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
{
struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
@@ -2855,6 +2861,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
if (ocfs2_is_hard_readonly(osb))
return -EROFS;
+ if (ex)
+ down_write(&osb->nfs_sync_rwlock);
+ else
+ down_read(&osb->nfs_sync_rwlock);
+
if (ocfs2_mount_local(osb))
return 0;
@@ -2873,6 +2884,10 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
if (!ocfs2_mount_local(osb))
ocfs2_cluster_unlock(osb, lockres,
ex ? LKM_EXMODE : LKM_PRMODE);
+ if (ex)
+ up_write(&osb->nfs_sync_rwlock);
+ else
+ up_read(&osb->nfs_sync_rwlock);
}
int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
@@ -3340,7 +3355,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
local:
ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
- ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
+ ocfs2_nfs_sync_lock_init(osb);
ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
osb->cconn = conn;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 9150cfa4df7d..9461bd3e1c0c 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -394,6 +394,7 @@ struct ocfs2_super
struct ocfs2_lock_res osb_super_lockres;
struct ocfs2_lock_res osb_rename_lockres;
struct ocfs2_lock_res osb_nfs_sync_lockres;
+ struct rw_semaphore nfs_sync_rwlock;
struct ocfs2_lock_res osb_trim_fs_lockres;
struct mutex obs_trim_fs_mutex;
struct ocfs2_dlm_debug *osb_dlm_debug;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 0db4a7ec58a2..dcef83c8796d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -290,7 +290,7 @@
#define OCFS2_MAX_SLOTS 255
/* Slot map indicator for an empty slot */
-#define OCFS2_INVALID_SLOT -1
+#define OCFS2_INVALID_SLOT ((u16)-1)
#define OCFS2_VOL_UUID_LEN 16
#define OCFS2_MAX_VOL_LABEL_LEN 64
@@ -326,8 +326,8 @@ struct ocfs2_system_inode_info {
enum {
BAD_BLOCK_SYSTEM_INODE = 0,
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE
SLOT_MAP_SYSTEM_INODE,
-#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
USER_QUOTA_SYSTEM_INODE,
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 69c21a3843af..503e724d39f5 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2827,9 +2827,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
goto bail;
}
- inode_alloc_inode =
- ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
- suballoc_slot);
+ if (suballoc_slot == (u16)OCFS2_INVALID_SLOT)
+ inode_alloc_inode = ocfs2_get_system_file_inode(osb,
+ GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
+ else
+ inode_alloc_inode = ocfs2_get_system_file_inode(osb,
+ INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
if (!inode_alloc_inode) {
/* the error code could be inaccurate, but we are not able to
* get the correct one. */
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 73c9775215b3..11dd8177770d 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -482,7 +482,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
if (IS_ERR_OR_NULL(this))
return this;
- if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
+ if (ovl_dentry_real_at(this, layer->idx) != real) {
dput(this);
this = ERR_PTR(-EIO);
}
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 15e4fa288475..7a08a576f7b2 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -21,13 +21,16 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
return 'm';
}
+/* No atime modificaton nor notify on underlying */
+#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
+
static struct file *ovl_open_realfile(const struct file *file,
struct inode *realinode)
{
struct inode *inode = file_inode(file);
struct file *realfile;
const struct cred *old_cred;
- int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY;
+ int flags = file->f_flags | OVL_OPEN_FLAGS;
old_cred = ovl_override_creds(inode->i_sb);
realfile = open_with_fake_path(&file->f_path, flags, realinode,
@@ -48,8 +51,7 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
struct inode *inode = file_inode(file);
int err;
- /* No atime modificaton on underlying */
- flags |= O_NOATIME | FMODE_NONOTIFY;
+ flags |= OVL_OPEN_FLAGS;
/* If some flag changed that cannot be changed then something's amiss */
if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
@@ -102,7 +104,7 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
}
/* Did the flags change since open? */
- if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
+ if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
return ovl_change_flags(real->file, file->f_flags);
return 0;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 7621ff176d15..d6b724beb304 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1258,6 +1258,18 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
if (!ofs->config.nfs_export && !ofs->upper_mnt)
return true;
+ /*
+ * We allow using single lower with null uuid for index and nfs_export
+ * for example to support those features with single lower squashfs.
+ * To avoid regressions in setups of overlay with re-formatted lower
+ * squashfs, do not allow decoding origin with lower null uuid unless
+ * user opted-in to one of the new features that require following the
+ * lower inode of non-dir upper.
+ */
+ if (!ofs->config.index && !ofs->config.metacopy && !ofs->config.xino &&
+ uuid_is_null(uuid))
+ return false;
+
for (i = 0; i < ofs->numlowerfs; i++) {
/*
* We use uuid to associate an overlay lower file handle with a
@@ -1344,14 +1356,23 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
if (err < 0)
goto out;
+ /*
+ * Check if lower root conflicts with this overlay layers before
+ * checking if it is in-use as upperdir/workdir of "another"
+ * mount, because we do not bother to check in ovl_is_inuse() if
+ * the upperdir/workdir is in fact in-use by our
+ * upperdir/workdir.
+ */
err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
if (err)
goto out;
if (ovl_is_inuse(stack[i].dentry)) {
err = ovl_report_in_use(ofs, "lowerdir");
- if (err)
+ if (err) {
+ iput(trap);
goto out;
+ }
}
mnt = clone_private_mount(&stack[i]);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 533b04aaf6f6..0a36f532cf86 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2598,6 +2598,13 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
return __this_address;
+ if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
+ return __this_address;
+
+ if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
+ be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
+ return __this_address;
+
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
@@ -2609,6 +2616,10 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
return __this_address;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+ be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
+ return __this_address;
+
/*
* during growfs operations, the perag is not fully initialised,
* so we can't use it for any useful checking. growfs ensures we can't
@@ -2623,6 +2634,11 @@ xfs_agf_verify(
return __this_address;
if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+ be32_to_cpu(agf->agf_refcount_blocks) >
+ be32_to_cpu(agf->agf_length))
+ return __this_address;
+
+ if (xfs_sb_version_hasreflink(&mp->m_sb) &&
(be32_to_cpu(agf->agf_refcount_level) < 1 ||
be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
return __this_address;