summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c4
-rw-r--r--fs/afs/fs_probe.c39
-rw-r--r--fs/afs/inode.c18
-rw-r--r--fs/afs/internal.h9
-rw-r--r--fs/afs/misc.c52
-rw-r--r--fs/afs/rotate.c53
-rw-r--r--fs/afs/rxrpc.c11
-rw-r--r--fs/afs/vl_probe.c45
-rw-r--r--fs/afs/vl_rotate.c50
-rw-r--r--fs/aio.c3
-rw-r--r--fs/btrfs/disk-io.c11
-rw-r--r--fs/btrfs/file.c24
-rw-r--r--fs/btrfs/qgroup.c3
-rw-r--r--fs/btrfs/relocation.c1
-rw-r--r--fs/btrfs/send.c11
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--fs/btrfs/tree-checker.c8
-rw-r--r--fs/cachefiles/namei.c8
-rw-r--r--fs/cachefiles/rdwr.c9
-rw-r--r--fs/cachefiles/xattr.c3
-rw-r--r--fs/ceph/super.c4
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/file.c31
-rw-r--r--fs/dax.c89
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/exportfs/expfs.c3
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/fscache/object.c3
-rw-r--r--fs/fuse/dir.c26
-rw-r--r--fs/fuse/file.c64
-rw-r--r--fs/fuse/fuse_i.h4
-rw-r--r--fs/fuse/inode.c3
-rw-r--r--fs/hfs/btree.c3
-rw-r--r--fs/hfsplus/btree.c3
-rw-r--r--fs/iomap.c51
-rw-r--r--fs/nfs/callback_proc.c22
-rw-r--r--fs/nfs/direct.c9
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c23
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h4
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c19
-rw-r--r--fs/nfs/nfs42proc.c19
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4state.c16
-rw-r--r--fs/nilfs2/btnode.c4
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/move_extents.c47
-rw-r--r--fs/overlayfs/dir.c14
-rw-r--r--fs/overlayfs/export.c6
-rw-r--r--fs/overlayfs/inode.c17
-rw-r--r--fs/pstore/ram.c15
-rw-r--r--fs/read_write.c17
-rw-r--r--fs/splice.c7
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/udf/super.c16
-rw-r--r--fs/udf/unicode.c14
-rw-r--r--fs/userfaultfd.c16
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c5
-rw-r--r--fs/xfs/libxfs/xfs_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c11
-rw-r--r--fs/xfs/xfs_bmap_util.c14
-rw-r--r--fs/xfs/xfs_bmap_util.h3
-rw-r--r--fs/xfs/xfs_buf_item.c28
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_reflink.c18
-rw-r--r--fs/xfs/xfs_trace.h5
69 files changed, 641 insertions, 402 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 43dea3b00c29..8a2562e3a316 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1075,8 +1075,6 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
if (fc->ac.error < 0)
return;
- d_drop(new_dentry);
-
inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
newfid, newstatus, newcb, fc->cbi);
if (IS_ERR(inode)) {
@@ -1090,7 +1088,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
vnode = AFS_FS_I(inode);
set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
afs_vnode_commit_status(fc, vnode, 0);
- d_add(new_dentry, inode);
+ d_instantiate(new_dentry, inode);
}
/*
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index d049cb459742..fde6b4d4121e 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -61,8 +61,11 @@ void afs_fileserver_probe_result(struct afs_call *call)
afs_io_error(call, afs_io_error_fs_probe_fail);
goto out;
case -ECONNRESET: /* Responded, but call expired. */
+ case -ERFKILL:
+ case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
+ case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
@@ -132,12 +135,14 @@ out:
static int afs_do_probe_fileserver(struct afs_net *net,
struct afs_server *server,
struct key *key,
- unsigned int server_index)
+ unsigned int server_index,
+ struct afs_error *_e)
{
struct afs_addr_cursor ac = {
.index = 0,
};
- int ret;
+ bool in_progress = false;
+ int err;
_enter("%pU", &server->uuid);
@@ -151,15 +156,17 @@ static int afs_do_probe_fileserver(struct afs_net *net,
server->probe.rtt = UINT_MAX;
for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
- ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
+ err = afs_fs_get_capabilities(net, server, &ac, key, server_index,
true);
- if (ret != -EINPROGRESS) {
- afs_fs_probe_done(server);
- return ret;
- }
+ if (err == -EINPROGRESS)
+ in_progress = true;
+ else
+ afs_prioritise_error(_e, err, ac.abort_code);
}
- return 0;
+ if (!in_progress)
+ afs_fs_probe_done(server);
+ return in_progress;
}
/*
@@ -169,21 +176,23 @@ int afs_probe_fileservers(struct afs_net *net, struct key *key,
struct afs_server_list *list)
{
struct afs_server *server;
- int i, ret;
+ struct afs_error e;
+ bool in_progress = false;
+ int i;
+ e.error = 0;
+ e.responded = false;
for (i = 0; i < list->nr_servers; i++) {
server = list->servers[i].server;
if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
continue;
- if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
- ret = afs_do_probe_fileserver(net, server, key, i);
- if (ret)
- return ret;
- }
+ if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags) &&
+ afs_do_probe_fileserver(net, server, key, i, &e))
+ in_progress = true;
}
- return 0;
+ return in_progress ? 0 : e.error;
}
/*
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4c6d8e1112c2..6b17d3620414 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -382,7 +382,7 @@ void afs_zap_data(struct afs_vnode *vnode)
int afs_validate(struct afs_vnode *vnode, struct key *key)
{
time64_t now = ktime_get_real_seconds();
- bool valid = false;
+ bool valid;
int ret;
_enter("{v={%llx:%llu} fl=%lx},%x",
@@ -402,15 +402,21 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
vnode->cb_v_break = vnode->volume->cb_v_break;
valid = false;
} else if (vnode->status.type == AFS_FTYPE_DIR &&
- test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) &&
- vnode->cb_expires_at - 10 > now) {
- valid = true;
- } else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
- vnode->cb_expires_at - 10 > now) {
+ (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) ||
+ vnode->cb_expires_at - 10 <= now)) {
+ valid = false;
+ } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) ||
+ vnode->cb_expires_at - 10 <= now) {
+ valid = false;
+ } else {
valid = true;
}
} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
valid = true;
+ } else {
+ vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
+ vnode->cb_v_break = vnode->volume->cb_v_break;
+ valid = false;
}
read_sequnlock_excl(&vnode->cb_lock);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5da3b09b7518..8871b9e8645f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -696,6 +696,14 @@ struct afs_interface {
};
/*
+ * Error prioritisation and accumulation.
+ */
+struct afs_error {
+ short error; /* Accumulated error */
+ bool responded; /* T if server responded */
+};
+
+/*
* Cursor for iterating over a server's address list.
*/
struct afs_addr_cursor {
@@ -1015,6 +1023,7 @@ static inline void __afs_stat(atomic_t *s)
* misc.c
*/
extern int afs_abort_to_error(u32);
+extern void afs_prioritise_error(struct afs_error *, int, u32);
/*
* mntpt.c
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 700a5fa7f4ec..bbb1fd51b019 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -118,3 +118,55 @@ int afs_abort_to_error(u32 abort_code)
default: return -EREMOTEIO;
}
}
+
+/*
+ * Select the error to report from a set of errors.
+ */
+void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
+{
+ switch (error) {
+ case 0:
+ return;
+ default:
+ if (e->error == -ETIMEDOUT ||
+ e->error == -ETIME)
+ return;
+ case -ETIMEDOUT:
+ case -ETIME:
+ if (e->error == -ENOMEM ||
+ e->error == -ENONET)
+ return;
+ case -ENOMEM:
+ case -ENONET:
+ if (e->error == -ERFKILL)
+ return;
+ case -ERFKILL:
+ if (e->error == -EADDRNOTAVAIL)
+ return;
+ case -EADDRNOTAVAIL:
+ if (e->error == -ENETUNREACH)
+ return;
+ case -ENETUNREACH:
+ if (e->error == -EHOSTUNREACH)
+ return;
+ case -EHOSTUNREACH:
+ if (e->error == -EHOSTDOWN)
+ return;
+ case -EHOSTDOWN:
+ if (e->error == -ECONNREFUSED)
+ return;
+ case -ECONNREFUSED:
+ if (e->error == -ECONNRESET)
+ return;
+ case -ECONNRESET: /* Responded, but call expired. */
+ if (e->responded)
+ return;
+ e->error = error;
+ return;
+
+ case -ECONNABORTED:
+ e->responded = true;
+ e->error = afs_abort_to_error(abort_code);
+ return;
+ }
+}
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 00504254c1c2..c3ae324781f8 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -136,7 +136,8 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
struct afs_addr_list *alist;
struct afs_server *server;
struct afs_vnode *vnode = fc->vnode;
- u32 rtt, abort_code;
+ struct afs_error e;
+ u32 rtt;
int error = fc->ac.error, i;
_enter("%lx[%d],%lx[%d],%d,%d",
@@ -306,8 +307,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
if (fc->error != -EDESTADDRREQ)
goto iterate_address;
/* Fall through */
+ case -ERFKILL:
+ case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
+ case -EHOSTDOWN:
case -ECONNREFUSED:
_debug("no conn");
fc->error = error;
@@ -446,50 +450,15 @@ no_more_servers:
if (fc->flags & AFS_FS_CURSOR_VBUSY)
goto restart_from_beginning;
- abort_code = 0;
- error = -EDESTADDRREQ;
+ e.error = -EDESTADDRREQ;
+ e.responded = false;
for (i = 0; i < fc->server_list->nr_servers; i++) {
struct afs_server *s = fc->server_list->servers[i].server;
- int probe_error = READ_ONCE(s->probe.error);
- switch (probe_error) {
- case 0:
- continue;
- default:
- if (error == -ETIMEDOUT ||
- error == -ETIME)
- continue;
- case -ETIMEDOUT:
- case -ETIME:
- if (error == -ENOMEM ||
- error == -ENONET)
- continue;
- case -ENOMEM:
- case -ENONET:
- if (error == -ENETUNREACH)
- continue;
- case -ENETUNREACH:
- if (error == -EHOSTUNREACH)
- continue;
- case -EHOSTUNREACH:
- if (error == -ECONNREFUSED)
- continue;
- case -ECONNREFUSED:
- if (error == -ECONNRESET)
- continue;
- case -ECONNRESET: /* Responded, but call expired. */
- if (error == -ECONNABORTED)
- continue;
- case -ECONNABORTED:
- abort_code = s->probe.abort_code;
- error = probe_error;
- continue;
- }
+ afs_prioritise_error(&e, READ_ONCE(s->probe.error),
+ s->probe.abort_code);
}
- if (error == -ECONNABORTED)
- error = afs_abort_to_error(abort_code);
-
failed_set_error:
fc->error = error;
failed:
@@ -553,8 +522,11 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
_leave(" = f [abort]");
return false;
+ case -ERFKILL:
+ case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
+ case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
@@ -633,6 +605,7 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
struct afs_net *net = afs_v2net(fc->vnode);
if (fc->error == -EDESTADDRREQ ||
+ fc->error == -EADDRNOTAVAIL ||
fc->error == -ENETUNREACH ||
fc->error == -EHOSTUNREACH)
afs_dump_edestaddrreq(fc);
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 59970886690f..a7b44863d502 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -576,6 +576,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
{
signed long rtt2, timeout;
long ret;
+ bool stalled = false;
u64 rtt;
u32 life, last_life;
@@ -609,12 +610,20 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
if (timeout == 0 &&
- life == last_life && signal_pending(current))
+ life == last_life && signal_pending(current)) {
+ if (stalled)
break;
+ __set_current_state(TASK_RUNNING);
+ rxrpc_kernel_probe_life(call->net->socket, call->rxcall);
+ timeout = rtt2;
+ stalled = true;
+ continue;
+ }
if (life != last_life) {
timeout = rtt2;
last_life = life;
+ stalled = false;
}
timeout = schedule_timeout(timeout);
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
index c0f616bd70cb..f0b032976487 100644
--- a/fs/afs/vl_probe.c
+++ b/fs/afs/vl_probe.c
@@ -61,8 +61,11 @@ void afs_vlserver_probe_result(struct afs_call *call)
afs_io_error(call, afs_io_error_vl_probe_fail);
goto out;
case -ECONNRESET: /* Responded, but call expired. */
+ case -ERFKILL:
+ case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
+ case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
@@ -129,15 +132,17 @@ out:
* Probe all of a vlserver's addresses to find out the best route and to
* query its capabilities.
*/
-static int afs_do_probe_vlserver(struct afs_net *net,
- struct afs_vlserver *server,
- struct key *key,
- unsigned int server_index)
+static bool afs_do_probe_vlserver(struct afs_net *net,
+ struct afs_vlserver *server,
+ struct key *key,
+ unsigned int server_index,
+ struct afs_error *_e)
{
struct afs_addr_cursor ac = {
.index = 0,
};
- int ret;
+ bool in_progress = false;
+ int err;
_enter("%s", server->name);
@@ -151,15 +156,17 @@ static int afs_do_probe_vlserver(struct afs_net *net,
server->probe.rtt = UINT_MAX;
for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
- ret = afs_vl_get_capabilities(net, &ac, key, server,
+ err = afs_vl_get_capabilities(net, &ac, key, server,
server_index, true);
- if (ret != -EINPROGRESS) {
- afs_vl_probe_done(server);
- return ret;
- }
+ if (err == -EINPROGRESS)
+ in_progress = true;
+ else
+ afs_prioritise_error(_e, err, ac.abort_code);
}
- return 0;
+ if (!in_progress)
+ afs_vl_probe_done(server);
+ return in_progress;
}
/*
@@ -169,21 +176,23 @@ int afs_send_vl_probes(struct afs_net *net, struct key *key,
struct afs_vlserver_list *vllist)
{
struct afs_vlserver *server;
- int i, ret;
+ struct afs_error e;
+ bool in_progress = false;
+ int i;
+ e.error = 0;
+ e.responded = false;
for (i = 0; i < vllist->nr_servers; i++) {
server = vllist->servers[i].server;
if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
continue;
- if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) {
- ret = afs_do_probe_vlserver(net, server, key, i);
- if (ret)
- return ret;
- }
+ if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags) &&
+ afs_do_probe_vlserver(net, server, key, i, &e))
+ in_progress = true;
}
- return 0;
+ return in_progress ? 0 : e.error;
}
/*
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index b64a284b99d2..7adde83a0648 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -71,8 +71,9 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
{
struct afs_addr_list *alist;
struct afs_vlserver *vlserver;
+ struct afs_error e;
u32 rtt;
- int error = vc->ac.error, abort_code, i;
+ int error = vc->ac.error, i;
_enter("%lx[%d],%lx[%d],%d,%d",
vc->untried, vc->index,
@@ -119,8 +120,11 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
goto failed;
}
+ case -ERFKILL:
+ case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
+ case -EHOSTDOWN:
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ETIME:
@@ -235,50 +239,15 @@ no_more_servers:
if (vc->flags & AFS_VL_CURSOR_RETRY)
goto restart_from_beginning;
- abort_code = 0;
- error = -EDESTADDRREQ;
+ e.error = -EDESTADDRREQ;
+ e.responded = false;
for (i = 0; i < vc->server_list->nr_servers; i++) {
struct afs_vlserver *s = vc->server_list->servers[i].server;
- int probe_error = READ_ONCE(s->probe.error);
- switch (probe_error) {
- case 0:
- continue;
- default:
- if (error == -ETIMEDOUT ||
- error == -ETIME)
- continue;
- case -ETIMEDOUT:
- case -ETIME:
- if (error == -ENOMEM ||
- error == -ENONET)
- continue;
- case -ENOMEM:
- case -ENONET:
- if (error == -ENETUNREACH)
- continue;
- case -ENETUNREACH:
- if (error == -EHOSTUNREACH)
- continue;
- case -EHOSTUNREACH:
- if (error == -ECONNREFUSED)
- continue;
- case -ECONNREFUSED:
- if (error == -ECONNRESET)
- continue;
- case -ECONNRESET: /* Responded, but call expired. */
- if (error == -ECONNABORTED)
- continue;
- case -ECONNABORTED:
- abort_code = s->probe.abort_code;
- error = probe_error;
- continue;
- }
+ afs_prioritise_error(&e, READ_ONCE(s->probe.error),
+ s->probe.abort_code);
}
- if (error == -ECONNABORTED)
- error = afs_abort_to_error(abort_code);
-
failed_set_error:
vc->error = error;
failed:
@@ -341,6 +310,7 @@ int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
struct afs_net *net = vc->cell->net;
if (vc->error == -EDESTADDRREQ ||
+ vc->error == -EADDRNOTAVAIL ||
vc->error == -ENETUNREACH ||
vc->error == -EHOSTUNREACH)
afs_vl_dump_edestaddrreq(vc);
diff --git a/fs/aio.c b/fs/aio.c
index 301e6314183b..aac9659381d2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -45,6 +45,7 @@
#include <asm/kmap_types.h>
#include <linux/uaccess.h>
+#include <linux/nospec.h>
#include "internal.h"
@@ -1038,6 +1039,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
if (!table || id >= table->nr)
goto out;
+ id = array_index_nospec(id, table->nr);
ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
if (percpu_ref_tryget_live(&ctx->users))
@@ -1436,6 +1438,7 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
ret = ioprio_check_cap(iocb->aio_reqprio);
if (ret) {
pr_debug("aio ioprio check cap error: %d\n", ret);
+ fput(req->ki_filp);
return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3f0b6d1936e8..6d776717d8b3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -477,9 +477,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
int mirror_num = 0;
int failed_mirror = 0;
- clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) {
+ clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
mirror_num);
if (!ret) {
@@ -493,15 +493,6 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
break;
}
- /*
- * This buffer's crc is fine, but its contents are corrupted, so
- * there is no reason to read the other copies, they won't be
- * any less wrong.
- */
- if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) ||
- ret == -EUCLEAN)
- break;
-
num_copies = btrfs_num_copies(fs_info,
eb->start, eb->len);
if (num_copies == 1)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a3c22e16509b..58e93bce3036 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2089,6 +2089,30 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
/*
+ * Before we acquired the inode's lock, someone may have dirtied more
+ * pages in the target range. We need to make sure that writeback for
+ * any such pages does not start while we are logging the inode, because
+ * if it does, any of the following might happen when we are not doing a
+ * full inode sync:
+ *
+ * 1) We log an extent after its writeback finishes but before its
+ * checksums are added to the csum tree, leading to -EIO errors
+ * when attempting to read the extent after a log replay.
+ *
+ * 2) We can end up logging an extent before its writeback finishes.
+ * Therefore after the log replay we will have a file extent item
+ * pointing to an unwritten extent (and no data checksums as well).
+ *
+ * So trigger writeback for any eventual new dirty pages and then we
+ * wait for all ordered extents to complete below.
+ */
+ ret = start_ordered_ops(inode, start, end);
+ if (ret) {
+ inode_unlock(inode);
+ goto out;
+ }
+
+ /*
* We have to do this here to avoid the priority inversion of waiting on
* IO of a lower priority task while holding a transaciton open.
*/
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 45868fd76209..f70825af6438 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2659,7 +2659,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
int i;
u64 *i_qgroups;
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_root *quota_root = fs_info->quota_root;
+ struct btrfs_root *quota_root;
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
u32 level_size = 0;
@@ -2669,6 +2669,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
goto out;
+ quota_root = fs_info->quota_root;
if (!quota_root) {
ret = -EINVAL;
goto out;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 924116f654a1..a3f75b8926d4 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3959,6 +3959,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans);
+ trans = NULL;
continue;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 094cc1444a90..5be83b5a1b43 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3340,7 +3340,8 @@ static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
kfree(m);
}
-static void tail_append_pending_moves(struct pending_dir_move *moves,
+static void tail_append_pending_moves(struct send_ctx *sctx,
+ struct pending_dir_move *moves,
struct list_head *stack)
{
if (list_empty(&moves->list)) {
@@ -3351,6 +3352,10 @@ static void tail_append_pending_moves(struct pending_dir_move *moves,
list_add_tail(&moves->list, stack);
list_splice_tail(&list, stack);
}
+ if (!RB_EMPTY_NODE(&moves->node)) {
+ rb_erase(&moves->node, &sctx->pending_dir_moves);
+ RB_CLEAR_NODE(&moves->node);
+ }
}
static int apply_children_dir_moves(struct send_ctx *sctx)
@@ -3365,7 +3370,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
return 0;
INIT_LIST_HEAD(&stack);
- tail_append_pending_moves(pm, &stack);
+ tail_append_pending_moves(sctx, pm, &stack);
while (!list_empty(&stack)) {
pm = list_first_entry(&stack, struct pending_dir_move, list);
@@ -3376,7 +3381,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
goto out;
pm = get_pending_dir_moves(sctx, parent_ino);
if (pm)
- tail_append_pending_moves(pm, &stack);
+ tail_append_pending_moves(sctx, pm, &stack);
}
return 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index cbc9d0d2c12d..645fc81e2a94 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2237,6 +2237,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
vol = memdup_user((void __user *)arg, sizeof(*vol));
if (IS_ERR(vol))
return PTR_ERR(vol);
+ vol->name[BTRFS_PATH_NAME_MAX] = '\0';
switch (cmd) {
case BTRFS_IOC_SCAN_DEV:
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index efcf89a8ba44..1a4e2b101ef2 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -389,13 +389,11 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
/*
* Here we don't really care about alignment since extent allocator can
- * handle it. We care more about the size, as if one block group is
- * larger than maximum size, it's must be some obvious corruption.
+ * handle it. We care more about the size.
*/
- if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
+ if (key->offset == 0) {
block_group_err(fs_info, leaf, slot,
- "invalid block group size, have %llu expect (0, %llu]",
- key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
+ "invalid block group size 0");
return -EUCLEAN;
}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 95983c744164..1645fcfd9691 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -244,11 +244,13 @@ wait_for_old_object:
ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags));
- cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_retry);
+ cache->cache.ops->put_object(&xobject->fscache,
+ (enum fscache_obj_ref_trace)cachefiles_obj_put_wait_retry);
goto try_again;
requeue:
- cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_timeo);
+ cache->cache.ops->put_object(&xobject->fscache,
+ (enum fscache_obj_ref_trace)cachefiles_obj_put_wait_timeo);
_leave(" = -ETIMEDOUT");
return -ETIMEDOUT;
}
@@ -336,7 +338,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
try_again:
/* first step is to make up a grave dentry in the graveyard */
sprintf(nbuffer, "%08x%08x",
- (uint32_t) get_seconds(),
+ (uint32_t) ktime_get_real_seconds(),
(uint32_t) atomic_inc_return(&cache->gravecounter));
/* do the multiway lock magic */
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 40f7595aad10..8a577409d030 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -535,7 +535,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
netpage->index, cachefiles_gfp);
if (ret < 0) {
if (ret == -EEXIST) {
+ put_page(backpage);
+ backpage = NULL;
put_page(netpage);
+ netpage = NULL;
fscache_retrieval_complete(op, 1);
continue;
}
@@ -608,7 +611,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
netpage->index, cachefiles_gfp);
if (ret < 0) {
if (ret == -EEXIST) {
+ put_page(backpage);
+ backpage = NULL;
put_page(netpage);
+ netpage = NULL;
fscache_retrieval_complete(op, 1);
continue;
}
@@ -962,11 +968,8 @@ void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
__releases(&object->fscache.cookie->lock)
{
struct cachefiles_object *object;
- struct cachefiles_cache *cache;
object = container_of(_object, struct cachefiles_object, fscache);
- cache = container_of(object->fscache.cache,
- struct cachefiles_cache, cache);
_enter("%p,{%lu}", object, page->index);
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 0a29a00aed2e..511e6c68156a 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -135,7 +135,8 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object,
struct dentry *dentry = object->dentry;
int ret;
- ASSERT(dentry);
+ if (!dentry)
+ return -ESTALE;
_enter("%p,#%d", object, auxdata->len);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b5ecd6f50360..4e9a7cc488da 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -563,8 +563,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",noacl");
#endif
- if (fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM)
- seq_puts(m, ",nocopyfrom");
+ if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
+ seq_puts(m, ",copyfrom");
if (fsopt->mds_namespace)
seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c005a5400f2e..79a265ba9200 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -42,7 +42,9 @@
#define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */
#define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */
-#define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE
+#define CEPH_MOUNT_OPT_DEFAULT \
+ (CEPH_MOUNT_OPT_DCACHE | \
+ CEPH_MOUNT_OPT_NOCOPYFROM)
#define ceph_set_mount_opt(fsc, opt) \
(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index abcd78e332fe..85dadb93c992 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -133,7 +133,7 @@ config CIFS_XATTR
config CIFS_POSIX
bool "CIFS POSIX Extensions"
- depends on CIFS_XATTR
+ depends on CIFS && CIFS_ALLOW_INSECURE_LEGACY && CIFS_XATTR
help
Enabling this option will cause the cifs client to attempt to
negotiate a newer dialect with servers, such as Samba 3.0.5
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3713d22b95a7..907e85d65bb4 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -174,7 +174,7 @@ cifs_bp_rename_retry:
cifs_dbg(FYI, "using cifs_sb prepath <%s>\n", cifs_sb->prepath);
memcpy(full_path+dfsplen+1, cifs_sb->prepath, pplen-1);
- full_path[dfsplen] = '\\';
+ full_path[dfsplen] = dirsep;
for (i = 0; i < pplen-1; i++)
if (full_path[dfsplen+1+i] == '/')
full_path[dfsplen+1+i] = CIFS_DIR_SEP(cifs_sb);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 74c33d5fafc8..c9bc56b1baac 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2541,14 +2541,13 @@ static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
struct cifs_aio_ctx *ctx)
{
- int wait_retry = 0;
unsigned int wsize, credits;
int rc;
struct TCP_Server_Info *server =
tlink_tcon(wdata->cfile->tlink)->ses->server;
/*
- * Try to resend this wdata, waiting for credits up to 3 seconds.
+ * Wait for credits to resend this wdata.
* Note: we are attempting to resend the whole wdata not in segments
*/
do {
@@ -2556,19 +2555,13 @@ cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
server, wdata->bytes, &wsize, &credits);
if (rc)
- break;
+ goto out;
if (wsize < wdata->bytes) {
add_credits_and_wake_if(server, credits, 0);
msleep(1000);
- wait_retry++;
}
- } while (wsize < wdata->bytes && wait_retry < 3);
-
- if (wsize < wdata->bytes) {
- rc = -EBUSY;
- goto out;
- }
+ } while (wsize < wdata->bytes);
rc = -EAGAIN;
while (rc == -EAGAIN) {
@@ -3234,14 +3227,13 @@ static int cifs_resend_rdata(struct cifs_readdata *rdata,
struct list_head *rdata_list,
struct cifs_aio_ctx *ctx)
{
- int wait_retry = 0;
unsigned int rsize, credits;
int rc;
struct TCP_Server_Info *server =
tlink_tcon(rdata->cfile->tlink)->ses->server;
/*
- * Try to resend this rdata, waiting for credits up to 3 seconds.
+ * Wait for credits to resend this rdata.
* Note: we are attempting to resend the whole rdata not in segments
*/
do {
@@ -3249,24 +3241,13 @@ static int cifs_resend_rdata(struct cifs_readdata *rdata,
&rsize, &credits);
if (rc)
- break;
+ goto out;
if (rsize < rdata->bytes) {
add_credits_and_wake_if(server, credits, 0);
msleep(1000);
- wait_retry++;
}
- } while (rsize < rdata->bytes && wait_retry < 3);
-
- /*
- * If we can't find enough credits to send this rdata
- * release the rdata and return failure, this will pass
- * whatever I/O amount we have finished to VFS.
- */
- if (rsize < rdata->bytes) {
- rc = -EBUSY;
- goto out;
- }
+ } while (rsize < rdata->bytes);
rc = -EAGAIN;
while (rc == -EAGAIN) {
diff --git a/fs/dax.c b/fs/dax.c
index 616e36ea6aaa..48132eca3761 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -98,12 +98,6 @@ static void *dax_make_entry(pfn_t pfn, unsigned long flags)
return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
}
-static void *dax_make_page_entry(struct page *page)
-{
- pfn_t pfn = page_to_pfn_t(page);
- return dax_make_entry(pfn, PageHead(page) ? DAX_PMD : 0);
-}
-
static bool dax_is_locked(void *entry)
{
return xa_to_value(entry) & DAX_LOCKED;
@@ -116,12 +110,12 @@ static unsigned int dax_entry_order(void *entry)
return 0;
}
-static int dax_is_pmd_entry(void *entry)
+static unsigned long dax_is_pmd_entry(void *entry)
{
return xa_to_value(entry) & DAX_PMD;
}
-static int dax_is_pte_entry(void *entry)
+static bool dax_is_pte_entry(void *entry)
{
return !(xa_to_value(entry) & DAX_PMD);
}
@@ -222,9 +216,8 @@ static void *get_unlocked_entry(struct xa_state *xas)
ewait.wait.func = wake_exceptional_entry_func;
for (;;) {
- entry = xas_load(xas);
- if (!entry || xa_is_internal(entry) ||
- WARN_ON_ONCE(!xa_is_value(entry)) ||
+ entry = xas_find_conflict(xas);
+ if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
!dax_is_locked(entry))
return entry;
@@ -239,6 +232,34 @@ static void *get_unlocked_entry(struct xa_state *xas)
}
}
+/*
+ * The only thing keeping the address space around is the i_pages lock
+ * (it's cycled in clear_inode() after removing the entries from i_pages)
+ * After we call xas_unlock_irq(), we cannot touch xas->xa.
+ */
+static void wait_entry_unlocked(struct xa_state *xas, void *entry)
+{
+ struct wait_exceptional_entry_queue ewait;
+ wait_queue_head_t *wq;
+
+ init_wait(&ewait.wait);
+ ewait.wait.func = wake_exceptional_entry_func;
+
+ wq = dax_entry_waitqueue(xas, entry, &ewait.key);
+ prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
+ xas_unlock_irq(xas);
+ schedule();
+ finish_wait(wq, &ewait.wait);
+
+ /*
+ * Entry lock waits are exclusive. Wake up the next waiter since
+ * we aren't sure we will acquire the entry lock and thus wake
+ * the next waiter up on unlock.
+ */
+ if (waitqueue_active(wq))
+ __wake_up(wq, TASK_NORMAL, 1, &ewait.key);
+}
+
static void put_unlocked_entry(struct xa_state *xas, void *entry)
{
/* If we were the only waiter woken, wake the next one */
@@ -255,6 +276,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry)
{
void *old;
+ BUG_ON(dax_is_locked(entry));
xas_reset(xas);
xas_lock_irq(xas);
old = xas_store(xas, entry);
@@ -352,16 +374,27 @@ static struct page *dax_busy_page(void *entry)
return NULL;
}
-bool dax_lock_mapping_entry(struct page *page)
+/*
+ * dax_lock_mapping_entry - Lock the DAX entry corresponding to a page
+ * @page: The page whose entry we want to lock
+ *
+ * Context: Process context.
+ * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
+ * not be locked.
+ */
+dax_entry_t dax_lock_page(struct page *page)
{
XA_STATE(xas, NULL, 0);
void *entry;
+ /* Ensure page->mapping isn't freed while we look at it */
+ rcu_read_lock();
for (;;) {
struct address_space *mapping = READ_ONCE(page->mapping);
- if (!dax_mapping(mapping))
- return false;
+ entry = NULL;
+ if (!mapping || !dax_mapping(mapping))
+ break;
/*
* In the device-dax case there's no need to lock, a
@@ -370,8 +403,9 @@ bool dax_lock_mapping_entry(struct page *page)
* otherwise we would not have a valid pfn_to_page()
* translation.
*/
+ entry = (void *)~0UL;
if (S_ISCHR(mapping->host->i_mode))
- return true;
+ break;
xas.xa = &mapping->i_pages;
xas_lock_irq(&xas);
@@ -382,20 +416,20 @@ bool dax_lock_mapping_entry(struct page *page)
xas_set(&xas, page->index);
entry = xas_load(&xas);
if (dax_is_locked(entry)) {
- entry = get_unlocked_entry(&xas);
- /* Did the page move while we slept? */
- if (dax_to_pfn(entry) != page_to_pfn(page)) {
- xas_unlock_irq(&xas);
- continue;
- }
+ rcu_read_unlock();
+ wait_entry_unlocked(&xas, entry);
+ rcu_read_lock();
+ continue;
}
dax_lock_entry(&xas, entry);
xas_unlock_irq(&xas);
- return true;
+ break;
}
+ rcu_read_unlock();
+ return (dax_entry_t)entry;
}
-void dax_unlock_mapping_entry(struct page *page)
+void dax_unlock_page(struct page *page, dax_entry_t cookie)
{
struct address_space *mapping = page->mapping;
XA_STATE(xas, &mapping->i_pages, page->index);
@@ -403,7 +437,7 @@ void dax_unlock_mapping_entry(struct page *page)
if (S_ISCHR(mapping->host->i_mode))
return;
- dax_unlock_entry(&xas, dax_make_page_entry(page));
+ dax_unlock_entry(&xas, (void *)cookie);
}
/*
@@ -445,11 +479,9 @@ static void *grab_mapping_entry(struct xa_state *xas,
retry:
xas_lock_irq(xas);
entry = get_unlocked_entry(xas);
- if (xa_is_internal(entry))
- goto fallback;
if (entry) {
- if (WARN_ON_ONCE(!xa_is_value(entry))) {
+ if (!xa_is_value(entry)) {
xas_set_err(xas, EIO);
goto out_unlock;
}
@@ -1628,8 +1660,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
/* Did we race with someone splitting entry or so? */
if (!entry ||
(order == 0 && !dax_is_pte_entry(entry)) ||
- (order == PMD_ORDER && (xa_is_internal(entry) ||
- !dax_is_pmd_entry(entry)))) {
+ (order == PMD_ORDER && !dax_is_pmd_entry(entry))) {
put_unlocked_entry(&xas, entry);
xas_unlock_irq(&xas);
trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 722d17c88edb..41a0e97252ae 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -325,8 +325,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
*/
dio->iocb->ki_pos += transferred;
- if (dio->op == REQ_OP_WRITE)
- ret = generic_write_sync(dio->iocb, transferred);
+ if (ret > 0 && dio->op == REQ_OP_WRITE)
+ ret = generic_write_sync(dio->iocb, ret);
dio->iocb->ki_complete(dio->iocb, ret, 0);
}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 645158dc33f1..c69927bed4ef 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -77,7 +77,7 @@ static bool dentry_connected(struct dentry *dentry)
struct dentry *parent = dget_parent(dentry);
dput(dentry);
- if (IS_ROOT(dentry)) {
+ if (dentry == parent) {
dput(parent);
return false;
}
@@ -147,6 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
tmp = lookup_one_len_unlocked(nbuf, parent, strlen(nbuf));
if (IS_ERR(tmp)) {
dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
+ err = PTR_ERR(tmp);
goto out_err;
}
if (tmp != dentry) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index cb91baa4275d..eb11502e3fcd 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -892,6 +892,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_magic != EXT2_SUPER_MAGIC)
goto cantfind_ext2;
+ opts.s_mount_opt = 0;
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
if (def_mount_opts & EXT2_DEFM_DEBUG)
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 62d9a659a8ff..dd8f10db82e9 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -612,9 +612,9 @@ skip_replace:
}
cleanup:
- brelse(bh);
if (!(bh && header == HDR(bh)))
kfree(header);
+ brelse(bh);
up_write(&EXT2_I(inode)->xattr_sem);
return error;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 9edc920f651f..6d9cb1719de5 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -730,6 +730,9 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
if (awaken)
wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING);
+ if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags))
+ wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
+
/* Prevent a race with our last child, which has to signal EV_CLEARED
* before dropping our spinlock.
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 47395b0c3b35..e909678afa2d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1119,8 +1119,10 @@ static int fuse_permission(struct inode *inode, int mask)
if (fc->default_permissions ||
((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
struct fuse_inode *fi = get_fuse_inode(inode);
+ u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
- if (time_before64(fi->i_time, get_jiffies_64())) {
+ if (perm_mask & READ_ONCE(fi->inval_mask) ||
+ time_before64(fi->i_time, get_jiffies_64())) {
refreshed = true;
err = fuse_perm_getattr(inode, mask);
@@ -1241,7 +1243,7 @@ static int fuse_dir_open(struct inode *inode, struct file *file)
static int fuse_dir_release(struct inode *inode, struct file *file)
{
- fuse_release_common(file, FUSE_RELEASEDIR);
+ fuse_release_common(file, true);
return 0;
}
@@ -1249,7 +1251,25 @@ static int fuse_dir_release(struct inode *inode, struct file *file)
static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
int datasync)
{
- return fuse_fsync_common(file, start, end, datasync, 1);
+ struct inode *inode = file->f_mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ if (fc->no_fsyncdir)
+ return 0;
+
+ inode_lock(inode);
+ err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
+ if (err == -ENOSYS) {
+ fc->no_fsyncdir = 1;
+ err = 0;
+ }
+ inode_unlock(inode);
+
+ return err;
}
static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b52f9baaa3e7..ffaffe18352a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -89,12 +89,12 @@ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
iput(req->misc.release.inode);
}
-static void fuse_file_put(struct fuse_file *ff, bool sync)
+static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
{
if (refcount_dec_and_test(&ff->count)) {
struct fuse_req *req = ff->reserved_req;
- if (ff->fc->no_open) {
+ if (ff->fc->no_open && !isdir) {
/*
* Drop the release request when client does not
* implement 'open'
@@ -247,10 +247,11 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
req->in.args[0].value = inarg;
}
-void fuse_release_common(struct file *file, int opcode)
+void fuse_release_common(struct file *file, bool isdir)
{
struct fuse_file *ff = file->private_data;
struct fuse_req *req = ff->reserved_req;
+ int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
fuse_prepare_release(ff, file->f_flags, opcode);
@@ -272,7 +273,7 @@ void fuse_release_common(struct file *file, int opcode)
* synchronous RELEASE is allowed (and desirable) in this case
* because the server can be trusted not to screw up.
*/
- fuse_file_put(ff, ff->fc->destroy_req != NULL);
+ fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir);
}
static int fuse_open(struct inode *inode, struct file *file)
@@ -288,7 +289,7 @@ static int fuse_release(struct inode *inode, struct file *file)
if (fc->writeback_cache)
write_inode_now(inode, 1);
- fuse_release_common(file, FUSE_RELEASE);
+ fuse_release_common(file, false);
/* return value is ignored by VFS */
return 0;
@@ -302,7 +303,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags)
* iput(NULL) is a no-op and since the refcount is 1 and everything's
* synchronous, we are fine with not doing igrab() here"
*/
- fuse_file_put(ff, true);
+ fuse_file_put(ff, true, false);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);
@@ -441,13 +442,30 @@ static int fuse_flush(struct file *file, fl_owner_t id)
}
int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
- int datasync, int isdir)
+ int datasync, int opcode)
{
struct inode *inode = file->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
FUSE_ARGS(args);
struct fuse_fsync_in inarg;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+ inarg.fsync_flags = datasync ? 1 : 0;
+ args.in.h.opcode = opcode;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ return fuse_simple_request(fc, &args);
+}
+
+static int fuse_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
int err;
if (is_bad_inode(inode))
@@ -479,34 +497,18 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
if (err)
goto out;
- if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
+ if (fc->no_fsync)
goto out;
- memset(&inarg, 0, sizeof(inarg));
- inarg.fh = ff->fh;
- inarg.fsync_flags = datasync ? 1 : 0;
- args.in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
- args.in.h.nodeid = get_node_id(inode);
- args.in.numargs = 1;
- args.in.args[0].size = sizeof(inarg);
- args.in.args[0].value = &inarg;
- err = fuse_simple_request(fc, &args);
+ err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNC);
if (err == -ENOSYS) {
- if (isdir)
- fc->no_fsyncdir = 1;
- else
- fc->no_fsync = 1;
+ fc->no_fsync = 1;
err = 0;
}
out:
inode_unlock(inode);
- return err;
-}
-static int fuse_fsync(struct file *file, loff_t start, loff_t end,
- int datasync)
-{
- return fuse_fsync_common(file, start, end, datasync, 0);
+ return err;
}
void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
@@ -807,7 +809,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
put_page(page);
}
if (req->ff)
- fuse_file_put(req->ff, false);
+ fuse_file_put(req->ff, false, false);
}
static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1460,7 +1462,7 @@ static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
__free_page(req->pages[i]);
if (req->ff)
- fuse_file_put(req->ff, false);
+ fuse_file_put(req->ff, false, false);
}
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
@@ -1619,7 +1621,7 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
ff = __fuse_write_file_get(fc, fi);
err = fuse_flush_times(inode, ff);
if (ff)
- fuse_file_put(ff, 0);
+ fuse_file_put(ff, false, false);
return err;
}
@@ -1940,7 +1942,7 @@ static int fuse_writepages(struct address_space *mapping,
err = 0;
}
if (data.ff)
- fuse_file_put(data.ff, false);
+ fuse_file_put(data.ff, false, false);
kfree(data.orig_pages);
out:
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e9f712e81c7d..2f2c92e6f8cb 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -822,13 +822,13 @@ void fuse_sync_release(struct fuse_file *ff, int flags);
/**
* Send RELEASE or RELEASEDIR request
*/
-void fuse_release_common(struct file *file, int opcode);
+void fuse_release_common(struct file *file, bool isdir);
/**
* Send FSYNC or FSYNCDIR request
*/
int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
- int datasync, int isdir);
+ int datasync, int opcode);
/**
* Notify poll wakeup
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 0b94b23b02d4..568abed20eb2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -115,7 +115,7 @@ static void fuse_i_callback(struct rcu_head *head)
static void fuse_destroy_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
- if (S_ISREG(inode->i_mode)) {
+ if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
WARN_ON(!list_empty(&fi->write_files));
WARN_ON(!list_empty(&fi->queued_writes));
}
@@ -1068,6 +1068,7 @@ void fuse_dev_free(struct fuse_dev *fud)
fuse_conn_put(fc);
}
+ kfree(fud->pq.processing);
kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 98b96ffb95ed..19017d296173 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -338,13 +338,14 @@ void hfs_bmap_free(struct hfs_bnode *node)
nidx -= len * 8;
i = node->next;
- hfs_bnode_put(node);
if (!i) {
/* panic */;
pr_crit("unable to free bnode %u. bmap not found!\n",
node->this);
+ hfs_bnode_put(node);
return;
}
+ hfs_bnode_put(node);
node = hfs_bnode_find(tree, i);
if (IS_ERR(node))
return;
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 236efe51eca6..66774f4cb4fd 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -466,14 +466,15 @@ void hfs_bmap_free(struct hfs_bnode *node)
nidx -= len * 8;
i = node->next;
- hfs_bnode_put(node);
if (!i) {
/* panic */;
pr_crit("unable to free bnode %u. "
"bmap not found!\n",
node->this);
+ hfs_bnode_put(node);
return;
}
+ hfs_bnode_put(node);
node = hfs_bnode_find(tree, i);
if (IS_ERR(node))
return;
diff --git a/fs/iomap.c b/fs/iomap.c
index 64ce240217a1..5bc172f3dfe8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -116,6 +116,12 @@ iomap_page_create(struct inode *inode, struct page *page)
atomic_set(&iop->read_count, 0);
atomic_set(&iop->write_count, 0);
bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
+
+ /*
+ * migrate_page_move_mapping() assumes that pages with private data have
+ * their count elevated by 1.
+ */
+ get_page(page);
set_page_private(page, (unsigned long)iop);
SetPagePrivate(page);
return iop;
@@ -132,6 +138,7 @@ iomap_page_release(struct page *page)
WARN_ON_ONCE(atomic_read(&iop->write_count));
ClearPagePrivate(page);
set_page_private(page, 0);
+ put_page(page);
kfree(iop);
}
@@ -142,13 +149,14 @@ static void
iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
{
+ loff_t orig_pos = *pos;
+ loff_t isize = i_size_read(inode);
unsigned block_bits = inode->i_blkbits;
unsigned block_size = (1 << block_bits);
unsigned poff = offset_in_page(*pos);
unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
unsigned first = poff >> block_bits;
unsigned last = (poff + plen - 1) >> block_bits;
- unsigned end = offset_in_page(i_size_read(inode)) >> block_bits;
/*
* If the block size is smaller than the page size we need to check the
@@ -183,8 +191,12 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
* handle both halves separately so that we properly zero data in the
* page cache for blocks that are entirely outside of i_size.
*/
- if (first <= end && last > end)
- plen -= (last - end) * block_size;
+ if (orig_pos <= isize && orig_pos + length > isize) {
+ unsigned end = offset_in_page(isize - 1) >> block_bits;
+
+ if (first <= end && last > end)
+ plen -= (last - end) * block_size;
+ }
*offp = poff;
*lenp = plen;
@@ -1580,7 +1592,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
struct bio *bio;
bool need_zeroout = false;
bool use_fua = false;
- int nr_pages, ret;
+ int nr_pages, ret = 0;
size_t copied = 0;
if ((pos | length | align) & ((1 << blkbits) - 1))
@@ -1596,12 +1608,13 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
if (iomap->flags & IOMAP_F_NEW) {
need_zeroout = true;
- } else {
+ } else if (iomap->type == IOMAP_MAPPED) {
/*
- * Use a FUA write if we need datasync semantics, this
- * is a pure data IO that doesn't require any metadata
- * updates and the underlying device supports FUA. This
- * allows us to avoid cache flushes on IO completion.
+ * Use a FUA write if we need datasync semantics, this is a pure
+ * data IO that doesn't require any metadata updates (including
+ * after IO completion such as unwritten extent conversion) and
+ * the underlying device supports FUA. This allows us to avoid
+ * cache flushes on IO completion.
*/
if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
(dio->flags & IOMAP_DIO_WRITE_FUA) &&
@@ -1644,8 +1657,14 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
ret = bio_iov_iter_get_pages(bio, &iter);
if (unlikely(ret)) {
+ /*
+ * We have to stop part way through an IO. We must fall
+ * through to the sub-block tail zeroing here, otherwise
+ * this short IO may expose stale data in the tail of
+ * the block we haven't written data to.
+ */
bio_put(bio);
- return copied ? copied : ret;
+ goto zero_tail;
}
n = bio->bi_iter.bi_size;
@@ -1676,13 +1695,21 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
dio->submit.cookie = submit_bio(bio);
} while (nr_pages);
- if (need_zeroout) {
+ /*
+ * We need to zeroout the tail of a sub-block write if the extent type
+ * requires zeroing or the write extends beyond EOF. If we don't zero
+ * the block tail in the latter case, we can expose stale data via mmap
+ * reads of the EOF block.
+ */
+zero_tail:
+ if (need_zeroout ||
+ ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
/* zero out from the end of the write to the end of the block */
pad = pos & (fs_block_size - 1);
if (pad)
iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
}
- return copied;
+ return copied ? copied : ret;
}
static loff_t
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7b861bbc0b43..315967354954 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -686,20 +686,24 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
{
struct cb_offloadargs *args = data;
struct nfs_server *server;
- struct nfs4_copy_state *copy;
+ struct nfs4_copy_state *copy, *tmp_copy;
bool found = false;
+ copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+ if (!copy)
+ return htonl(NFS4ERR_SERVERFAULT);
+
spin_lock(&cps->clp->cl_lock);
rcu_read_lock();
list_for_each_entry_rcu(server, &cps->clp->cl_superblocks,
client_link) {
- list_for_each_entry(copy, &server->ss_copies, copies) {
+ list_for_each_entry(tmp_copy, &server->ss_copies, copies) {
if (memcmp(args->coa_stateid.other,
- copy->stateid.other,
+ tmp_copy->stateid.other,
sizeof(args->coa_stateid.other)))
continue;
- nfs4_copy_cb_args(copy, args);
- complete(&copy->completion);
+ nfs4_copy_cb_args(tmp_copy, args);
+ complete(&tmp_copy->completion);
found = true;
goto out;
}
@@ -707,15 +711,11 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
out:
rcu_read_unlock();
if (!found) {
- copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
- if (!copy) {
- spin_unlock(&cps->clp->cl_lock);
- return htonl(NFS4ERR_SERVERFAULT);
- }
memcpy(&copy->stateid, &args->coa_stateid, NFS4_STATEID_SIZE);
nfs4_copy_cb_args(copy, args);
list_add_tail(&copy->copies, &cps->clp->pending_cb_stateids);
- }
+ } else
+ kfree(copy);
spin_unlock(&cps->clp->cl_lock);
return 0;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index aa12c3063bae..33824a0a57bf 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -98,8 +98,11 @@ struct nfs_direct_req {
struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
struct work_struct work;
int flags;
+ /* for write */
#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
+ /* for read */
+#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */
struct nfs_writeverf verf; /* unstable write verifier */
};
@@ -412,7 +415,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
- if (!PageCompound(page) && bytes < hdr->good_bytes)
+ if (!PageCompound(page) && bytes < hdr->good_bytes &&
+ (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY))
set_page_dirty(page);
bytes += req->wb_bytes;
nfs_list_remove_request(req);
@@ -587,6 +591,9 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
+ if (iter_is_iovec(iter))
+ dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
+
nfs_start_io_direct(inode);
NFS_I(inode)->read_io += count;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 86bcba40ca61..310d7500f665 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1361,12 +1361,7 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
task))
return;
- if (ff_layout_read_prepare_common(task, hdr))
- return;
-
- if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
- hdr->args.lock_context, FMODE_READ) == -EIO)
- rpc_exit(task, -EIO); /* lost lock, terminate I/O */
+ ff_layout_read_prepare_common(task, hdr);
}
static void ff_layout_read_call_done(struct rpc_task *task, void *data)
@@ -1542,12 +1537,7 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
task))
return;
- if (ff_layout_write_prepare_common(task, hdr))
- return;
-
- if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
- hdr->args.lock_context, FMODE_WRITE) == -EIO)
- rpc_exit(task, -EIO); /* lost lock, terminate I/O */
+ ff_layout_write_prepare_common(task, hdr);
}
static void ff_layout_write_call_done(struct rpc_task *task, void *data)
@@ -1742,6 +1732,11 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
if (fh)
hdr->args.fh = fh;
+
+ if (vers == 4 &&
+ !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
+ goto out_failed;
+
/*
* Note that if we ever decide to split across DSes,
* then we may need to handle dense-like offsets.
@@ -1804,6 +1799,10 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
if (fh)
hdr->args.fh = fh;
+ if (vers == 4 &&
+ !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
+ goto out_failed;
+
/*
* Note that if we ever decide to split across DSes,
* then we may need to handle dense-like offsets.
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 411798346e48..de50a342d5a5 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -215,6 +215,10 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
unsigned int maxnum);
struct nfs_fh *
nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx);
+int
+nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
+ u32 mirror_idx,
+ nfs4_stateid *stateid);
struct nfs4_pnfs_ds *
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 74d8d5352438..d23347389626 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -370,6 +370,25 @@ out:
return fh;
}
+int
+nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
+ u32 mirror_idx,
+ nfs4_stateid *stateid)
+{
+ struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
+
+ if (!ff_layout_mirror_valid(lseg, mirror, false)) {
+ pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",
+ __func__, mirror_idx);
+ goto out;
+ }
+
+ nfs4_stateid_copy(stateid, &mirror->stateid);
+ return 1;
+out:
+ return 0;
+}
+
/**
* nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
* @lseg: the layout segment we're operating on
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index ac5b784a1de0..fed06fd9998d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -137,31 +137,32 @@ static int handle_async_copy(struct nfs42_copy_res *res,
struct file *dst,
nfs4_stateid *src_stateid)
{
- struct nfs4_copy_state *copy;
+ struct nfs4_copy_state *copy, *tmp_copy;
int status = NFS4_OK;
bool found_pending = false;
struct nfs_open_context *ctx = nfs_file_open_context(dst);
+ copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+ if (!copy)
+ return -ENOMEM;
+
spin_lock(&server->nfs_client->cl_lock);
- list_for_each_entry(copy, &server->nfs_client->pending_cb_stateids,
+ list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids,
copies) {
- if (memcmp(&res->write_res.stateid, &copy->stateid,
+ if (memcmp(&res->write_res.stateid, &tmp_copy->stateid,
NFS4_STATEID_SIZE))
continue;
found_pending = true;
- list_del(&copy->copies);
+ list_del(&tmp_copy->copies);
break;
}
if (found_pending) {
spin_unlock(&server->nfs_client->cl_lock);
+ kfree(copy);
+ copy = tmp_copy;
goto out;
}
- copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
- if (!copy) {
- spin_unlock(&server->nfs_client->cl_lock);
- return -ENOMEM;
- }
memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
init_completion(&copy->completion);
copy->parent_state = ctx->state;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8d59c9655ec4..1b994b527518 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -41,6 +41,8 @@ enum nfs4_client_state {
NFS4CLNT_MOVED,
NFS4CLNT_LEASE_MOVED,
NFS4CLNT_DELEGATION_EXPIRED,
+ NFS4CLNT_RUN_MANAGER,
+ NFS4CLNT_DELEGRETURN_RUNNING,
};
#define NFS4_RENEW_TIMEOUT 0x01
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ffea57885394..d8decf2ec48f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1210,6 +1210,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
struct task_struct *task;
char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
+ set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
return;
__module_get(THIS_MODULE);
@@ -2503,6 +2504,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
/* Ensure exclusive access to NFSv4 state */
do {
+ clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
section = "purge state";
status = nfs4_purge_lease(clp);
@@ -2593,14 +2595,18 @@ static void nfs4_state_manager(struct nfs_client *clp)
}
nfs4_end_drain_session(clp);
- if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
- nfs_client_return_marked_delegations(clp);
- continue;
+ nfs4_clear_state_manager_bit(clp);
+
+ if (!test_and_set_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state)) {
+ if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
+ nfs_client_return_marked_delegations(clp);
+ set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+ }
+ clear_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state);
}
- nfs4_clear_state_manager_bit(clp);
/* Did we race with an attempt to give us more work? */
- if (clp->cl_state == 0)
+ if (!test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
return;
if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
return;
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index de99db518571..f2129a5d9f23 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -266,9 +266,7 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
return;
if (nbh == NULL) { /* blocksize == pagesize */
- xa_lock_irq(&btnc->i_pages);
- __xa_erase(&btnc->i_pages, newkey);
- xa_unlock_irq(&btnc->i_pages);
+ xa_erase_irq(&btnc->i_pages, newkey);
unlock_page(ctxt->bh->b_page);
} else
brelse(nbh);
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 9f88188060db..4bf8d5854b27 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -125,10 +125,10 @@ check_err:
check_gen:
if (handle->ih_generation != inode->i_generation) {
- iput(inode);
trace_ocfs2_get_dentry_generation((unsigned long long)blkno,
handle->ih_generation,
inode->i_generation);
+ iput(inode);
result = ERR_PTR(-ESTALE);
goto bail;
}
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 3f1685d7d43b..1565dd8e8856 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -157,18 +157,14 @@ out:
}
/*
- * lock allocators, and reserving appropriate number of bits for
- * meta blocks and data clusters.
- *
- * in some cases, we don't need to reserve clusters, just let data_ac
- * be NULL.
+ * lock allocator, and reserve appropriate number of bits for
+ * meta blocks.
*/
-static int ocfs2_lock_allocators_move_extents(struct inode *inode,
+static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters_to_move,
u32 extents_to_split,
struct ocfs2_alloc_context **meta_ac,
- struct ocfs2_alloc_context **data_ac,
int extra_blocks,
int *credits)
{
@@ -193,13 +189,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
goto out;
}
- if (data_ac) {
- ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
- }
*credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);
@@ -259,10 +248,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}
- ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
- &context->meta_ac,
- &context->data_ac,
- extra_blocks, &credits);
+ ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+ *len, 1,
+ &context->meta_ac,
+ extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
@@ -285,6 +274,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}
+ /*
+ * Make sure ocfs2_reserve_cluster is called after
+ * __ocfs2_flush_truncate_log, otherwise, dead lock may happen.
+ *
+ * If ocfs2_reserve_cluster is called
+ * before __ocfs2_flush_truncate_log, dead lock on global bitmap
+ * may happen.
+ *
+ */
+ ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_mutex;
+ }
+
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -617,9 +621,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
}
}
- ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
- &context->meta_ac,
- NULL, extra_blocks, &credits);
+ ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+ len, 1,
+ &context->meta_ac,
+ extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index c6289147c787..82c129bfe58d 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -651,6 +651,18 @@ static int ovl_symlink(struct inode *dir, struct dentry *dentry,
return ovl_create_object(dentry, S_IFLNK, 0, link);
}
+static int ovl_set_link_redirect(struct dentry *dentry)
+{
+ const struct cred *old_cred;
+ int err;
+
+ old_cred = ovl_override_creds(dentry->d_sb);
+ err = ovl_set_redirect(dentry, false);
+ revert_creds(old_cred);
+
+ return err;
+}
+
static int ovl_link(struct dentry *old, struct inode *newdir,
struct dentry *new)
{
@@ -670,7 +682,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
goto out_drop_write;
if (ovl_is_metacopy_dentry(old)) {
- err = ovl_set_redirect(old, false);
+ err = ovl_set_link_redirect(old);
if (err)
goto out_drop_write;
}
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 8fa37cd7818a..54e5d17d7f3e 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -754,9 +754,8 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
goto out;
}
- /* Otherwise, get a connected non-upper dir or disconnected non-dir */
- if (d_is_dir(origin.dentry) &&
- (origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
+ /* Find origin.dentry again with ovl_acceptable() layer check */
+ if (d_is_dir(origin.dentry)) {
dput(origin.dentry);
origin.dentry = NULL;
err = ovl_check_origin_fh(ofs, fh, true, NULL, &stack);
@@ -769,6 +768,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
goto out_err;
}
+ /* Get a connected non-upper dir or disconnected non-dir */
dentry = ovl_get_dentry(sb, NULL, &origin, index);
out:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 6bcc9dedc342..3b7ed5d2279c 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -286,22 +286,13 @@ int ovl_permission(struct inode *inode, int mask)
if (err)
return err;
- /* No need to do any access on underlying for special files */
- if (special_file(realinode->i_mode))
- return 0;
-
- /* No need to access underlying for execute */
- mask &= ~MAY_EXEC;
- if ((mask & (MAY_READ | MAY_WRITE)) == 0)
- return 0;
-
- /* Lower files get copied up, so turn write access into read */
- if (!upperinode && mask & MAY_WRITE) {
+ old_cred = ovl_override_creds(inode->i_sb);
+ if (!upperinode &&
+ !special_file(realinode->i_mode) && mask & MAY_WRITE) {
mask &= ~(MAY_WRITE | MAY_APPEND);
+ /* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
}
-
- old_cred = ovl_override_creds(inode->i_sb);
err = inode_permission(realinode, mask);
revert_creds(old_cred);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index ffcff6516e89..e02a9039b5ea 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -816,17 +816,14 @@ static int ramoops_probe(struct platform_device *pdev)
cxt->pstore.data = cxt;
/*
- * Console can handle any buffer size, so prefer LOG_LINE_MAX. If we
- * have to handle dumps, we must have at least record_size buffer. And
- * for ftrace, bufsize is irrelevant (if bufsize is 0, buf will be
- * ZERO_SIZE_PTR).
+ * Since bufsize is only used for dmesg crash dumps, it
+ * must match the size of the dprz record (after PRZ header
+ * and ECC bytes have been accounted for).
*/
- if (cxt->console_size)
- cxt->pstore.bufsize = 1024; /* LOG_LINE_MAX */
- cxt->pstore.bufsize = max(cxt->record_size, cxt->pstore.bufsize);
- cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL);
+ cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size;
+ cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL);
if (!cxt->pstore.buf) {
- pr_err("cannot allocate pstore buffer\n");
+ pr_err("cannot allocate pstore crash dump buffer\n");
err = -ENOMEM;
goto fail_clear;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index bfcb4ced5664..58f30537c47a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1956,7 +1956,7 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
struct inode *inode_out = file_inode(file_out);
loff_t ret;
- WARN_ON_ONCE(remap_flags);
+ WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
return -EISDIR;
@@ -2094,17 +2094,18 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
off = same->src_offset;
len = same->src_length;
- ret = -EISDIR;
if (S_ISDIR(src->i_mode))
- goto out;
+ return -EISDIR;
- ret = -EINVAL;
if (!S_ISREG(src->i_mode))
- goto out;
+ return -EINVAL;
+
+ if (!file->f_op->remap_file_range)
+ return -EOPNOTSUPP;
ret = remap_verify_area(file, off, len, false);
if (ret < 0)
- goto out;
+ return ret;
ret = 0;
if (off + len > i_size_read(src))
@@ -2147,10 +2148,8 @@ next_fdput:
fdput(dst_fd);
next_loop:
if (fatal_signal_pending(current))
- goto out;
+ break;
}
-
-out:
return ret;
}
EXPORT_SYMBOL(vfs_dedupe_file_range);
diff --git a/fs/splice.c b/fs/splice.c
index 3553f1956508..de2ede048473 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -945,11 +945,16 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
sd->flags &= ~SPLICE_F_NONBLOCK;
more = sd->flags & SPLICE_F_MORE;
+ WARN_ON_ONCE(pipe->nrbufs != 0);
+
while (len) {
size_t read_len;
loff_t pos = sd->pos, prev_pos = pos;
- ret = do_splice_to(in, &pos, pipe, len, flags);
+ /* Don't try to read more the pipe has space for. */
+ read_len = min_t(size_t, len,
+ (pipe->buffers - pipe->nrbufs) << PAGE_SHIFT);
+ ret = do_splice_to(in, &pos, pipe, read_len, flags);
if (unlikely(ret <= 0))
goto out_release;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 499a20a5a010..273736f41be3 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -275,7 +275,7 @@ static int __sysv_write_inode(struct inode *inode, int wait)
}
}
brelse(bh);
- return 0;
+ return err;
}
int sysv_write_inode(struct inode *inode, struct writeback_control *wbc)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 8f2f56d9a1bb..e3d684ea3203 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -827,16 +827,20 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
ret = udf_dstrCS0toChar(sb, outstr, 31, pvoldesc->volIdent, 32);
- if (ret < 0)
- goto out_bh;
-
- strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret);
+ if (ret < 0) {
+ strcpy(UDF_SB(sb)->s_volume_ident, "InvalidName");
+ pr_warn("incorrect volume identification, setting to "
+ "'InvalidName'\n");
+ } else {
+ strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret);
+ }
udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident);
ret = udf_dstrCS0toChar(sb, outstr, 127, pvoldesc->volSetIdent, 128);
- if (ret < 0)
+ if (ret < 0) {
+ ret = 0;
goto out_bh;
-
+ }
outstr[ret] = 0;
udf_debug("volSetIdent[] = '%s'\n", outstr);
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 45234791fec2..5fcfa96463eb 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -351,6 +351,11 @@ try_again:
return u_len;
}
+/*
+ * Convert CS0 dstring to output charset. Warning: This function may truncate
+ * input string if it is too long as it is used for informational strings only
+ * and it is better to truncate the string than to refuse mounting a media.
+ */
int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len,
const uint8_t *ocu_i, int i_len)
{
@@ -359,9 +364,12 @@ int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len,
if (i_len > 0) {
s_len = ocu_i[i_len - 1];
if (s_len >= i_len) {
- pr_err("incorrect dstring lengths (%d/%d)\n",
- s_len, i_len);
- return -EINVAL;
+ pr_warn("incorrect dstring lengths (%d/%d),"
+ " truncating\n", s_len, i_len);
+ s_len = i_len - 1;
+ /* 2-byte encoding? Need to round properly... */
+ if (ocu_i[0] == 16)
+ s_len -= (s_len - 1) & 2;
}
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 356d2b8568c1..7a85e609fc27 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1361,6 +1361,19 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
ret = -EINVAL;
if (!vma_can_userfault(cur))
goto out_unlock;
+
+ /*
+ * UFFDIO_COPY will fill file holes even without
+ * PROT_WRITE. This check enforces that if this is a
+ * MAP_SHARED, the process has write permission to the backing
+ * file. If VM_MAYWRITE is set it also enforces that on a
+ * MAP_SHARED vma: there is no F_WRITE_SEAL and no further
+ * F_WRITE_SEAL can be taken until the vma is destroyed.
+ */
+ ret = -EPERM;
+ if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
+ goto out_unlock;
+
/*
* If this vma contains ending address, and huge pages
* check alignment.
@@ -1406,6 +1419,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
BUG_ON(!vma_can_userfault(vma));
BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx);
+ WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
/*
* Nothing to do: this vma is already registered into this
@@ -1560,6 +1574,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
if (!vma->vm_userfaultfd_ctx.ctx)
goto skip;
+ WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
+
if (vma->vm_start > start)
start = vma->vm_start;
vma_end = min(end, vma->vm_end);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 74d7228e755b..19e921d1586f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1694,10 +1694,13 @@ xfs_bmap_add_extent_delay_real(
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
/*
* Filling in all of a previously delayed allocation extent.
- * The right neighbor is contiguous, the left is not.
+ * The right neighbor is contiguous, the left is not. Take care
+ * with delay -> unwritten extent allocation here because the
+ * delalloc record we are overwriting is always written.
*/
PREV.br_startblock = new->br_startblock;
PREV.br_blockcount += RIGHT.br_blockcount;
+ PREV.br_state = new->br_state;
xfs_iext_next(ifp, &bma->icur);
xfs_iext_remove(bma->ip, &bma->icur, state);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 34c6d7bd4d18..bbdae2b4559f 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -330,7 +330,7 @@ xfs_btree_sblock_verify_crc(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
- return __this_address;
+ return false;
return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 86c50208a143..7fbf8af0b159 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -538,15 +538,18 @@ xfs_inobt_rec_check_count(
static xfs_extlen_t
xfs_inobt_max_size(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
{
+ xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno);
+
/* Bail out if we're uninitialized, which can happen in mkfs. */
if (mp->m_inobt_mxr[0] == 0)
return 0;
return xfs_btree_calc_size(mp->m_inobt_mnr,
- (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock /
- XFS_INODES_PER_CHUNK);
+ (uint64_t)agblocks * mp->m_sb.sb_inopblock /
+ XFS_INODES_PER_CHUNK);
}
static int
@@ -594,7 +597,7 @@ xfs_finobt_calc_reserves(
if (error)
return error;
- *ask += xfs_inobt_max_size(mp);
+ *ask += xfs_inobt_max_size(mp, agno);
*used += tree_len;
return 0;
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5d263dfdb3bc..1ee8c5539fa4 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1042,7 +1042,7 @@ out_trans_cancel:
goto out_unlock;
}
-static int
+int
xfs_flush_unmap_range(
struct xfs_inode *ip,
xfs_off_t offset,
@@ -1126,9 +1126,9 @@ xfs_free_file_space(
* page could be mmap'd and iomap_zero_range doesn't do that for us.
* Writeback of the eof page will do this, albeit clumsily.
*/
- if (offset + len >= XFS_ISIZE(ip) && ((offset + len) & PAGE_MASK)) {
+ if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
- (offset + len) & ~PAGE_MASK, LLONG_MAX);
+ round_down(offset + len, PAGE_SIZE), LLONG_MAX);
}
return error;
@@ -1195,13 +1195,7 @@ xfs_prepare_shift(
* Writeback and invalidate cache for the remainder of the file as we're
* about to shift down every extent from offset to EOF.
*/
- error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, offset, -1);
- if (error)
- return error;
- error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
- offset >> PAGE_SHIFT, -1);
- if (error)
- return error;
+ error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
/*
* Clean out anything hanging around in the cow fork now that
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 87363d136bb6..7a78229cf1a7 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -80,4 +80,7 @@ int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork, xfs_extnum_t *nextents,
xfs_filblks_t *count);
+int xfs_flush_unmap_range(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_off_t len);
+
#endif /* __XFS_BMAP_UTIL_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 12d8455bfbb2..010db5f8fb00 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1233,9 +1233,23 @@ xfs_buf_iodone(
}
/*
- * Requeue a failed buffer for writeback
+ * Requeue a failed buffer for writeback.
*
- * Return true if the buffer has been re-queued properly, false otherwise
+ * We clear the log item failed state here as well, but we have to be careful
+ * about reference counts because the only active reference counts on the buffer
+ * may be the failed log items. Hence if we clear the log item failed state
+ * before queuing the buffer for IO we can release all active references to
+ * the buffer and free it, leading to use after free problems in
+ * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which
+ * order we process them in - the buffer is locked, and we own the buffer list
+ * so nothing on them is going to change while we are performing this action.
+ *
+ * Hence we can safely queue the buffer for IO before we clear the failed log
+ * item state, therefore always having an active reference to the buffer and
+ * avoiding the transient zero-reference state that leads to use-after-free.
+ *
+ * Return true if the buffer was added to the buffer list, false if it was
+ * already on the buffer list.
*/
bool
xfs_buf_resubmit_failed_buffers(
@@ -1243,16 +1257,16 @@ xfs_buf_resubmit_failed_buffers(
struct list_head *buffer_list)
{
struct xfs_log_item *lip;
+ bool ret;
+
+ ret = xfs_buf_delwri_queue(bp, buffer_list);
/*
- * Clear XFS_LI_FAILED flag from all items before resubmit
- *
- * XFS_LI_FAILED set/clear is protected by ail_lock, caller this
+ * XFS_LI_FAILED set/clear is protected by ail_lock, caller of this
* function already have it acquired
*/
list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
xfs_clear_li_failed(lip);
- /* Add this buffer back to the delayed write list */
- return xfs_buf_delwri_queue(bp, buffer_list);
+ return ret;
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 53c9ab8fb777..e47425071e65 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -920,7 +920,7 @@ out_unlock:
}
-loff_t
+STATIC loff_t
xfs_file_remap_range(
struct file *file_in,
loff_t pos_in,
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 73a1d77ec187..3091e4bc04ef 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -40,7 +40,7 @@ xfs_fill_statvfs_from_dquot(
statp->f_files = limit;
statp->f_ffree =
(statp->f_files > dqp->q_res_icount) ?
- (statp->f_ffree - dqp->q_res_icount) : 0;
+ (statp->f_files - dqp->q_res_icount) : 0;
}
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index ecdb086bc23e..322a852ce284 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -296,6 +296,7 @@ xfs_reflink_reserve_cow(
if (error)
return error;
+ xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
trace_xfs_reflink_cow_alloc(ip, &got);
return 0;
}
@@ -1351,10 +1352,19 @@ xfs_reflink_remap_prep(
if (ret)
goto out_unlock;
- /* Zap any page cache for the destination file's range. */
- truncate_inode_pages_range(&inode_out->i_data,
- round_down(pos_out, PAGE_SIZE),
- round_up(pos_out + *len, PAGE_SIZE) - 1);
+ /*
+ * If pos_out > EOF, we may have dirtied blocks between EOF and
+ * pos_out. In that case, we need to extend the flush and unmap to cover
+ * from EOF to the end of the copy length.
+ */
+ if (pos_out > XFS_ISIZE(dest)) {
+ loff_t flen = *len + (pos_out - XFS_ISIZE(dest));
+ ret = xfs_flush_unmap_range(dest, XFS_ISIZE(dest), flen);
+ } else {
+ ret = xfs_flush_unmap_range(dest, pos_out, *len);
+ }
+ if (ret)
+ goto out_unlock;
return 1;
out_unlock:
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 3043e5ed6495..8a6532aae779 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -280,7 +280,10 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
),
TP_fast_assign(
__entry->dev = bp->b_target->bt_dev;
- __entry->bno = bp->b_bn;
+ if (bp->b_bn == XFS_BUF_DADDR_NULL)
+ __entry->bno = bp->b_maps[0].bm_bn;
+ else
+ __entry->bno = bp->b_bn;
__entry->nblks = bp->b_length;
__entry->hold = atomic_read(&bp->b_hold);
__entry->pincount = atomic_read(&bp->b_pin_count);