22 files changed, 211 insertions, 107 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a8b8adc05070..5a3327b8f90d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4534,7 +4534,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	unsigned nr_extents = 0;
 	int extra_reserve = 0;
 	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
-	int ret;
+	int ret = 0;
 	bool delalloc_lock = true;
 
 	/* If we are a free space inode we need to not flush since we will be in
@@ -4579,20 +4579,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	csum_bytes = BTRFS_I(inode)->csum_bytes;
 	spin_unlock(&BTRFS_I(inode)->lock);
 
-	if (root->fs_info->quota_enabled) {
+	if (root->fs_info->quota_enabled)
 		ret = btrfs_qgroup_reserve(root, num_bytes +
 					   nr_extents * root->leafsize);
-		if (ret) {
-			spin_lock(&BTRFS_I(inode)->lock);
-			calc_csum_metadata_size(inode, num_bytes, 0);
-			spin_unlock(&BTRFS_I(inode)->lock);
-			if (delalloc_lock)
-				mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
-			return ret;
-		}
-	}
 
-	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
+	/*
+	 * ret != 0 here means the qgroup reservation failed, we go straight to
+	 * the shared error handling then.
+	 */
+	if (ret == 0)
+		ret = reserve_metadata_bytes(root, block_rsv,
+					     to_reserve, flush);
+
 	if (ret) {
 		u64 to_free = 0;
 		unsigned dropped;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2e8cae63d247..fdb7a8db3b57 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -288,7 +288,8 @@ out:
 void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
 {
 	clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
-	try_merge_map(tree, em);
+	if (em->in_tree)
+		try_merge_map(tree, em);
 }
 
 /**
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f76b1fd160d4..aeb84469d2c4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
 	struct btrfs_key key;
 	struct btrfs_ioctl_defrag_range_args range;
 	int num_defrag;
+	int index;
+	int ret;
 
 	/* get the inode */
 	key.objectid = defrag->root;
 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
 	key.offset = (u64)-1;
+
+	index = srcu_read_lock(&fs_info->subvol_srcu);
+
 	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
 	if (IS_ERR(inode_root)) {
-		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
-		return PTR_ERR(inode_root);
+		ret = PTR_ERR(inode_root);
+		goto cleanup;
+	}
+	if (btrfs_root_refs(&inode_root->root_item) == 0) {
+		ret = -ENOENT;
+		goto cleanup;
 	}
 
 	key.objectid = defrag->ino;
@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
 	key.offset = 0;
 	inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
 	if (IS_ERR(inode)) {
-		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
-		return PTR_ERR(inode);
+		ret = PTR_ERR(inode);
+		goto cleanup;
 	}
+	srcu_read_unlock(&fs_info->subvol_srcu, index);
 
 	/* do a chunk of defrag */
 	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
 
 	iput(inode);
 	return 0;
+cleanup:
+	srcu_read_unlock(&fs_info->subvol_srcu, index);
+	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	return ret;
 }
 
 /*
@@ -1594,9 +1608,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		if (err < 0 && num_written > 0)
 			num_written = err;
 	}
-out:
+
 	if (sync)
 		atomic_dec(&BTRFS_I(inode)->sync_writers);
+out:
 	sb_end_write(inode->i_sb);
 	current->backing_dev_info = NULL;
 	return num_written ? num_written : err;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5b22d45d3c6a..338f2597bf7f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -515,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root,
 
 	BUG_ON(ret);
 
-	d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
 fail:
 	if (async_transid) {
 		*async_transid = trans->transid;
@@ -525,6 +524,10 @@ fail:
 	}
 	if (err && !ret)
 		ret = err;
+
+	if (!ret)
+		d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
+
 	return ret;
 }
 
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f10731297040..e5ed56729607 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 	 * if the disk i_size is already at the inode->i_size, or
 	 * this ordered extent is inside the disk i_size, we're done
 	 */
-	if (disk_i_size == i_size || offset <= disk_i_size) {
+	if (disk_i_size == i_size)
+		goto out;
+
+	/*
+	 * We still need to update disk_i_size if outstanding_isize is greater
+	 * than disk_i_size.
+	 */
+	if (offset <= disk_i_size &&
+	    (!ordered || ordered->outstanding_isize <= disk_i_size))
 		goto out;
-	}
 
 	/*
 	 * walk backward from this ordered extent to disk_i_size.
@@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 			break;
 		if (test->file_offset >= i_size)
 			break;
-		if (test->file_offset >= disk_i_size) {
+		if (entry_end(test) > disk_i_size) {
 			/*
 			 * we don't update disk_i_size now, so record this
 			 * undealt i_size. Or we will not know the real
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bdbb94f245c9..67783e03d121 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
 	int corrected = 0;
 	struct btrfs_key key;
 	struct inode *inode = NULL;
+	struct btrfs_fs_info *fs_info;
 	u64 end = offset + PAGE_SIZE - 1;
 	struct btrfs_root *local_root;
+	int srcu_index;
 
 	key.objectid = root;
 	key.type = BTRFS_ROOT_ITEM_KEY;
 	key.offset = (u64)-1;
-	local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key);
-	if (IS_ERR(local_root))
+
+	fs_info = fixup->root->fs_info;
+	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
+
+	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
+	if (IS_ERR(local_root)) {
+		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
 		return PTR_ERR(local_root);
+	}
 
 	key.type = BTRFS_INODE_ITEM_KEY;
 	key.objectid = inum;
 	key.offset = 0;
-	inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL);
+	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
+	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
@@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
 	}
 
 	if (PageUptodate(page)) {
-		struct btrfs_fs_info *fs_info;
 		if (PageDirty(page)) {
 			/*
 			 * we need to write the data to the defect sector. the
@@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
 	u64 physical_for_dev_replace;
 	u64 len;
 	struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
+	int srcu_index;
 
 	key.objectid = root;
 	key.type = BTRFS_ROOT_ITEM_KEY;
 	key.offset = (u64)-1;
+
+	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
+
 	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
-	if (IS_ERR(local_root))
+	if (IS_ERR(local_root)) {
+		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
 		return PTR_ERR(local_root);
+	}
 
 	key.type = BTRFS_INODE_ITEM_KEY;
 	key.objectid = inum;
 	key.offset = 0;
 	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
+	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f15494699f3b..fc03aa60b684 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -333,12 +333,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
 					  &root->fs_info->trans_block_rsv,
 					  num_bytes, flush);
 		if (ret)
-			return ERR_PTR(ret);
+			goto reserve_fail;
 	}
 again:
 	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
-	if (!h)
-		return ERR_PTR(-ENOMEM);
+	if (!h) {
+		ret = -ENOMEM;
+		goto alloc_fail;
+	}
 
 	/*
 	 * If we are JOIN_NOLOCK we're already committing a transaction and
@@ -365,11 +367,7 @@ again:
 	if (ret < 0) {
 		/* We must get the transaction if we are JOIN_NOLOCK. */
 		BUG_ON(type == TRANS_JOIN_NOLOCK);
-
-		if (type < TRANS_JOIN_NOLOCK)
-			sb_end_intwrite(root->fs_info->sb);
-		kmem_cache_free(btrfs_trans_handle_cachep, h);
-		return ERR_PTR(ret);
+		goto join_fail;
 	}
 
 	cur_trans = root->fs_info->running_transaction;
@@ -410,6 +408,19 @@ got_it:
 	if (!current->journal_info && type != TRANS_USERSPACE)
 		current->journal_info = h;
 	return h;
+
+join_fail:
+	if (type < TRANS_JOIN_NOLOCK)
+		sb_end_intwrite(root->fs_info->sb);
+	kmem_cache_free(btrfs_trans_handle_cachep, h);
+alloc_fail:
+	if (num_bytes)
+		btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
+					num_bytes);
+reserve_fail:
+	if (qgroup_reserved)
+		btrfs_qgroup_free(root, qgroup_reserved);
+	return ERR_PTR(ret);
 }
 
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 15f6efdf6463..5cbb7f4b1672 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1556,7 +1556,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	ret = 0;
 
 	/* Notify udev that device has changed */
-	btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+	if (bdev)
+		btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
 
 error_brelse:
 	brelse(bh);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 7ff49852b0cb..911649a47dd5 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -503,11 +503,11 @@ static ssize_t device_write(struct file *file, const char __user *buf,
 #endif
 		return -EINVAL;
 
-#ifdef CONFIG_COMPAT
-	if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN)
-#else
+	/*
+	 * can't compare against COMPAT/dlm_write_request32 because
+	 * we don't yet know if is64bit is zero
+	 */
 	if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
-#endif
 		return -EINVAL;
 
 	kbuf = kzalloc(count + 1, GFP_NOFS);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index dd057bc6b65b..fc8dc20fdeb9 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -177,11 +177,31 @@ out_nofree:
 	return mnt;
 }
 
+static int
+nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	if (NFS_FH(dentry->d_inode)->size != 0)
+		return nfs_getattr(mnt, dentry, stat);
+	generic_fillattr(dentry->d_inode, stat);
+	return 0;
+}
+
+static int
+nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	if (NFS_FH(dentry->d_inode)->size != 0)
+		return nfs_setattr(dentry, attr);
+	return -EACCES;
+}
+
 const struct inode_operations nfs_mountpoint_inode_operations = {
 	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
 };
 
 const struct inode_operations nfs_referral_inode_operations = {
+	.getattr	= nfs_namespace_getattr,
+	.setattr	= nfs_namespace_setattr,
 };
 
 static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index acc347268124..2e9779b58b7a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -236,11 +236,10 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	error = nfs4_discover_server_trunking(clp, &old);
 	if (error < 0)
 		goto error;
+	nfs_put_client(clp);
 	if (clp != old) {
 		clp->cl_preserve_clid = true;
-		nfs_put_client(clp);
 		clp = old;
-		atomic_inc(&clp->cl_count);
 	}
 
 	return clp;
@@ -306,7 +305,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
 		.clientid	= new->cl_clientid,
 		.confirm	= new->cl_confirm,
 	};
-	int status;
+	int status = -NFS4ERR_STALE_CLIENTID;
 
 	spin_lock(&nn->nfs_client_lock);
 	list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -332,40 +331,33 @@ int nfs40_walk_client_list(struct nfs_client *new,
 
 		if (prev)
 			nfs_put_client(prev);
+		prev = pos;
 
 		status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
-		if (status == 0) {
+		switch (status) {
+		case -NFS4ERR_STALE_CLIENTID:
+			break;
+		case 0:
 			nfs4_swap_callback_idents(pos, new);
 
-			nfs_put_client(pos);
+			prev = NULL;
 			*result = pos;
 			dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
 				__func__, pos, atomic_read(&pos->cl_count));
-			return 0;
-		}
-		if (status != -NFS4ERR_STALE_CLIENTID) {
-			nfs_put_client(pos);
-			dprintk("NFS: <-- %s status = %d, no result\n",
-				__func__, status);
-			return status;
+		default:
+			goto out;
 		}
 
 		spin_lock(&nn->nfs_client_lock);
-		prev = pos;
 	}
+	spin_unlock(&nn->nfs_client_lock);
 
-	/*
-	 * No matching nfs_client found.  This should be impossible,
-	 * because the new nfs_client has already been added to
-	 * nfs_client_list by nfs_get_client().
-	 *
-	 * Don't BUG(), since the caller is holding a mutex.
-	 */
+	/* No match found. The server lost our clientid */
+out:
 	if (prev)
 		nfs_put_client(prev);
-	spin_unlock(&nn->nfs_client_lock);
-	pr_err("NFS: %s Error: no matching nfs_client found\n", __func__);
-	return -NFS4ERR_STALE_CLIENTID;
+	dprintk("NFS: <-- %s status = %d\n", __func__, status);
+	return status;
 }
 
 #ifdef CONFIG_NFS_V4_1
@@ -432,7 +424,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
 {
 	struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
 	struct nfs_client *pos, *n, *prev = NULL;
-	int error;
+	int status = -NFS4ERR_STALE_CLIENTID;
 
 	spin_lock(&nn->nfs_client_lock);
 	list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -448,14 +440,17 @@ int nfs41_walk_client_list(struct nfs_client *new,
 				nfs_put_client(prev);
 			prev = pos;
 
-			error = nfs_wait_client_init_complete(pos);
-			if (error < 0) {
+			nfs4_schedule_lease_recovery(pos);
+			status = nfs_wait_client_init_complete(pos);
+			if (status < 0) {
 				nfs_put_client(pos);
 				spin_lock(&nn->nfs_client_lock);
 				continue;
 			}
-
+			status = pos->cl_cons_state;
 			spin_lock(&nn->nfs_client_lock);
+			if (status < 0)
+				continue;
 		}
 
 		if (pos->rpc_ops != new->rpc_ops)
@@ -473,6 +468,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
 		if (!nfs4_match_serverowners(pos, new))
 			continue;
 
+		atomic_inc(&pos->cl_count);
 		spin_unlock(&nn->nfs_client_lock);
 		dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
 			__func__, pos, atomic_read(&pos->cl_count));
@@ -481,16 +477,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
 		return 0;
 	}
 
-	/*
-	 * No matching nfs_client found.  This should be impossible,
-	 * because the new nfs_client has already been added to
-	 * nfs_client_list by nfs_get_client().
-	 *
-	 * Don't BUG(), since the caller is holding a mutex.
-	 */
+	/* No matching nfs_client found. */
 	spin_unlock(&nn->nfs_client_lock);
-	pr_err("NFS: %s Error: no matching nfs_client found\n", __func__);
-	return -NFS4ERR_STALE_CLIENTID;
+	dprintk("NFS: <-- %s status = %d\n", __func__, status);
+	return status;
 }
 #endif	/* CONFIG_NFS_V4_1 */
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9448c579d41a..e61f68d5ef21 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
 	clp->cl_confirm = clid.confirm;
 
 	status = nfs40_walk_client_list(clp, result, cred);
-	switch (status) {
-	case -NFS4ERR_STALE_CLIENTID:
-		set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
-	case 0:
+	if (status == 0) {
 		/* Sustain the lease, even if it's empty.  If the clientid4
 		 * goes stale it's of no use for trunking discovery. */
 		nfs4_schedule_state_renewal(*result);
-		break;
 	}
-
 out:
 	return status;
 }
@@ -1863,6 +1858,7 @@ again:
 	case -ETIMEDOUT:
 	case -EAGAIN:
 		ssleep(1);
+	case -NFS4ERR_STALE_CLIENTID:
 		dprintk("NFS: %s after status %d, retrying\n",
 			__func__, status);
 		goto again;
@@ -2022,8 +2018,18 @@ static int nfs4_reset_session(struct nfs_client *clp)
 	nfs4_begin_drain_session(clp);
 	cred = nfs4_get_exchange_id_cred(clp);
 	status = nfs4_proc_destroy_session(clp->cl_session, cred);
-	if (status && status != -NFS4ERR_BADSESSION &&
-	    status != -NFS4ERR_DEADSESSION) {
+	switch (status) {
+	case 0:
+	case -NFS4ERR_BADSESSION:
+	case -NFS4ERR_DEADSESSION:
+		break;
+	case -NFS4ERR_BACK_CHAN_BUSY:
+	case -NFS4ERR_DELAY:
+		set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+		status = 0;
+		ssleep(1);
+		goto out;
+	default:
 		status = nfs4_recovery_handle_error(clp, status);
 		goto out;
 	}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2e7e8c878e5d..b056b1628722 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2589,27 +2589,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
 	struct nfs_server *server;
 	struct dentry *mntroot = ERR_PTR(-ENOMEM);
 	struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
-	int error;
 
-	dprintk("--> nfs_xdev_mount_common()\n");
+	dprintk("--> nfs_xdev_mount()\n");
 
 	mount_info.mntfh = mount_info.cloned->fh;
 
 	/* create a new volume representation */
 	server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
-	if (IS_ERR(server)) {
-		error = PTR_ERR(server);
-		goto out_err;
-	}
 
-	mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod);
-	dprintk("<-- nfs_xdev_mount_common() = 0\n");
-out:
-	return mntroot;
+	if (IS_ERR(server))
+		mntroot = ERR_CAST(server);
+	else
+		mntroot = nfs_fs_mount_common(server, flags,
+				dev_name, &mount_info, nfs_mod);
 
-out_err:
-	dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error);
-	goto out;
+	dprintk("<-- nfs_xdev_mount() = %ld\n",
+			IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
+	return mntroot;
 }
 
 #if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index fdb180769485..f3859354e41a 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -664,8 +664,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
 	if (ret < 0)
 		printk(KERN_ERR "NILFS: GC failed during preparation: "
 			"cannot read source blocks: err=%d\n", ret);
-	else
+	else {
+		if (nilfs_sb_need_update(nilfs))
+			set_nilfs_discontinued(nilfs);
 		ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
+	}
 
 	nilfs_remove_all_gcinodes(nilfs);
 	clear_nilfs_gc_running(nilfs);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4111a40ebe1a..5f707e537171 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -86,11 +86,11 @@ xfs_destroy_ioend(
 	}
 
 	if (ioend->io_iocb) {
+		inode_dio_done(ioend->io_inode);
 		if (ioend->io_isasync) {
 			aio_complete(ioend->io_iocb, ioend->io_error ?
 					ioend->io_error : ioend->io_result, 0);
 		}
-		inode_dio_done(ioend->io_inode);
 	}
 
 	mempool_free(ioend, xfs_ioend_pool);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 0e92d12765d2..cdb2d3348583 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4680,9 +4680,6 @@ __xfs_bmapi_allocate(
 			return error;
 	}
 
-	if (bma->flags & XFS_BMAPI_STACK_SWITCH)
-		bma->stack_switch = 1;
-
 	error = xfs_bmap_alloc(bma);
 	if (error)
 		return error;
@@ -4956,6 +4953,9 @@ xfs_bmapi_write(
 	bma.flist = flist;
 	bma.firstblock = firstblock;
 
+	if (flags & XFS_BMAPI_STACK_SWITCH)
+		bma.stack_switch = 1;
+
 	while (bno < end && n < *nmap) {
 		inhole = eof || bma.got.br_startoff > bno;
 		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 56d1614760cf..fbbb9eb92e32 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -487,6 +487,7 @@ _xfs_buf_find(
 	struct rb_node		*parent;
 	xfs_buf_t		*bp;
 	xfs_daddr_t		blkno = map[0].bm_bn;
+	xfs_daddr_t		eofs;
 	int			numblks = 0;
 	int			i;
 
@@ -498,6 +499,23 @@ _xfs_buf_find(
 	ASSERT(!(numbytes < (1 << btp->bt_sshift)));
 	ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
 
+	/*
+	 * Corrupted block numbers can get through to here, unfortunately, so we
+	 * have to check that the buffer falls within the filesystem bounds.
+	 */
+	eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
+	if (blkno >= eofs) {
+		/*
+		 * XXX (dgc): we should really be returning EFSCORRUPTED here,
+		 * but none of the higher level infrastructure supports
+		 * returning a specific error on buffer lookup failures.
+		 */
+		xfs_alert(btp->bt_mount,
+			  "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
+			  __func__, blkno, eofs);
+		return NULL;
+	}
+
 	/* get tree root */
 	pag = xfs_perag_get(btp->bt_mount,
 				xfs_daddr_to_agno(btp->bt_mount, blkno));
@@ -1487,6 +1505,8 @@ restart:
 	while (!list_empty(&btp->bt_lru)) {
 		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
 		if (atomic_read(&bp->b_hold) > 1) {
+			trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
+			list_move_tail(&bp->b_lru, &btp->bt_lru);
 			spin_unlock(&btp->bt_lru_lock);
 			delay(100);
 			goto restart;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 77b09750e92c..3f9949fee391 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -652,7 +652,10 @@ xfs_buf_item_unlock(
 
 	/*
 	 * If the buf item isn't tracking any data, free it, otherwise drop the
-	 * reference we hold to it.
+	 * reference we hold to it. If we are aborting the transaction, this may
+	 * be the only reference to the buf item, so we free it anyway
+	 * regardless of whether it is dirty or not. A dirty abort implies a
+	 * shutdown, anyway.
 	 */
 	clean = 1;
 	for (i = 0; i < bip->bli_format_count; i++) {
@@ -664,7 +667,12 @@ xfs_buf_item_unlock(
 	}
 	if (clean)
 		xfs_buf_item_relse(bp);
-	else
+	else if (aborted) {
+		if (atomic_dec_and_test(&bip->bli_refcount)) {
+			ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
+			xfs_buf_item_relse(bp);
+		}
+	} else
 		atomic_dec(&bip->bli_refcount);
 
 	if (!hold)
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d0e9c74d3d96..a8bd26b82ecb 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -246,10 +246,10 @@ xfs_swap_extents(
 		goto out_unlock;
 	}
 
-	error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
+	error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
 	if (error)
 		goto out_unlock;
-	truncate_pagecache_range(VFS_I(ip), 0, -1);
+	truncate_pagecache_range(VFS_I(tip), 0, -1);
 
 	/* Verify O_DIRECT for ftmp */
 	if (VN_CACHED(VFS_I(tip)) != 0) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index add06b4e9a63..364818eef40e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -351,6 +351,15 @@ xfs_iomap_prealloc_size(
 		}
 		if (shift)
 			alloc_blocks >>= shift;
+
+		/*
+		 * If we are still trying to allocate more space than is
+		 * available, squash the prealloc hard. This can happen if we
+		 * have a large file on a small filesystem and the above
+		 * lowspace thresholds are smaller than MAXEXTLEN.
+		 */
+		while (alloc_blocks >= freesp)
+			alloc_blocks >>= 4;
 	}
 
 	if (alloc_blocks < mp->m_writeio_blocks)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da508463ff10..7d6df7c00c36 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -658,7 +658,7 @@ xfs_sb_quiet_read_verify(
 		return;
 	}
 	/* quietly fail */
-	xfs_buf_ioerror(bp, EFSCORRUPTED);
+	xfs_buf_ioerror(bp, EWRONGFS);
 }
 
 static void
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2e137d4a85ae..16a812977eab 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse);
 DEFINE_BUF_EVENT(xfs_buf_item_iodone);
 DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
 DEFINE_BUF_EVENT(xfs_buf_error_relse);
+DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
 DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
 DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);