Merge tag 'v4.11-rc6' into sched/core, to pick up fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Ingo Molnar <mingo@kernel.org> 2017-04-11 10:05:36 +0300
committer: Ingo Molnar <mingo@kernel.org> 2017-04-11 10:05:36 +0300
commit: 36a4dfc37856cae40e4d864950f768d8b7d4f9f5 (patch)
tree: 3aa6a1459a699b986632f64b740e8e98e3ce6500 /fs
parent: a481db34b9beb7a9647c23f2320dd38a2b1d681f (diff)
parent: 39da7c509acff13fc8cb12ec1bb20337c988ed36 (diff)
download: linux-36a4dfc37856cae40e4d864950f768d8b7d4f9f5.tar.xz
87 files changed, 1280 insertions, 773 deletions
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index b29447e03ede..25d404d22cae 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work)
 {
 	struct afs_server *server;
 	struct afs_vnode *vnode, *xvnode;
-	time_t now;
+	time64_t now;
 	long timeout;
 	int ret;
 
@@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work)
 
 	_enter("");
 
-	now = get_seconds();
+	now = ktime_get_real_seconds();
 
 	/* find the first vnode to update */
 	spin_lock(&server->cb_lock);
@@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work)
 
 	/* and then reschedule */
 	_debug("reschedule");
-	vnode->update_at = get_seconds() + afs_vnode_update_timeout;
+	vnode->update_at = ktime_get_real_seconds() +
+			afs_vnode_update_timeout;
 
 	spin_lock(&server->cb_lock);
 
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 2edbdcbf6432..3062cceb5c2a 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -187,7 +187,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 	struct afs_callback *cb;
 	struct afs_server *server;
 	__be32 *bp;
-	u32 tmp;
 	int ret, loop;
 
 	_enter("{%u}", call->unmarshall);
@@ -249,9 +248,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		if (ret < 0)
 			return ret;
 
-		tmp = ntohl(call->tmp);
-		_debug("CB count: %u", tmp);
-		if (tmp != call->count && tmp != 0)
+		call->count2 = ntohl(call->tmp);
+		_debug("CB count: %u", call->count2);
+		if (call->count2 != call->count && call->count2 != 0)
 			return -EBADMSG;
 		call->offset = 0;
 		call->unmarshall++;
@@ -259,14 +258,14 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 	case 4:
 		_debug("extract CB array");
 		ret = afs_extract_data(call, call->buffer,
-				       call->count * 3 * 4, false);
+				       call->count2 * 3 * 4, false);
 		if (ret < 0)
 			return ret;
 
 		_debug("unmarshall CB array");
 		cb = call->request;
 		bp = call->buffer;
-		for (loop = call->count; loop > 0; loop--, cb++) {
+		for (loop = call->count2; loop > 0; loop--, cb++) {
 			cb->version	= ntohl(*bp++);
 			cb->expiry	= ntohl(*bp++);
 			cb->type	= ntohl(*bp++);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index ba7b71fba34b..0d5b8508869b 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -30,6 +30,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping,
 
 const struct file_operations afs_file_operations = {
 	.open		= afs_open,
+	.flush		= afs_flush,
 	.release	= afs_release,
 	.llseek		= generic_file_llseek,
 	.read_iter	= generic_file_read_iter,
@@ -184,10 +185,13 @@ int afs_page_filler(void *data, struct page *page)
 		if (!req)
 			goto enomem;
 
+		/* We request a full page.  If the page is a partial one at the
+		 * end of the file, the server will return a short read and the
+		 * unmarshalling code will clear the unfilled space.
+		 */
 		atomic_set(&req->usage, 1);
 		req->pos = (loff_t)page->index << PAGE_SHIFT;
-		req->len = min_t(size_t, i_size_read(inode) - req->pos,
-				 PAGE_SIZE);
+		req->len = PAGE_SIZE;
 		req->nr_pages = 1;
 		req->pages[0] = page;
 		get_page(page);
@@ -208,7 +212,13 @@ int afs_page_filler(void *data, struct page *page)
 			fscache_uncache_page(vnode->cache, page);
 #endif
 			BUG_ON(PageFsCache(page));
-			goto error;
+
+			if (ret == -EINTR ||
+			    ret == -ENOMEM ||
+			    ret == -ERESTARTSYS ||
+			    ret == -EAGAIN)
+				goto error;
+			goto io_error;
 		}
 
 		SetPageUptodate(page);
@@ -227,10 +237,12 @@ int afs_page_filler(void *data, struct page *page)
 	_leave(" = 0");
 	return 0;
 
+io_error:
+	SetPageError(page);
+	goto error;
 enomem:
 	ret = -ENOMEM;
 error:
-	SetPageError(page);
 	unlock_page(page);
 	_leave(" = %d", ret);
 	return ret;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index ac8e766978dc..19f76ae36982 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -17,6 +17,12 @@
 #include "afs_fs.h"
 
 /*
+ * We need somewhere to discard into in case the server helpfully returns more
+ * than we asked for in FS.FetchData{,64}.
+ */
+static u8 afs_discard_buffer[64];
+
+/*
  * decode an AFSFid block
  */
 static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
@@ -105,7 +111,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 			vnode->vfs_inode.i_mode = mode;
 		}
 
-		vnode->vfs_inode.i_ctime.tv_sec	= status->mtime_server;
+		vnode->vfs_inode.i_ctime.tv_sec	= status->mtime_client;
 		vnode->vfs_inode.i_mtime	= vnode->vfs_inode.i_ctime;
 		vnode->vfs_inode.i_atime	= vnode->vfs_inode.i_ctime;
 		vnode->vfs_inode.i_version	= data_version;
@@ -139,7 +145,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
 	vnode->cb_version	= ntohl(*bp++);
 	vnode->cb_expiry	= ntohl(*bp++);
 	vnode->cb_type		= ntohl(*bp++);
-	vnode->cb_expires	= vnode->cb_expiry + get_seconds();
+	vnode->cb_expires	= vnode->cb_expiry + ktime_get_real_seconds();
 	*_bp = bp;
 }
 
@@ -315,7 +321,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 	void *buffer;
 	int ret;
 
-	_enter("{%u,%zu/%u;%u/%llu}",
+	_enter("{%u,%zu/%u;%llu/%llu}",
 	       call->unmarshall, call->offset, call->count,
 	       req->remain, req->actual_len);
 
@@ -353,12 +359,6 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 
 		req->actual_len |= ntohl(call->tmp);
 		_debug("DATA length: %llu", req->actual_len);
-		/* Check that the server didn't want to send us extra.  We
-		 * might want to just discard instead, but that requires
-		 * cooperation from AF_RXRPC.
-		 */
-		if (req->actual_len > req->len)
-			return -EBADMSG;
 
 		req->remain = req->actual_len;
 		call->offset = req->pos & (PAGE_SIZE - 1);
@@ -368,6 +368,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		call->unmarshall++;
 
 	begin_page:
+		ASSERTCMP(req->index, <, req->nr_pages);
 		if (req->remain > PAGE_SIZE - call->offset)
 			size = PAGE_SIZE - call->offset;
 		else
@@ -378,7 +379,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 
 		/* extract the returned data */
 	case 3:
-		_debug("extract data %u/%llu %zu/%u",
+		_debug("extract data %llu/%llu %zu/%u",
 		       req->remain, req->actual_len, call->offset, call->count);
 
 		buffer = kmap(req->pages[req->index]);
@@ -389,19 +390,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		if (call->offset == PAGE_SIZE) {
 			if (req->page_done)
 				req->page_done(call, req);
+			req->index++;
 			if (req->remain > 0) {
-				req->index++;
 				call->offset = 0;
+				if (req->index >= req->nr_pages) {
+					call->unmarshall = 4;
+					goto begin_discard;
+				}
 				goto begin_page;
 			}
 		}
+		goto no_more_data;
+
+		/* Discard any excess data the server gave us */
+	begin_discard:
+	case 4:
+		size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
+		call->count = size;
+		_debug("extract discard %llu/%llu %zu/%u",
+		       req->remain, req->actual_len, call->offset, call->count);
+
+		call->offset = 0;
+		ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
+		req->remain -= call->offset;
+		if (ret < 0)
+			return ret;
+		if (req->remain > 0)
+			goto begin_discard;
 
 	no_more_data:
 		call->offset = 0;
-		call->unmarshall++;
+		call->unmarshall = 5;
 
 		/* extract the metadata */
-	case 4:
+	case 5:
 		ret = afs_extract_data(call, call->buffer,
 				       (21 + 3 + 6) * 4, false);
 		if (ret < 0)
@@ -416,16 +438,17 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		call->offset = 0;
 		call->unmarshall++;
 
-	case 5:
+	case 6:
 		break;
 	}
 
-	if (call->count < PAGE_SIZE) {
-		buffer = kmap(req->pages[req->index]);
-		memset(buffer + call->count, 0, PAGE_SIZE - call->count);
-		kunmap(req->pages[req->index]);
+	for (; req->index < req->nr_pages; req->index++) {
+		if (call->count < PAGE_SIZE)
+			zero_user_segment(req->pages[req->index],
+					  call->count, PAGE_SIZE);
 		if (req->page_done)
 			req->page_done(call, req);
+		call->count = 0;
 	}
 
 	_leave(" = 0 [done]");
@@ -711,8 +734,8 @@ int afs_fs_create(struct afs_server *server,
 		memset(bp, 0, padsz);
 		bp = (void *) bp + padsz;
 	}
-	*bp++ = htonl(AFS_SET_MODE);
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+	*bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
@@ -980,8 +1003,8 @@ int afs_fs_symlink(struct afs_server *server,
 		memset(bp, 0, c_padsz);
 		bp = (void *) bp + c_padsz;
 	}
-	*bp++ = htonl(AFS_SET_MODE);
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+	*bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = htonl(S_IRWXUGO); /* unix mode */
@@ -1180,8 +1203,8 @@ static int afs_fs_store_data64(struct afs_server *server,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	*bp++ = 0; /* mask */
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MTIME); /* mask */
+	*bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = 0; /* unix mode */
@@ -1213,7 +1236,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 	_enter(",%x,{%x:%u},,",
 	       key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
 
-	size = to - offset;
+	size = (loff_t)to - (loff_t)offset;
 	if (first != last)
 		size += (loff_t)(last - first) << PAGE_SHIFT;
 	pos = (loff_t)first << PAGE_SHIFT;
@@ -1257,8 +1280,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 	*bp++ = htonl(vnode->fid.vnode);
 	*bp++ = htonl(vnode->fid.unique);
 
-	*bp++ = 0; /* mask */
-	*bp++ = 0; /* mtime */
+	*bp++ = htonl(AFS_SET_MTIME); /* mask */
+	*bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
 	*bp++ = 0; /* owner */
 	*bp++ = 0; /* group */
 	*bp++ = 0; /* unix mode */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1e4897a048d2..aae55dd15108 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -54,8 +54,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 		inode->i_fop	= &afs_dir_file_operations;
 		break;
 	case AFS_FTYPE_SYMLINK:
-		inode->i_mode	= S_IFLNK | vnode->status.mode;
-		inode->i_op	= &page_symlink_inode_operations;
+		/* Symlinks with a mode of 0644 are actually mountpoints. */
+		if ((vnode->status.mode & 0777) == 0644) {
+			inode->i_flags |= S_AUTOMOUNT;
+
+			spin_lock(&vnode->lock);
+			set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+			spin_unlock(&vnode->lock);
+
+			inode->i_mode	= S_IFDIR | 0555;
+			inode->i_op	= &afs_mntpt_inode_operations;
+			inode->i_fop	= &afs_mntpt_file_operations;
+		} else {
+			inode->i_mode	= S_IFLNK | vnode->status.mode;
+			inode->i_op	= &page_symlink_inode_operations;
+		}
 		inode_nohighmem(inode);
 		break;
 	default:
@@ -70,27 +83,15 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 
 	set_nlink(inode, vnode->status.nlink);
 	inode->i_uid		= vnode->status.owner;
-	inode->i_gid		= GLOBAL_ROOT_GID;
+	inode->i_gid            = vnode->status.group;
 	inode->i_size		= vnode->status.size;
-	inode->i_ctime.tv_sec	= vnode->status.mtime_server;
+	inode->i_ctime.tv_sec	= vnode->status.mtime_client;
 	inode->i_ctime.tv_nsec	= 0;
 	inode->i_atime		= inode->i_mtime = inode->i_ctime;
 	inode->i_blocks		= 0;
 	inode->i_generation	= vnode->fid.unique;
 	inode->i_version	= vnode->status.data_version;
 	inode->i_mapping->a_ops	= &afs_fs_aops;
-
-	/* check to see whether a symbolic link is really a mountpoint */
-	if (vnode->status.type == AFS_FTYPE_SYMLINK) {
-		afs_mntpt_check_symlink(vnode, key);
-
-		if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
-			inode->i_mode	= S_IFDIR | vnode->status.mode;
-			inode->i_op	= &afs_mntpt_inode_operations;
-			inode->i_fop	= &afs_mntpt_file_operations;
-		}
-	}
-
 	return 0;
 }
 
@@ -245,12 +246,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 			vnode->cb_version = 0;
 			vnode->cb_expiry = 0;
 			vnode->cb_type = 0;
-			vnode->cb_expires = get_seconds();
+			vnode->cb_expires = ktime_get_real_seconds();
 		} else {
 			vnode->cb_version = cb->version;
 			vnode->cb_expiry = cb->expiry;
 			vnode->cb_type = cb->type;
-			vnode->cb_expires = vnode->cb_expiry + get_seconds();
+			vnode->cb_expires = vnode->cb_expiry +
+				ktime_get_real_seconds();
 		}
 	}
 
@@ -323,7 +325,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	    !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
 	    !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
 	    !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
-		if (vnode->cb_expires < get_seconds() + 10) {
+		if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
 			_debug("callback expired");
 			set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
 		} else {
@@ -444,7 +446,7 @@ void afs_evict_inode(struct inode *inode)
 
 	mutex_lock(&vnode->permits_lock);
 	permits = vnode->permits;
-	rcu_assign_pointer(vnode->permits, NULL);
+	RCU_INIT_POINTER(vnode->permits, NULL);
 	mutex_unlock(&vnode->permits_lock);
 	if (permits)
 		call_rcu(&permits->rcu, afs_zap_permits);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5dfa56903a2d..a6901360fb81 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -11,6 +11,7 @@
 
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/ktime.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/rxrpc.h>
@@ -90,7 +91,10 @@ struct afs_call {
 	unsigned		request_size;	/* size of request data */
 	unsigned		reply_max;	/* maximum size of reply */
 	unsigned		first_offset;	/* offset into mapping[first] */
-	unsigned		last_to;	/* amount of mapping[last] */
+	union {
+		unsigned	last_to;	/* amount of mapping[last] */
+		unsigned	count2;		/* count used in unmarshalling */
+	};
 	unsigned char		unmarshall;	/* unmarshalling phase */
 	bool			incoming;	/* T if incoming call */
 	bool			send_pages;	/* T if data from mapping should be sent */
@@ -127,12 +131,11 @@ struct afs_call_type {
  */
 struct afs_read {
 	loff_t			pos;		/* Where to start reading */
-	loff_t			len;		/* How much to read */
+	loff_t			len;		/* How much we're asking for */
 	loff_t			actual_len;	/* How much we're actually getting */
+	loff_t			remain;		/* Amount remaining */
 	atomic_t		usage;
-	unsigned int		remain;		/* Amount remaining */
 	unsigned int		index;		/* Which page we're reading into */
-	unsigned int		pg_offset;	/* Offset in page we're at */
 	unsigned int		nr_pages;
 	void (*page_done)(struct afs_call *, struct afs_read *);
 	struct page		*pages[];
@@ -247,7 +250,7 @@ struct afs_cache_vhash {
  */
 struct afs_vlocation {
 	atomic_t		usage;
-	time_t			time_of_death;	/* time at which put reduced usage to 0 */
+	time64_t		time_of_death;	/* time at which put reduced usage to 0 */
 	struct list_head	link;		/* link in cell volume location list */
 	struct list_head	grave;		/* link in master graveyard list */
 	struct list_head	update;		/* link in master update list */
@@ -258,7 +261,7 @@ struct afs_vlocation {
 	struct afs_cache_vlocation vldb;	/* volume information DB record */
 	struct afs_volume	*vols[3];	/* volume access record pointer (index by type) */
 	wait_queue_head_t	waitq;		/* status change waitqueue */
-	time_t			update_at;	/* time at which record should be updated */
+	time64_t		update_at;	/* time at which record should be updated */
 	spinlock_t		lock;		/* access lock */
 	afs_vlocation_state_t	state;		/* volume location state */
 	unsigned short		upd_rej_cnt;	/* ENOMEDIUM count during update */
@@ -271,7 +274,7 @@ struct afs_vlocation {
  */
 struct afs_server {
 	atomic_t		usage;
-	time_t			time_of_death;	/* time at which put reduced usage to 0 */
+	time64_t		time_of_death;	/* time at which put reduced usage to 0 */
 	struct in_addr		addr;		/* server address */
 	struct afs_cell		*cell;		/* cell in which server resides */
 	struct list_head	link;		/* link in cell's server list */
@@ -374,8 +377,8 @@ struct afs_vnode {
 	struct rb_node		server_rb;	/* link in server->fs_vnodes */
 	struct rb_node		cb_promise;	/* link in server->cb_promises */
 	struct work_struct	cb_broken_work;	/* work to be done on callback break */
-	time_t			cb_expires;	/* time at which callback expires */
-	time_t			cb_expires_at;	/* time used to order cb_promise */
+	time64_t		cb_expires;	/* time at which callback expires */
+	time64_t		cb_expires_at;	/* time used to order cb_promise */
 	unsigned		cb_version;	/* callback version */
 	unsigned		cb_expiry;	/* callback expiry time */
 	afs_callback_type_t	cb_type;	/* type of callback */
@@ -557,7 +560,6 @@ extern const struct inode_operations afs_autocell_inode_operations;
 extern const struct file_operations afs_mntpt_file_operations;
 
 extern struct vfsmount *afs_d_automount(struct path *);
-extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
 extern void afs_mntpt_kill_timer(void);
 
 /*
@@ -718,6 +720,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *);
 extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
 extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
 extern int afs_writeback_all(struct afs_vnode *);
+extern int afs_flush(struct file *, fl_owner_t);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
 
 
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 91ea1aa0d8b3..100b207efc9e 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code)
 	case RXKADDATALEN:	return -EKEYREJECTED;
 	case RXKADILLEGALLEVEL:	return -EKEYREJECTED;
 
+	case RXGEN_OPCODE:	return -ENOTSUPP;
+
 	default:		return -EREMOTEIO;
 	}
 }
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index d4fb0afc0097..bd3b65cde282 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -47,59 +47,6 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
 static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
 
 /*
- * check a symbolic link to see whether it actually encodes a mountpoint
- * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
- */
-int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
-{
-	struct page *page;
-	size_t size;
-	char *buf;
-	int ret;
-
-	_enter("{%x:%u,%u}",
-	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-
-	/* read the contents of the symlink into the pagecache */
-	page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0,
-			       afs_page_filler, key);
-	if (IS_ERR(page)) {
-		ret = PTR_ERR(page);
-		goto out;
-	}
-
-	ret = -EIO;
-	if (PageError(page))
-		goto out_free;
-
-	buf = kmap(page);
-
-	/* examine the symlink's contents */
-	size = vnode->status.size;
-	_debug("symlink to %*.*s", (int) size, (int) size, buf);
-
-	if (size > 2 &&
-	    (buf[0] == '%' || buf[0] == '#') &&
-	    buf[size - 1] == '.'
-	    ) {
-		_debug("symlink is a mountpoint");
-		spin_lock(&vnode->lock);
-		set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
-		vnode->vfs_inode.i_flags |= S_AUTOMOUNT;
-		spin_unlock(&vnode->lock);
-	}
-
-	ret = 0;
-
-	kunmap(page);
-out_free:
-	put_page(page);
-out:
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
  * no valid lookup procedure on this sort of dir
  */
 static struct dentry *afs_mntpt_lookup(struct inode *dir,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 419ef05dcb5e..8f76b13d5549 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -259,67 +259,74 @@ void afs_flat_call_destructor(struct afs_call *call)
 	call->buffer = NULL;
 }
 
+#define AFS_BVEC_MAX 8
+
+/*
+ * Load the given bvec with the next few pages.
+ */
+static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
+			  struct bio_vec *bv, pgoff_t first, pgoff_t last,
+			  unsigned offset)
+{
+	struct page *pages[AFS_BVEC_MAX];
+	unsigned int nr, n, i, to, bytes = 0;
+
+	nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX);
+	n = find_get_pages_contig(call->mapping, first, nr, pages);
+	ASSERTCMP(n, ==, nr);
+
+	msg->msg_flags |= MSG_MORE;
+	for (i = 0; i < nr; i++) {
+		to = PAGE_SIZE;
+		if (first + i >= last) {
+			to = call->last_to;
+			msg->msg_flags &= ~MSG_MORE;
+		}
+		bv[i].bv_page = pages[i];
+		bv[i].bv_len = to - offset;
+		bv[i].bv_offset = offset;
+		bytes += to - offset;
+		offset = 0;
+	}
+
+	iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+}
+
 /*
  * attach the data from a bunch of pages on an inode to a call
  */
 static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 {
-	struct page *pages[8];
-	unsigned count, n, loop, offset, to;
+	struct bio_vec bv[AFS_BVEC_MAX];
+	unsigned int bytes, nr, loop, offset;
 	pgoff_t first = call->first, last = call->last;
 	int ret;
 
-	_enter("");
-
 	offset = call->first_offset;
 	call->first_offset = 0;
 
 	do {
-		_debug("attach %lx-%lx", first, last);
-
-		count = last - first + 1;
-		if (count > ARRAY_SIZE(pages))
-			count = ARRAY_SIZE(pages);
-		n = find_get_pages_contig(call->mapping, first, count, pages);
-		ASSERTCMP(n, ==, count);
-
-		loop = 0;
-		do {
-			struct bio_vec bvec = {.bv_page = pages[loop],
-					       .bv_offset = offset};
-			msg->msg_flags = 0;
-			to = PAGE_SIZE;
-			if (first + loop >= last)
-				to = call->last_to;
-			else
-				msg->msg_flags = MSG_MORE;
-			bvec.bv_len = to - offset;
-			offset = 0;
-
-			_debug("- range %u-%u%s",
-			       offset, to, msg->msg_flags ? " [more]" : "");
-			iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC,
-				      &bvec, 1, to - offset);
-
-			/* have to change the state *before* sending the last
-			 * packet as RxRPC might give us the reply before it
-			 * returns from sending the request */
-			if (first + loop >= last)
-				call->state = AFS_CALL_AWAIT_REPLY;
-			ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
-						     msg, to - offset);
-			if (ret < 0)
-				break;
-		} while (++loop < count);
-		first += count;
-
-		for (loop = 0; loop < count; loop++)
-			put_page(pages[loop]);
+		afs_load_bvec(call, msg, bv, first, last, offset);
+		offset = 0;
+		bytes = msg->msg_iter.count;
+		nr = msg->msg_iter.nr_segs;
+
+		/* Have to change the state *before* sending the last
+		 * packet as RxRPC might give us the reply before it
+		 * returns from sending the request.
+		 */
+		if (first + nr - 1 >= last)
+			call->state = AFS_CALL_AWAIT_REPLY;
+		ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
+					     msg, bytes);
+		for (loop = 0; loop < nr; loop++)
+			put_page(bv[loop].bv_page);
 		if (ret < 0)
 			break;
+
+		first += nr;
 	} while (first <= last);
 
-	_leave(" = %d", ret);
 	return ret;
 }
 
@@ -333,6 +340,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	struct rxrpc_call *rxcall;
 	struct msghdr msg;
 	struct kvec iov[1];
+	size_t offset;
+	u32 abort_code;
 	int ret;
 
 	_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -381,9 +390,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= (call->send_pages ? MSG_MORE : 0);
 
-	/* have to change the state *before* sending the last packet as RxRPC
-	 * might give us the reply before it returns from sending the
-	 * request */
+	/* We have to change the state *before* sending the last packet as
+	 * rxrpc might give us the reply before it returns from sending the
+	 * request.  Further, if the send fails, we may already have been given
+	 * a notification and may have collected it.
+	 */
 	if (!call->send_pages)
 		call->state = AFS_CALL_AWAIT_REPLY;
 	ret = rxrpc_kernel_send_data(afs_socket, rxcall,
@@ -405,7 +416,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	return afs_wait_for_call_to_complete(call);
 
 error_do_abort:
-	rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
+	call->state = AFS_CALL_COMPLETE;
+	if (ret != -ECONNABORTED) {
+		rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT,
+					-ret, "KSD");
+	} else {
+		abort_code = 0;
+		offset = 0;
+		rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
+				       false, &abort_code);
+		ret = call->type->abort_to_error(abort_code);
+	}
 error_kill_call:
 	afs_put_call(call);
 	_leave(" = %d", ret);
@@ -452,16 +473,18 @@ static void afs_deliver_to_call(struct afs_call *call)
 		case -EINPROGRESS:
 		case -EAGAIN:
 			goto out;
+		case -ECONNABORTED:
+			goto call_complete;
 		case -ENOTCONN:
 			abort_code = RX_CALL_DEAD;
 			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
 						abort_code, -ret, "KNC");
-			goto do_abort;
+			goto save_error;
 		case -ENOTSUPP:
-			abort_code = RX_INVALID_OPERATION;
+			abort_code = RXGEN_OPCODE;
 			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
 						abort_code, -ret, "KIV");
-			goto do_abort;
+			goto save_error;
 		case -ENODATA:
 		case -EBADMSG:
 		case -EMSGSIZE:
@@ -471,7 +494,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 				abort_code = RXGEN_SS_UNMARSHAL;
 			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
 						abort_code, EBADMSG, "KUM");
-			goto do_abort;
+			goto save_error;
 		}
 	}
 
@@ -482,8 +505,9 @@ out:
 	_leave("");
 	return;
 
-do_abort:
+save_error:
 	call->error = ret;
+call_complete:
 	call->state = AFS_CALL_COMPLETE;
 	goto done;
 }
@@ -493,7 +517,6 @@ do_abort:
  */
 static int afs_wait_for_call_to_complete(struct afs_call *call)
 {
-	const char *abort_why;
 	int ret;
 
 	DECLARE_WAITQUEUE(myself, current);
@@ -512,13 +535,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 			continue;
 		}
 
-		abort_why = "KWC";
-		ret = call->error;
-		if (call->state == AFS_CALL_COMPLETE)
-			break;
-		abort_why = "KWI";
-		ret = -EINTR;
-		if (signal_pending(current))
+		if (call->state == AFS_CALL_COMPLETE ||
+		    signal_pending(current))
 			break;
 		schedule();
 	}
@@ -526,13 +544,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 	remove_wait_queue(&call->waitq, &myself);
 	__set_current_state(TASK_RUNNING);
 
-	/* kill the call */
+	/* Kill off the call if it's still live. */
 	if (call->state < AFS_CALL_COMPLETE) {
-		_debug("call incomplete");
+		_debug("call interrupted");
 		rxrpc_kernel_abort_call(afs_socket, call->rxcall,
-					RX_CALL_DEAD, -ret, abort_why);
+					RX_USER_ABORT, -EINTR, "KWI");
 	}
 
+	ret = call->error;
 	_debug("call complete");
 	afs_put_call(call);
 	_leave(" = %d", ret);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 8d010422dc89..ecb86a670180 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -114,7 +114,7 @@ void afs_clear_permits(struct afs_vnode *vnode)
 
 	mutex_lock(&vnode->permits_lock);
 	permits = vnode->permits;
-	rcu_assign_pointer(vnode->permits, NULL);
+	RCU_INIT_POINTER(vnode->permits, NULL);
 	mutex_unlock(&vnode->permits_lock);
 
 	if (permits)
@@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask)
 	} else {
 		if (!(access & AFS_ACE_LOOKUP))
 			goto permission_denied;
+		if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
+			goto permission_denied;
 		if (mask & (MAY_EXEC | MAY_READ)) {
 			if (!(access & AFS_ACE_READ))
 				goto permission_denied;
+			if (!(inode->i_mode & S_IRUSR))
+				goto permission_denied;
 		} else if (mask & MAY_WRITE) {
 			if (!(access & AFS_ACE_WRITE))
 				goto permission_denied;
+			if (!(inode->i_mode & S_IWUSR))
+				goto permission_denied;
 		}
 	}
 
 	key_put(key);
-	ret = generic_permission(inode, mask);
 	_leave(" = %d", ret);
 	return ret;
 
diff --git a/fs/afs/server.c b/fs/afs/server.c
index d4066ab7dd55..c001b1f2455f 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server)
 	spin_lock(&afs_server_graveyard_lock);
 	if (atomic_read(&server->usage) == 0) {
 		list_move_tail(&server->grave, &afs_server_graveyard);
-		server->time_of_death = get_seconds();
+		server->time_of_death = ktime_get_real_seconds();
 		queue_delayed_work(afs_wq, &afs_server_reaper,
 				   afs_server_timeout * HZ);
 	}
@@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work)
 	LIST_HEAD(corpses);
 	struct afs_server *server;
 	unsigned long delay, expiry;
-	time_t now;
+	time64_t now;
 
-	now = get_seconds();
+	now = ktime_get_real_seconds();
 	spin_lock(&afs_server_graveyard_lock);
 
 	while (!list_empty(&afs_server_graveyard)) {
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index d7d8dd8c0b31..37b7c3b342a6 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
 	struct afs_vlocation *xvl;
 
 	/* wait at least 10 minutes before updating... */
-	vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+	vl->update_at = ktime_get_real_seconds() +
+			afs_vlocation_update_timeout;
 
 	spin_lock(&afs_vlocation_updates_lock);
 
@@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl)
 	if (atomic_read(&vl->usage) == 0) {
 		_debug("buried");
 		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
-		vl->time_of_death = get_seconds();
+		vl->time_of_death = ktime_get_real_seconds();
 		queue_delayed_work(afs_wq, &afs_vlocation_reap,
 				   afs_vlocation_timeout * HZ);
 
@@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work)
 	LIST_HEAD(corpses);
 	struct afs_vlocation *vl;
 	unsigned long delay, expiry;
-	time_t now;
+	time64_t now;
 
 	_enter("");
 
-	now = get_seconds();
+	now = ktime_get_real_seconds();
 	spin_lock(&afs_vlocation_graveyard_lock);
 
 	while (!list_empty(&afs_vlocation_graveyard)) {
@@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work)
 {
 	struct afs_cache_vlocation vldb;
 	struct afs_vlocation *vl, *xvl;
-	time_t now;
+	time64_t now;
 	long timeout;
 	int ret;
 
 	_enter("");
 
-	now = get_seconds();
+	now = ktime_get_real_seconds();
 
 	/* find a record to update */
 	spin_lock(&afs_vlocation_updates_lock);
@@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work)
 
 	/* and then reschedule */
 	_debug("reschedule");
-	vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+	vl->update_at = ktime_get_real_seconds() +
+			afs_vlocation_update_timeout;
 
 	spin_lock(&afs_vlocation_updates_lock);
 
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c83c1a0e851f..2d2fccd5044b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -84,10 +84,9 @@ void afs_put_writeback(struct afs_writeback *wb)
  * partly or wholly fill a page that's under preparation for writing
  */
 static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
-			 loff_t pos, struct page *page)
+			 loff_t pos, unsigned int len, struct page *page)
 {
 	struct afs_read *req;
-	loff_t i_size;
 	int ret;
 
 	_enter(",,%llu", (unsigned long long)pos);
@@ -99,14 +98,10 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
 
 	atomic_set(&req->usage, 1);
 	req->pos = pos;
+	req->len = len;
 	req->nr_pages = 1;
 	req->pages[0] = page;
-
-	i_size = i_size_read(&vnode->vfs_inode);
-	if (pos + PAGE_SIZE > i_size)
-		req->len = i_size - pos;
-	else
-		req->len = PAGE_SIZE;
+	get_page(page);
 
 	ret = afs_vnode_fetch_data(vnode, key, req);
 	afs_put_read(req);
@@ -159,12 +154,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 		kfree(candidate);
 		return -ENOMEM;
 	}
-	*pagep = page;
-	/* page won't leak in error case: it eventually gets cleaned off LRU */
 
 	if (!PageUptodate(page) && len != PAGE_SIZE) {
-		ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
+		ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
 		if (ret < 0) {
+			unlock_page(page);
+			put_page(page);
 			kfree(candidate);
 			_leave(" = %d [prep]", ret);
 			return ret;
@@ -172,6 +167,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 		SetPageUptodate(page);
 	}
 
+	/* page won't leak in error case: it eventually gets cleaned off LRU */
+	*pagep = page;
+
 try_again:
 	spin_lock(&vnode->writeback_lock);
 
@@ -233,7 +231,7 @@ flush_conflicting_wb:
 	if (wb->state == AFS_WBACK_PENDING)
 		wb->state = AFS_WBACK_CONFLICTING;
 	spin_unlock(&vnode->writeback_lock);
-	if (PageDirty(page)) {
+	if (clear_page_dirty_for_io(page)) {
 		ret = afs_write_back_from_locked_page(wb, page);
 		if (ret < 0) {
 			afs_put_writeback(candidate);
@@ -257,7 +255,9 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 		  struct page *page, void *fsdata)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	struct key *key = file->private_data;
 	loff_t i_size, maybe_i_size;
+	int ret;
 
 	_enter("{%x:%u},{%lx}",
 	       vnode->fid.vid, vnode->fid.vnode, page->index);
@@ -273,6 +273,20 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 		spin_unlock(&vnode->writeback_lock);
 	}
 
+	if (!PageUptodate(page)) {
+		if (copied < len) {
+			/* Try and load any missing data from the server.  The
+			 * unmarshalling routine will take care of clearing any
+			 * bits that are beyond the EOF.
+			 */
+			ret = afs_fill_page(vnode, key, pos + copied,
+					    len - copied, page);
+			if (ret < 0)
+				return ret;
+		}
+		SetPageUptodate(page);
+	}
+
 	set_page_dirty(page);
 	if (PageDirty(page))
 		_debug("dirtied");
@@ -307,10 +321,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
 		ASSERTCMP(pv.nr, ==, count);
 
 		for (loop = 0; loop < count; loop++) {
-			ClearPageUptodate(pv.pages[loop]);
+			struct page *page = pv.pages[loop];
+			ClearPageUptodate(page);
 			if (error)
-				SetPageError(pv.pages[loop]);
-			end_page_writeback(pv.pages[loop]);
+				SetPageError(page);
+			if (PageWriteback(page))
+				end_page_writeback(page);
+			if (page->index >= first)
+				first = page->index + 1;
 		}
 
 		__pagevec_release(&pv);
@@ -335,8 +353,6 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
 	_enter(",%lx", primary_page->index);
 
 	count = 1;
-	if (!clear_page_dirty_for_io(primary_page))
-		BUG();
 	if (test_set_page_writeback(primary_page))
 		BUG();
 
@@ -502,17 +518,17 @@ static int afs_writepages_region(struct address_space *mapping,
 		 */
 		lock_page(page);
 
-		if (page->mapping != mapping) {
+		if (page->mapping != mapping || !PageDirty(page)) {
 			unlock_page(page);
 			put_page(page);
 			continue;
 		}
 
-		if (wbc->sync_mode != WB_SYNC_NONE)
-			wait_on_page_writeback(page);
-
-		if (PageWriteback(page) || !PageDirty(page)) {
+		if (PageWriteback(page)) {
 			unlock_page(page);
+			if (wbc->sync_mode != WB_SYNC_NONE)
+				wait_on_page_writeback(page);
+			put_page(page);
 			continue;
 		}
 
@@ -523,6 +539,8 @@ static int afs_writepages_region(struct address_space *mapping,
 		wb->state = AFS_WBACK_WRITING;
 		spin_unlock(&wb->vnode->writeback_lock);
 
+		if (!clear_page_dirty_for_io(page))
+			BUG();
 		ret = afs_write_back_from_locked_page(wb, page);
 		unlock_page(page);
 		put_page(page);
@@ -746,6 +764,20 @@ out:
 }
 
 /*
+ * Flush out all outstanding writes on a file opened for writing when it is
+ * closed.
+ */
+int afs_flush(struct file *file, fl_owner_t id)
+{
+	_enter("");
+
+	if ((file->f_mode & FMODE_WRITE) == 0)
+		return 0;
+
+	return vfs_fsync(file, 0);
+}
+
+/*
  * notification that a previously read-only page is about to become writable
  * - if it returns an error, the caller will deliver a bus error signal
  */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 29b7fc28c607..c4115901d906 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1259,7 +1259,7 @@ struct btrfs_root {
 	atomic_t will_be_snapshoted;
 
 	/* For qgroup metadata space reserve */
-	atomic_t qgroup_meta_rsv;
+	atomic64_t qgroup_meta_rsv;
 };
 static inline u32 btrfs_inode_sectorsize(const struct inode *inode)
 {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 08b74daf35d0..eb1ee7b6f532 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1342,7 +1342,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
 	atomic_set(&root->orphan_inodes, 0);
 	atomic_set(&root->refs, 1);
 	atomic_set(&root->will_be_snapshoted, 0);
-	atomic_set(&root->qgroup_meta_rsv, 0);
+	atomic64_set(&root->qgroup_meta_rsv, 0);
 	root->log_transid = 0;
 	root->log_transid_committed = -1;
 	root->last_log_commit = 0;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 28e81922a21c..27fdb250b446 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1714,7 +1714,8 @@ static int __process_pages_contig(struct address_space *mapping,
 			 * can we find nothing at @index.
 			 */
 			ASSERT(page_ops & PAGE_LOCK);
-			return ret;
+			err = -EAGAIN;
+			goto out;
 		}
 
 		for (i = 0; i < ret; i++) {
@@ -2583,26 +2584,36 @@ static void end_bio_extent_readpage(struct bio *bio)
 
 		if (tree->ops) {
 			ret = tree->ops->readpage_io_failed_hook(page, mirror);
-			if (!ret && !bio->bi_error)
-				uptodate = 1;
-		} else {
+			if (ret == -EAGAIN) {
+				/*
+				 * Data inode's readpage_io_failed_hook() always
+				 * returns -EAGAIN.
+				 *
+				 * The generic bio_readpage_error handles errors
+				 * the following way: If possible, new read
+				 * requests are created and submitted and will
+				 * end up in end_bio_extent_readpage as well (if
+				 * we're lucky, not in the !uptodate case). In
+				 * that case it returns 0 and we just go on with
+				 * the next page in our bio. If it can't handle
+				 * the error it will return -EIO and we remain
+				 * responsible for that page.
+				 */
+				ret = bio_readpage_error(bio, offset, page,
+							 start, end, mirror);
+				if (ret == 0) {
+					uptodate = !bio->bi_error;
+					offset += len;
+					continue;
+				}
+			}
+
 			/*
-			 * The generic bio_readpage_error handles errors the
-			 * following way: If possible, new read requests are
-			 * created and submitted and will end up in
-			 * end_bio_extent_readpage as well (if we're lucky, not
-			 * in the !uptodate case). In that case it returns 0 and
-			 * we just go on with the next page in our bio. If it
-			 * can't handle the error it will return -EIO and we
-			 * remain responsible for that page.
+			 * metadata's readpage_io_failed_hook() always returns
+			 * -EIO and fixes nothing.  -EIO is also returned if
+			 * data inode error could not be fixed.
 			 */
-			ret = bio_readpage_error(bio, offset, page, start, end,
-						 mirror);
-			if (ret == 0) {
-				uptodate = !bio->bi_error;
-				offset += len;
-				continue;
-			}
+			ASSERT(ret == -EIO);
 		}
 readpage_ok:
 		if (likely(uptodate)) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c40060cc481f..a18510be76c1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6709,6 +6709,20 @@ static noinline int uncompress_inline(struct btrfs_path *path,
 	max_size = min_t(unsigned long, PAGE_SIZE, max_size);
 	ret = btrfs_decompress(compress_type, tmp, page,
 			       extent_offset, inline_size, max_size);
+
+	/*
+	 * decompression code contains a memset to fill in any space between the end
+	 * of the uncompressed data and the end of max_size in case the decompressed
+	 * data ends up shorter than ram_bytes.  That doesn't cover the hole between
+	 * the end of an inline extent and the beginning of the next block, so we
+	 * cover that region here.
+	 */
+
+	if (max_size + pg_offset < PAGE_SIZE) {
+		char *map = kmap(page);
+		memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
+		kunmap(page);
+	}
 	kfree(tmp);
 	return ret;
 }
@@ -10509,9 +10523,9 @@ out_inode:
 }
 
 __attribute__((const))
-static int dummy_readpage_io_failed_hook(struct page *page, int failed_mirror)
+static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror)
 {
-	return 0;
+	return -EAGAIN;
 }
 
 static const struct inode_operations btrfs_dir_inode_operations = {
@@ -10556,7 +10570,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
 	.submit_bio_hook = btrfs_submit_bio_hook,
 	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
 	.merge_bio_hook = btrfs_merge_bio_hook,
-	.readpage_io_failed_hook = dummy_readpage_io_failed_hook,
+	.readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
 
 	/* optional callbacks */
 	.fill_delalloc = run_delalloc_range,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a5da750c1087..a59801dc2a34 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2948,20 +2948,20 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
 	ret = qgroup_reserve(root, num_bytes, enforce);
 	if (ret < 0)
 		return ret;
-	atomic_add(num_bytes, &root->qgroup_meta_rsv);
+	atomic64_add(num_bytes, &root->qgroup_meta_rsv);
 	return ret;
 }
 
 void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
-	int reserved;
+	u64 reserved;
 
 	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
 	    !is_fstree(root->objectid))
 		return;
 
-	reserved = atomic_xchg(&root->qgroup_meta_rsv, 0);
+	reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
 	if (reserved == 0)
 		return;
 	btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
@@ -2976,8 +2976,8 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
 		return;
 
 	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
-	WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes);
-	atomic_sub(num_bytes, &root->qgroup_meta_rsv);
+	WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
+	atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
 	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
 }
 
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 456c8901489b..a60d5bfb8a49 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6305,8 +6305,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 		goto out;
 	}
 
+	/*
+	 * Check that we don't overflow at later allocations, we request
+	 * clone_sources_count + 1 items, and compare to unsigned long inside
+	 * access_ok.
+	 */
 	if (arg->clone_sources_count >
-	    ULLONG_MAX / sizeof(*arg->clone_sources)) {
+	    ULONG_MAX / sizeof(struct clone_root) - 1) {
 		ret = -EINVAL;
 		goto out;
 	}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 15e1db8738ae..dd3f5fabfdf6 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -972,6 +972,86 @@ out:
 	return rc;
 }
 
+ssize_t cifs_file_copychunk_range(unsigned int xid,
+				struct file *src_file, loff_t off,
+				struct file *dst_file, loff_t destoff,
+				size_t len, unsigned int flags)
+{
+	struct inode *src_inode = file_inode(src_file);
+	struct inode *target_inode = file_inode(dst_file);
+	struct cifsFileInfo *smb_file_src;
+	struct cifsFileInfo *smb_file_target;
+	struct cifs_tcon *src_tcon;
+	struct cifs_tcon *target_tcon;
+	ssize_t rc;
+
+	cifs_dbg(FYI, "copychunk range\n");
+
+	if (src_inode == target_inode) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (!src_file->private_data || !dst_file->private_data) {
+		rc = -EBADF;
+		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
+		goto out;
+	}
+
+	rc = -EXDEV;
+	smb_file_target = dst_file->private_data;
+	smb_file_src = src_file->private_data;
+	src_tcon = tlink_tcon(smb_file_src->tlink);
+	target_tcon = tlink_tcon(smb_file_target->tlink);
+
+	if (src_tcon->ses != target_tcon->ses) {
+		cifs_dbg(VFS, "source and target of copy not on same server\n");
+		goto out;
+	}
+
+	/*
+	 * Note: cifs case is easier than btrfs since server responsible for
+	 * checks for proper open modes and file type and if it wants
+	 * server could even support copy of range where source = target
+	 */
+	lock_two_nondirectories(target_inode, src_inode);
+
+	cifs_dbg(FYI, "about to flush pages\n");
+	/* should we flush first and last page first */
+	truncate_inode_pages(&target_inode->i_data, 0);
+
+	if (target_tcon->ses->server->ops->copychunk_range)
+		rc = target_tcon->ses->server->ops->copychunk_range(xid,
+			smb_file_src, smb_file_target, off, len, destoff);
+	else
+		rc = -EOPNOTSUPP;
+
+	/* force revalidate of size and timestamps of target file now
+	 * that target is updated on the server
+	 */
+	CIFS_I(target_inode)->time = 0;
+	/* although unlocking in the reverse order from locking is not
+	 * strictly necessary here it is a little cleaner to be consistent
+	 */
+	unlock_two_nondirectories(src_inode, target_inode);
+
+out:
+	return rc;
+}
+
+static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
+				struct file *dst_file, loff_t destoff,
+				size_t len, unsigned int flags)
+{
+	unsigned int xid = get_xid();
+	ssize_t rc;
+
+	rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
+					len, flags);
+	free_xid(xid);
+	return rc;
+}
+
 const struct file_operations cifs_file_ops = {
 	.read_iter = cifs_loose_read_iter,
 	.write_iter = cifs_file_write_iter,
@@ -984,6 +1064,7 @@ const struct file_operations cifs_file_ops = {
 	.splice_read = generic_file_splice_read,
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1001,6 +1082,7 @@ const struct file_operations cifs_file_strict_ops = {
 	.splice_read = generic_file_splice_read,
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1018,6 +1100,7 @@ const struct file_operations cifs_file_direct_ops = {
 	.mmap = cifs_file_mmap,
 	.splice_read = generic_file_splice_read,
 	.unlocked_ioctl  = cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
@@ -1035,6 +1118,7 @@ const struct file_operations cifs_file_nobrl_ops = {
 	.splice_read = generic_file_splice_read,
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1051,6 +1135,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 	.splice_read = generic_file_splice_read,
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1067,6 +1152,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 	.mmap = cifs_file_mmap,
 	.splice_read = generic_file_splice_read,
 	.unlocked_ioctl  = cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
@@ -1078,6 +1164,7 @@ const struct file_operations cifs_dir_ops = {
 	.release = cifs_closedir,
 	.read    = generic_read_dir,
 	.unlocked_ioctl  = cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.llseek = generic_file_llseek,
 };
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index da717fee3026..30bf89b1fd9a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -139,6 +139,11 @@ extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
 # define cifs_listxattr NULL
 #endif
 
+extern ssize_t cifs_file_copychunk_range(unsigned int xid,
+					struct file *src_file, loff_t off,
+					struct file *dst_file, loff_t destoff,
+					size_t len, unsigned int flags);
+
 extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 #ifdef CONFIG_CIFS_NFSD_EXPORT
 extern const struct export_operations cifs_export_ops;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d42dd3288647..d07f13a63369 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -243,6 +243,7 @@ struct smb_version_operations {
 	/* verify the message */
 	int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
 	bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
+	int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *);
 	void (*downgrade_oplock)(struct TCP_Server_Info *,
 					struct cifsInodeInfo *, bool);
 	/* process transaction2 response */
@@ -407,9 +408,10 @@ struct smb_version_operations {
 	char * (*create_lease_buf)(u8 *, u8);
 	/* parse lease context buffer and return oplock/epoch info */
 	__u8 (*parse_lease_buf)(void *, unsigned int *);
-	int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file,
-			struct cifsFileInfo *target_file, u64 src_off, u64 len,
-			u64 dest_off);
+	ssize_t (*copychunk_range)(const unsigned int,
+			struct cifsFileInfo *src_file,
+			struct cifsFileInfo *target_file,
+			u64 src_off, u64 len, u64 dest_off);
 	int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src,
 			struct cifsFileInfo *target_file, u64 src_off, u64 len,
 			u64 dest_off);
@@ -1343,6 +1345,7 @@ struct mid_q_entry {
 	void *callback_data;	  /* general purpose pointer for callback */
 	void *resp_buf;		/* pointer to received SMB header */
 	int mid_state;	/* wish this were enum but can not pass to wait_event */
+	unsigned int mid_flags;
 	__le16 command;		/* smb command code */
 	bool large_buf:1;	/* if valid response, is pointer to large buf */
 	bool multiRsp:1;	/* multiple trans2 responses for one request  */
@@ -1350,6 +1353,12 @@ struct mid_q_entry {
 	bool decrypted:1;	/* decrypted entry */
 };
 
+struct close_cancelled_open {
+	struct cifs_fid         fid;
+	struct cifs_tcon        *tcon;
+	struct work_struct      work;
+};
+
 /*	Make code in transport.c a little cleaner by moving
 	update of optional stats into function below */
 #ifdef CONFIG_CIFS_STATS2
@@ -1481,6 +1490,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
 #define   MID_RESPONSE_MALFORMED 0x10
 #define   MID_SHUTDOWN		 0x20
 
+/* Flags */
+#define   MID_WAIT_CANCELLED	 1 /* Cancelled while waiting for response */
+
 /* Types of response buffer returned from SendReceive2 */
 #define   CIFS_NO_BUFFER        0    /* Response buffer not returned */
 #define   CIFS_SMALL_BUFFER     1
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 97e5d236d265..ec5e5e514fdd 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -79,7 +79,8 @@ extern void cifs_delete_mid(struct mid_q_entry *mid);
 extern void cifs_wake_up_task(struct mid_q_entry *mid);
 extern int cifs_handle_standard(struct TCP_Server_Info *server,
 				struct mid_q_entry *mid);
-extern int cifs_discard_remaining_data(struct TCP_Server_Info *server);
+extern int cifs_discard_remaining_data(struct TCP_Server_Info *server,
+				       char *buf);
 extern int cifs_call_async(struct TCP_Server_Info *server,
 			struct smb_rqst *rqst,
 			mid_receive_t *receive, mid_callback_t *callback,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 066950671929..967b92631807 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1400,9 +1400,9 @@ openRetry:
  * current bigbuf.
  */
 int
-cifs_discard_remaining_data(struct TCP_Server_Info *server)
+cifs_discard_remaining_data(struct TCP_Server_Info *server, char *buf)
 {
-	unsigned int rfclen = get_rfc1002_length(server->smallbuf);
+	unsigned int rfclen = get_rfc1002_length(buf);
 	int remaining = rfclen + 4 - server->total_read;
 
 	while (remaining > 0) {
@@ -1426,7 +1426,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	int length;
 	struct cifs_readdata *rdata = mid->callback_data;
 
-	length = cifs_discard_remaining_data(server);
+	length = cifs_discard_remaining_data(server, mid->resp_buf);
 	dequeue_mid(mid, rdata->result);
 	return length;
 }
@@ -1459,7 +1459,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 
 	if (server->ops->is_status_pending &&
 	    server->ops->is_status_pending(buf, server, 0)) {
-		cifs_discard_remaining_data(server);
+		cifs_discard_remaining_data(server, buf);
 		return -1;
 	}
 
@@ -1519,6 +1519,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n",
 		 rdata->iov[0].iov_base, server->total_read);
 
+	mid->resp_buf = server->smallbuf;
+	server->smallbuf = NULL;
+
 	/* how much data is in the response? */
 	data_len = server->ops->read_data_length(buf);
 	if (data_offset + data_len > buflen) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 9ae695ae3ed7..0c7596cef4b8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -904,10 +904,19 @@ cifs_demultiplex_thread(void *p)
 
 		server->lstrp = jiffies;
 		if (mid_entry != NULL) {
+			if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) &&
+			     mid_entry->mid_state == MID_RESPONSE_RECEIVED &&
+					server->ops->handle_cancelled_mid)
+				server->ops->handle_cancelled_mid(
+							mid_entry->resp_buf,
+							server);
+
 			if (!mid_entry->multiRsp || mid_entry->multiEnd)
 				mid_entry->callback(mid_entry);
-		} else if (!server->ops->is_oplock_break ||
-			   !server->ops->is_oplock_break(buf, server)) {
+		} else if (server->ops->is_oplock_break &&
+			   server->ops->is_oplock_break(buf, server)) {
+			cifs_dbg(FYI, "Received oplock break\n");
+		} else {
 			cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n",
 				 atomic_read(&midCount));
 			cifs_dump_mem("Received Data is: ", buf,
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 001528781b6b..265c45fe4ea5 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -34,71 +34,14 @@
 #include "cifs_ioctl.h"
 #include <linux/btrfs.h>
 
-static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
-			  struct file *dst_file)
-{
-	struct inode *src_inode = file_inode(src_file);
-	struct inode *target_inode = file_inode(dst_file);
-	struct cifsFileInfo *smb_file_src;
-	struct cifsFileInfo *smb_file_target;
-	struct cifs_tcon *src_tcon;
-	struct cifs_tcon *target_tcon;
-	int rc;
-
-	cifs_dbg(FYI, "ioctl clone range\n");
-
-	if (!src_file->private_data || !dst_file->private_data) {
-		rc = -EBADF;
-		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
-		goto out;
-	}
-
-	rc = -EXDEV;
-	smb_file_target = dst_file->private_data;
-	smb_file_src = src_file->private_data;
-	src_tcon = tlink_tcon(smb_file_src->tlink);
-	target_tcon = tlink_tcon(smb_file_target->tlink);
-
-	if (src_tcon->ses != target_tcon->ses) {
-		cifs_dbg(VFS, "source and target of copy not on same server\n");
-		goto out;
-	}
-
-	/*
-	 * Note: cifs case is easier than btrfs since server responsible for
-	 * checks for proper open modes and file type and if it wants
-	 * server could even support copy of range where source = target
-	 */
-	lock_two_nondirectories(target_inode, src_inode);
-
-	cifs_dbg(FYI, "about to flush pages\n");
-	/* should we flush first and last page first */
-	truncate_inode_pages(&target_inode->i_data, 0);
-
-	if (target_tcon->ses->server->ops->clone_range)
-		rc = target_tcon->ses->server->ops->clone_range(xid,
-			smb_file_src, smb_file_target, 0, src_inode->i_size, 0);
-	else
-		rc = -EOPNOTSUPP;
-
-	/* force revalidate of size and timestamps of target file now
-	   that target is updated on the server */
-	CIFS_I(target_inode)->time = 0;
-	/* although unlocking in the reverse order from locking is not
-	   strictly necessary here it is a little cleaner to be consistent */
-	unlock_two_nondirectories(src_inode, target_inode);
-out:
-	return rc;
-}
-
-static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
+static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file,
 			unsigned long srcfd)
 {
 	int rc;
 	struct fd src_file;
 	struct inode *src_inode;
 
-	cifs_dbg(FYI, "ioctl clone range\n");
+	cifs_dbg(FYI, "ioctl copychunk range\n");
 	/* the destination must be opened for writing */
 	if (!(dst_file->f_mode & FMODE_WRITE)) {
 		cifs_dbg(FYI, "file target not open for write\n");
@@ -129,7 +72,8 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
 	if (S_ISDIR(src_inode->i_mode))
 		goto out_fput;
 
-	rc = cifs_file_clone_range(xid, src_file.file, dst_file);
+	rc = cifs_file_copychunk_range(xid, src_file.file, 0, dst_file, 0,
+					src_inode->i_size, 0);
 
 out_fput:
 	fdput(src_file);
@@ -251,7 +195,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 			}
 			break;
 		case CIFS_IOC_COPYCHUNK_FILE:
-			rc = cifs_ioctl_clone(xid, filep, arg);
+			rc = cifs_ioctl_copychunk(xid, filep, arg);
 			break;
 		case CIFS_IOC_SET_INTEGRITY:
 			if (pSMBFile == NULL)
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index fd516ea8b8f8..1a04b3a5beb1 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -659,3 +659,49 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 	cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
 	return false;
 }
+
+void
+smb2_cancelled_close_fid(struct work_struct *work)
+{
+	struct close_cancelled_open *cancelled = container_of(work,
+					struct close_cancelled_open, work);
+
+	cifs_dbg(VFS, "Close unmatched open\n");
+
+	SMB2_close(0, cancelled->tcon, cancelled->fid.persistent_fid,
+		   cancelled->fid.volatile_fid);
+	cifs_put_tcon(cancelled->tcon);
+	kfree(cancelled);
+}
+
+int
+smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
+{
+	struct smb2_sync_hdr *sync_hdr = get_sync_hdr(buffer);
+	struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer;
+	struct cifs_tcon *tcon;
+	struct close_cancelled_open *cancelled;
+
+	if (sync_hdr->Command != SMB2_CREATE ||
+	    sync_hdr->Status != STATUS_SUCCESS)
+		return 0;
+
+	cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL);
+	if (!cancelled)
+		return -ENOMEM;
+
+	tcon = smb2_find_smb_tcon(server, sync_hdr->SessionId,
+				  sync_hdr->TreeId);
+	if (!tcon) {
+		kfree(cancelled);
+		return -ENOENT;
+	}
+
+	cancelled->fid.persistent_fid = rsp->PersistentFileId;
+	cancelled->fid.volatile_fid = rsp->VolatileFileId;
+	cancelled->tcon = tcon;
+	INIT_WORK(&cancelled->work, smb2_cancelled_close_fid);
+	queue_work(cifsiod_wq, &cancelled->work);
+
+	return 0;
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0231108d9387..7b12a727947e 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -21,6 +21,7 @@
 #include <linux/vfs.h>
 #include <linux/falloc.h>
 #include <linux/scatterlist.h>
+#include <linux/uuid.h>
 #include <crypto/aead.h>
 #include "cifsglob.h"
 #include "smb2pdu.h"
@@ -592,8 +593,8 @@ req_res_key_exit:
 	return rc;
 }
 
-static int
-smb2_clone_range(const unsigned int xid,
+static ssize_t
+smb2_copychunk_range(const unsigned int xid,
 			struct cifsFileInfo *srcfile,
 			struct cifsFileInfo *trgtfile, u64 src_off,
 			u64 len, u64 dest_off)
@@ -605,13 +606,14 @@ smb2_clone_range(const unsigned int xid,
 	struct cifs_tcon *tcon;
 	int chunks_copied = 0;
 	bool chunk_sizes_updated = false;
+	ssize_t bytes_written, total_bytes_written = 0;
 
 	pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
 
 	if (pcchunk == NULL)
 		return -ENOMEM;
 
-	cifs_dbg(FYI, "in smb2_clone_range - about to call request res key\n");
+	cifs_dbg(FYI, "in smb2_copychunk_range - about to call request res key\n");
 	/* Request a key from the server to identify the source of the copy */
 	rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink),
 				srcfile->fid.persistent_fid,
@@ -669,14 +671,16 @@ smb2_clone_range(const unsigned int xid,
 			}
 			chunks_copied++;
 
-			src_off += le32_to_cpu(retbuf->TotalBytesWritten);
-			dest_off += le32_to_cpu(retbuf->TotalBytesWritten);
-			len -= le32_to_cpu(retbuf->TotalBytesWritten);
+			bytes_written = le32_to_cpu(retbuf->TotalBytesWritten);
+			src_off += bytes_written;
+			dest_off += bytes_written;
+			len -= bytes_written;
+			total_bytes_written += bytes_written;
 
-			cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n",
+			cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %zu\n",
 				le32_to_cpu(retbuf->ChunksWritten),
 				le32_to_cpu(retbuf->ChunkBytesWritten),
-				le32_to_cpu(retbuf->TotalBytesWritten));
+				bytes_written);
 		} else if (rc == -EINVAL) {
 			if (ret_data_len != sizeof(struct copychunk_ioctl_rsp))
 				goto cchunk_out;
@@ -713,7 +717,10 @@ smb2_clone_range(const unsigned int xid,
 cchunk_out:
 	kfree(pcchunk);
 	kfree(retbuf);
-	return rc;
+	if (rc)
+		return rc;
+	else
+		return total_bytes_written;
 }
 
 static int
@@ -2188,7 +2195,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 	if (rc)
 		goto free_pages;
 
-	rc = cifs_discard_remaining_data(server);
+	rc = cifs_discard_remaining_data(server, buf);
 	if (rc)
 		goto free_pages;
 
@@ -2214,7 +2221,7 @@ free_pages:
 	kfree(pages);
 	return rc;
 discard_data:
-	cifs_discard_remaining_data(server);
+	cifs_discard_remaining_data(server, buf);
 	goto free_pages;
 }
 
@@ -2322,6 +2329,7 @@ struct smb_version_operations smb20_operations = {
 	.clear_stats = smb2_clear_stats,
 	.print_stats = smb2_print_stats,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.handle_cancelled_mid = smb2_handle_cancelled_mid,
 	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
@@ -2377,7 +2385,7 @@ struct smb_version_operations smb20_operations = {
 	.set_oplock_level = smb2_set_oplock_level,
 	.create_lease_buf = smb2_create_lease_buf,
 	.parse_lease_buf = smb2_parse_lease_buf,
-	.clone_range = smb2_clone_range,
+	.copychunk_range = smb2_copychunk_range,
 	.wp_retry_size = smb2_wp_retry_size,
 	.dir_needs_close = smb2_dir_needs_close,
 	.get_dfs_refer = smb2_get_dfs_refer,
@@ -2404,6 +2412,7 @@ struct smb_version_operations smb21_operations = {
 	.clear_stats = smb2_clear_stats,
 	.print_stats = smb2_print_stats,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.handle_cancelled_mid = smb2_handle_cancelled_mid,
 	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
@@ -2459,7 +2468,7 @@ struct smb_version_operations smb21_operations = {
 	.set_oplock_level = smb21_set_oplock_level,
 	.create_lease_buf = smb2_create_lease_buf,
 	.parse_lease_buf = smb2_parse_lease_buf,
-	.clone_range = smb2_clone_range,
+	.copychunk_range = smb2_copychunk_range,
 	.wp_retry_size = smb2_wp_retry_size,
 	.dir_needs_close = smb2_dir_needs_close,
 	.enum_snapshots = smb3_enum_snapshots,
@@ -2488,6 +2497,7 @@ struct smb_version_operations smb30_operations = {
 	.print_stats = smb2_print_stats,
 	.dump_share_caps = smb2_dump_share_caps,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.handle_cancelled_mid = smb2_handle_cancelled_mid,
 	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
@@ -2545,7 +2555,7 @@ struct smb_version_operations smb30_operations = {
 	.set_oplock_level = smb3_set_oplock_level,
 	.create_lease_buf = smb3_create_lease_buf,
 	.parse_lease_buf = smb3_parse_lease_buf,
-	.clone_range = smb2_clone_range,
+	.copychunk_range = smb2_copychunk_range,
 	.duplicate_extents = smb2_duplicate_extents,
 	.validate_negotiate = smb3_validate_negotiate,
 	.wp_retry_size = smb2_wp_retry_size,
@@ -2582,6 +2592,7 @@ struct smb_version_operations smb311_operations = {
 	.print_stats = smb2_print_stats,
 	.dump_share_caps = smb2_dump_share_caps,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.handle_cancelled_mid = smb2_handle_cancelled_mid,
 	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
@@ -2639,7 +2650,7 @@ struct smb_version_operations smb311_operations = {
 	.set_oplock_level = smb3_set_oplock_level,
 	.create_lease_buf = smb3_create_lease_buf,
 	.parse_lease_buf = smb3_parse_lease_buf,
-	.clone_range = smb2_clone_range,
+	.copychunk_range = smb2_copychunk_range,
 	.duplicate_extents = smb2_duplicate_extents,
 /*	.validate_negotiate = smb3_validate_negotiate, */ /* not used in 3.11 */
 	.wp_retry_size = smb2_wp_retry_size,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 7446496850a3..66fa1b941cdf 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1185,6 +1185,10 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 		return -EINVAL;
 	}
 
+	/* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
+	if (tcon)
+		tcon->tid = 0;
+
 	rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req);
 	if (rc) {
 		kfree(unc_path);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 69e35873b1de..6853454fc871 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -48,6 +48,10 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses,
 			      struct smb_rqst *rqst);
 extern struct mid_q_entry *smb2_setup_async_request(
 			struct TCP_Server_Info *server, struct smb_rqst *rqst);
+extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
+					   __u64 ses_id);
+extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server,
+						__u64 ses_id, __u32  tid);
 extern int smb2_calc_signature(struct smb_rqst *rqst,
 				struct TCP_Server_Info *server);
 extern int smb3_calc_signature(struct smb_rqst *rqst,
@@ -164,6 +168,9 @@ extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
 			     const u64 persistent_fid, const u64 volatile_fid,
 			     const __u8 oplock_level);
+extern int smb2_handle_cancelled_mid(char *buffer,
+					struct TCP_Server_Info *server);
+void smb2_cancelled_close_fid(struct work_struct *work);
 extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
 			 u64 persistent_file_id, u64 volatile_file_id,
 			 struct kstatfs *FSData);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 7c3bb1bd7eed..506b67fc93d9 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -115,23 +115,70 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
 	return 0;
 }
 
-struct cifs_ses *
-smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id)
+static struct cifs_ses *
+smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
 {
 	struct cifs_ses *ses;
 
-	spin_lock(&cifs_tcp_ses_lock);
 	list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
 		if (ses->Suid != ses_id)
 			continue;
-		spin_unlock(&cifs_tcp_ses_lock);
 		return ses;
 	}
+
+	return NULL;
+}
+
+struct cifs_ses *
+smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id)
+{
+	struct cifs_ses *ses;
+
+	spin_lock(&cifs_tcp_ses_lock);
+	ses = smb2_find_smb_ses_unlocked(server, ses_id);
 	spin_unlock(&cifs_tcp_ses_lock);
 
+	return ses;
+}
+
+static struct cifs_tcon *
+smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32  tid)
+{
+	struct cifs_tcon *tcon;
+
+	list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+		if (tcon->tid != tid)
+			continue;
+		++tcon->tc_count;
+		return tcon;
+	}
+
 	return NULL;
 }
 
+/*
+ * Obtain tcon corresponding to the tid in the given
+ * cifs_ses
+ */
+
+struct cifs_tcon *
+smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32  tid)
+{
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
+
+	spin_lock(&cifs_tcp_ses_lock);
+	ses = smb2_find_smb_ses_unlocked(server, ses_id);
+	if (!ses) {
+		spin_unlock(&cifs_tcp_ses_lock);
+		return NULL;
+	}
+	tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid);
+	spin_unlock(&cifs_tcp_ses_lock);
+
+	return tcon;
+}
+
 int
 smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 526f0533cb4e..f6e13a977fc8 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -752,9 +752,11 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
 
 	rc = wait_for_response(ses->server, midQ);
 	if (rc != 0) {
+		cifs_dbg(FYI, "Cancelling wait for mid %llu\n",	midQ->mid);
 		send_cancel(ses->server, rqst, midQ);
 		spin_lock(&GlobalMid_Lock);
 		if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
+			midQ->mid_flags |= MID_WAIT_CANCELLED;
 			midQ->callback = DeleteMidQEntry;
 			spin_unlock(&GlobalMid_Lock);
 			add_credits(ses->server, 1, optype);
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 02a7a9286449..6d6eca394d4d 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -327,7 +327,6 @@ EXPORT_SYMBOL(fscrypt_decrypt_page);
 static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct dentry *dir;
-	struct fscrypt_info *ci;
 	int dir_has_key, cached_with_key;
 
 	if (flags & LOOKUP_RCU)
@@ -339,18 +338,11 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
 		return 0;
 	}
 
-	ci = d_inode(dir)->i_crypt_info;
-	if (ci && ci->ci_keyring_key &&
-	    (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
-					  (1 << KEY_FLAG_REVOKED) |
-					  (1 << KEY_FLAG_DEAD))))
-		ci = NULL;
-
 	/* this should eventually be an flag in d_flags */
 	spin_lock(&dentry->d_lock);
 	cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
 	spin_unlock(&dentry->d_lock);
-	dir_has_key = (ci != NULL);
+	dir_has_key = (d_inode(dir)->i_crypt_info != NULL);
 	dput(dir);
 
 	/*
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 13052b85c393..37b49894c762 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -350,7 +350,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
 		fname->disk_name.len = iname->len;
 		return 0;
 	}
-	ret = fscrypt_get_crypt_info(dir);
+	ret = fscrypt_get_encryption_info(dir);
 	if (ret && ret != -EOPNOTSUPP)
 		return ret;
 
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index fdbb8af32eaf..e39696e64494 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -67,7 +67,6 @@ struct fscrypt_info {
 	u8 ci_filename_mode;
 	u8 ci_flags;
 	struct crypto_skcipher *ci_ctfm;
-	struct key *ci_keyring_key;
 	u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
 };
 
@@ -101,7 +100,4 @@ extern int fscrypt_do_page_crypto(const struct inode *inode,
 extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
 					      gfp_t gfp_flags);
 
-/* keyinfo.c */
-extern int fscrypt_get_crypt_info(struct inode *);
-
 #endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index d5d896fa5a71..8cdfddce2b34 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -95,6 +95,7 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
 	kfree(description);
 	if (IS_ERR(keyring_key))
 		return PTR_ERR(keyring_key);
+	down_read(&keyring_key->sem);
 
 	if (keyring_key->type != &key_type_logon) {
 		printk_once(KERN_WARNING
@@ -102,11 +103,9 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
 		res = -ENOKEY;
 		goto out;
 	}
-	down_read(&keyring_key->sem);
 	ukp = user_key_payload_locked(keyring_key);
 	if (ukp->datalen != sizeof(struct fscrypt_key)) {
 		res = -EINVAL;
-		up_read(&keyring_key->sem);
 		goto out;
 	}
 	master_key = (struct fscrypt_key *)ukp->data;
@@ -117,17 +116,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
 				"%s: key size incorrect: %d\n",
 				__func__, master_key->size);
 		res = -ENOKEY;
-		up_read(&keyring_key->sem);
 		goto out;
 	}
 	res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
-	up_read(&keyring_key->sem);
-	if (res)
-		goto out;
-
-	crypt_info->ci_keyring_key = keyring_key;
-	return 0;
 out:
+	up_read(&keyring_key->sem);
 	key_put(keyring_key);
 	return res;
 }
@@ -169,12 +162,11 @@ static void put_crypt_info(struct fscrypt_info *ci)
 	if (!ci)
 		return;
 
-	key_put(ci->ci_keyring_key);
 	crypto_free_skcipher(ci->ci_ctfm);
 	kmem_cache_free(fscrypt_info_cachep, ci);
 }
 
-int fscrypt_get_crypt_info(struct inode *inode)
+int fscrypt_get_encryption_info(struct inode *inode)
 {
 	struct fscrypt_info *crypt_info;
 	struct fscrypt_context ctx;
@@ -184,21 +176,15 @@ int fscrypt_get_crypt_info(struct inode *inode)
 	u8 *raw_key = NULL;
 	int res;
 
+	if (inode->i_crypt_info)
+		return 0;
+
 	res = fscrypt_initialize(inode->i_sb->s_cop->flags);
 	if (res)
 		return res;
 
 	if (!inode->i_sb->s_cop->get_context)
 		return -EOPNOTSUPP;
-retry:
-	crypt_info = ACCESS_ONCE(inode->i_crypt_info);
-	if (crypt_info) {
-		if (!crypt_info->ci_keyring_key ||
-				key_validate(crypt_info->ci_keyring_key) == 0)
-			return 0;
-		fscrypt_put_encryption_info(inode, crypt_info);
-		goto retry;
-	}
 
 	res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
 	if (res < 0) {
@@ -229,7 +215,6 @@ retry:
 	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
 	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
 	crypt_info->ci_ctfm = NULL;
-	crypt_info->ci_keyring_key = NULL;
 	memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
 				sizeof(crypt_info->ci_master_key));
 
@@ -273,14 +258,8 @@ retry:
 	if (res)
 		goto out;
 
-	kzfree(raw_key);
-	raw_key = NULL;
-	if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) {
-		put_crypt_info(crypt_info);
-		goto retry;
-	}
-	return 0;
-
+	if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL)
+		crypt_info = NULL;
 out:
 	if (res == -ENOKEY)
 		res = 0;
@@ -288,6 +267,7 @@ out:
 	kzfree(raw_key);
 	return res;
 }
+EXPORT_SYMBOL(fscrypt_get_encryption_info);
 
 void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
 {
@@ -305,17 +285,3 @@ void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
 	put_crypt_info(ci);
 }
 EXPORT_SYMBOL(fscrypt_put_encryption_info);
-
-int fscrypt_get_encryption_info(struct inode *inode)
-{
-	struct fscrypt_info *ci = inode->i_crypt_info;
-
-	if (!ci ||
-		(ci->ci_keyring_key &&
-		 (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
-					       (1 << KEY_FLAG_REVOKED) |
-					       (1 << KEY_FLAG_DEAD)))))
-		return fscrypt_get_crypt_info(inode);
-	return 0;
-}
-EXPORT_SYMBOL(fscrypt_get_encryption_info);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 14b76da71269..4908906d54d5 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -33,17 +33,10 @@ static int create_encryption_context_from_policy(struct inode *inode,
 				const struct fscrypt_policy *policy)
 {
 	struct fscrypt_context ctx;
-	int res;
 
 	if (!inode->i_sb->s_cop->set_context)
 		return -EOPNOTSUPP;
 
-	if (inode->i_sb->s_cop->prepare_context) {
-		res = inode->i_sb->s_cop->prepare_context(inode);
-		if (res)
-			return res;
-	}
-
 	ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
 	memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
 					FS_KEY_DESCRIPTOR_SIZE);
diff --git a/fs/dax.c b/fs/dax.c
index de622d4282a6..85abd741253d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -373,6 +373,22 @@ restart:
 		}
 		spin_lock_irq(&mapping->tree_lock);
 
+		if (!entry) {
+			/*
+			 * We needed to drop the page_tree lock while calling
+			 * radix_tree_preload() and we didn't have an entry to
+			 * lock.  See if another thread inserted an entry at
+			 * our index during this time.
+			 */
+			entry = __radix_tree_lookup(&mapping->page_tree, index,
+					NULL, &slot);
+			if (entry) {
+				radix_tree_preload_end();
+				spin_unlock_irq(&mapping->tree_lock);
+				goto restart;
+			}
+		}
+
 		if (pmd_downgrade) {
 			radix_tree_delete(&mapping->page_tree, index);
 			mapping->nrexceptional--;
@@ -388,19 +404,12 @@ restart:
 		if (err) {
 			spin_unlock_irq(&mapping->tree_lock);
 			/*
-			 * Someone already created the entry?  This is a
-			 * normal failure when inserting PMDs in a range
-			 * that already contains PTEs.  In that case we want
-			 * to return -EEXIST immediately.
-			 */
-			if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
-				goto restart;
-			/*
-			 * Our insertion of a DAX PMD entry failed, most
-			 * likely because it collided with a PTE sized entry
-			 * at a different index in the PMD range.  We haven't
-			 * inserted anything into the radix tree and have no
-			 * waiters to wake.
+			 * Our insertion of a DAX entry failed, most likely
+			 * because we were inserting a PMD entry and it
+			 * collided with a PTE sized entry at a different
+			 * index in the PMD range.  We haven't inserted
+			 * anything into the radix tree and have no waiters to
+			 * wake.
 			 */
 			return ERR_PTR(err);
 		}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f493af666591..fb69ee2388db 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2466,6 +2466,7 @@ extern int  ext4_setattr(struct dentry *, struct iattr *);
 extern int  ext4_getattr(const struct path *, struct kstat *, u32, unsigned int);
 extern void ext4_evict_inode(struct inode *);
 extern void ext4_clear_inode(struct inode *);
+extern int  ext4_file_getattr(const struct path *, struct kstat *, u32, unsigned int);
 extern int  ext4_sync_inode(handle_t *, struct inode *);
 extern void ext4_dirty_inode(struct inode *, int);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8210c1f43556..cefa9835f275 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -744,7 +744,7 @@ const struct file_operations ext4_file_operations = {
 
 const struct inode_operations ext4_file_inode_operations = {
 	.setattr	= ext4_setattr,
-	.getattr	= ext4_getattr,
+	.getattr	= ext4_file_getattr,
 	.listxattr	= ext4_listxattr,
 	.get_acl	= ext4_get_acl,
 	.set_acl	= ext4_set_acl,
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 30a9f210d1e3..375fb1c05d49 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1169,10 +1169,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
 	set_buffer_uptodate(dir_block);
 	err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
 	if (err)
-		goto out;
+		return err;
 	set_buffer_verified(dir_block);
-out:
-	return err;
+	return ext4_mark_inode_dirty(handle, inode);
 }
 
 static int ext4_convert_inline_data_nolock(handle_t *handle,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7385e6a6b6cb..b9ffa9f4191f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5390,17 +5390,52 @@ err_out:
 int ext4_getattr(const struct path *path, struct kstat *stat,
 		 u32 request_mask, unsigned int query_flags)
 {
-	struct inode *inode;
-	unsigned long long delalloc_blocks;
+	struct inode *inode = d_inode(path->dentry);
+	struct ext4_inode *raw_inode;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned int flags;
+
+	if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) {
+		stat->result_mask |= STATX_BTIME;
+		stat->btime.tv_sec = ei->i_crtime.tv_sec;
+		stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
+	}
+
+	flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
+	if (flags & EXT4_APPEND_FL)
+		stat->attributes |= STATX_ATTR_APPEND;
+	if (flags & EXT4_COMPR_FL)
+		stat->attributes |= STATX_ATTR_COMPRESSED;
+	if (flags & EXT4_ENCRYPT_FL)
+		stat->attributes |= STATX_ATTR_ENCRYPTED;
+	if (flags & EXT4_IMMUTABLE_FL)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+	if (flags & EXT4_NODUMP_FL)
+		stat->attributes |= STATX_ATTR_NODUMP;
+
+	stat->attributes_mask |= (STATX_ATTR_APPEND |
+				  STATX_ATTR_COMPRESSED |
+				  STATX_ATTR_ENCRYPTED |
+				  STATX_ATTR_IMMUTABLE |
+				  STATX_ATTR_NODUMP);
 
-	inode = d_inode(path->dentry);
 	generic_fillattr(inode, stat);
+	return 0;
+}
+
+int ext4_file_getattr(const struct path *path, struct kstat *stat,
+		      u32 request_mask, unsigned int query_flags)
+{
+	struct inode *inode = d_inode(path->dentry);
+	u64 delalloc_blocks;
+
+	ext4_getattr(path, stat, request_mask, query_flags);
 
 	/*
 	 * If there is inline data in the inode, the inode will normally not
 	 * have data blocks allocated (it may have an external xattr block).
 	 * Report at least one sector for such files, so tools like tar, rsync,
-	 * others doen't incorrectly think the file is completely sparse.
+	 * others don't incorrectly think the file is completely sparse.
 	 */
 	if (unlikely(ext4_has_inline_data(inode)))
 		stat->blocks += (stat->size + 511) >> 9;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 578f8c33fb44..c992ef2c2f94 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -511,7 +511,7 @@ mext_check_arguments(struct inode *orig_inode,
 	if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
 	    (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
 		ext4_debug("ext4 move extent: orig and donor's start "
-			"offset are not alligned [ino:orig %lu, donor %lu]\n",
+			"offsets are not aligned [ino:orig %lu, donor %lu]\n",
 			orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6ad612c576fc..07e5e1405771 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3912,6 +3912,7 @@ const struct inode_operations ext4_dir_inode_operations = {
 	.tmpfile	= ext4_tmpfile,
 	.rename		= ext4_rename2,
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
 	.get_acl	= ext4_get_acl,
 	.set_acl	= ext4_set_acl,
@@ -3920,6 +3921,7 @@ const struct inode_operations ext4_dir_inode_operations = {
 
 const struct inode_operations ext4_special_inode_operations = {
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
 	.get_acl	= ext4_get_acl,
 	.set_acl	= ext4_set_acl,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2e03a0a88d92..a9448db1cf7e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1120,17 +1120,16 @@ static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
 				 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
 }
 
-static int ext4_prepare_context(struct inode *inode)
-{
-	return ext4_convert_inline_data(inode);
-}
-
 static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
 							void *fs_data)
 {
 	handle_t *handle = fs_data;
 	int res, res2, retries = 0;
 
+	res = ext4_convert_inline_data(inode);
+	if (res)
+		return res;
+
 	/*
 	 * If a journal handle was specified, then the encryption context is
 	 * being set on a new inode via inheritance and is part of a larger
@@ -1196,7 +1195,6 @@ static unsigned ext4_max_namelen(struct inode *inode)
 static const struct fscrypt_operations ext4_cryptops = {
 	.key_prefix		= "ext4:",
 	.get_context		= ext4_get_context,
-	.prepare_context	= ext4_prepare_context,
 	.set_context		= ext4_set_context,
 	.dummy_context		= ext4_dummy_context,
 	.is_encrypted		= ext4_encrypted_inode,
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 73b184d161fc..5c8fc53cb0e5 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -85,17 +85,20 @@ errout:
 const struct inode_operations ext4_encrypted_symlink_inode_operations = {
 	.get_link	= ext4_encrypted_get_link,
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
 };
 
 const struct inode_operations ext4_symlink_inode_operations = {
 	.get_link	= page_get_link,
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
 };
 
 const struct inode_operations ext4_fast_symlink_inode_operations = {
 	.get_link	= simple_get_link,
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
 };
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 67636acf7624..996e7900d4c8 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -131,31 +131,26 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
 }
 
 static int ext4_xattr_block_csum_verify(struct inode *inode,
-					sector_t block_nr,
-					struct ext4_xattr_header *hdr)
+					struct buffer_head *bh)
 {
-	if (ext4_has_metadata_csum(inode->i_sb) &&
-	    (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
-		return 0;
-	return 1;
-}
-
-static void ext4_xattr_block_csum_set(struct inode *inode,
-				      sector_t block_nr,
-				      struct ext4_xattr_header *hdr)
-{
-	if (!ext4_has_metadata_csum(inode->i_sb))
-		return;
+	struct ext4_xattr_header *hdr = BHDR(bh);
+	int ret = 1;
 
-	hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
+	if (ext4_has_metadata_csum(inode->i_sb)) {
+		lock_buffer(bh);
+		ret = (hdr->h_checksum == ext4_xattr_block_csum(inode,
+							bh->b_blocknr, hdr));
+		unlock_buffer(bh);
+	}
+	return ret;
 }
 
-static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
-						struct inode *inode,
-						struct buffer_head *bh)
+static void ext4_xattr_block_csum_set(struct inode *inode,
+				      struct buffer_head *bh)
 {
-	ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
-	return ext4_handle_dirty_metadata(handle, inode, bh);
+	if (ext4_has_metadata_csum(inode->i_sb))
+		BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode,
+						bh->b_blocknr, BHDR(bh));
 }
 
 static inline const struct xattr_handler *
@@ -233,7 +228,7 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
 	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 	    BHDR(bh)->h_blocks != cpu_to_le32(1))
 		return -EFSCORRUPTED;
-	if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
+	if (!ext4_xattr_block_csum_verify(inode, bh))
 		return -EFSBADCRC;
 	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
 				       bh->b_data);
@@ -618,23 +613,22 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 			}
 		}
 
+		ext4_xattr_block_csum_set(inode, bh);
 		/*
 		 * Beware of this ugliness: Releasing of xattr block references
 		 * from different inodes can race and so we have to protect
 		 * from a race where someone else frees the block (and releases
 		 * its journal_head) before we are done dirtying the buffer. In
 		 * nojournal mode this race is harmless and we actually cannot
-		 * call ext4_handle_dirty_xattr_block() with locked buffer as
+		 * call ext4_handle_dirty_metadata() with locked buffer as
 		 * that function can call sync_dirty_buffer() so for that case
 		 * we handle the dirtying after unlocking the buffer.
 		 */
 		if (ext4_handle_valid(handle))
-			error = ext4_handle_dirty_xattr_block(handle, inode,
-							      bh);
+			error = ext4_handle_dirty_metadata(handle, inode, bh);
 		unlock_buffer(bh);
 		if (!ext4_handle_valid(handle))
-			error = ext4_handle_dirty_xattr_block(handle, inode,
-							      bh);
+			error = ext4_handle_dirty_metadata(handle, inode, bh);
 		if (IS_SYNC(inode))
 			ext4_handle_sync(handle);
 		dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
@@ -863,13 +857,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 				ext4_xattr_cache_insert(ext4_mb_cache,
 					bs->bh);
 			}
+			ext4_xattr_block_csum_set(inode, bs->bh);
 			unlock_buffer(bs->bh);
 			if (error == -EFSCORRUPTED)
 				goto bad_block;
 			if (!error)
-				error = ext4_handle_dirty_xattr_block(handle,
-								      inode,
-								      bs->bh);
+				error = ext4_handle_dirty_metadata(handle,
+								   inode,
+								   bs->bh);
 			if (error)
 				goto cleanup;
 			goto inserted;
@@ -967,10 +962,11 @@ inserted:
 					ce->e_reusable = 0;
 				ea_bdebug(new_bh, "reusing; refcount now=%d",
 					  ref);
+				ext4_xattr_block_csum_set(inode, new_bh);
 				unlock_buffer(new_bh);
-				error = ext4_handle_dirty_xattr_block(handle,
-								      inode,
-								      new_bh);
+				error = ext4_handle_dirty_metadata(handle,
+								   inode,
+								   new_bh);
 				if (error)
 					goto cleanup_dquot;
 			}
@@ -1020,11 +1016,12 @@ getblk_failed:
 				goto getblk_failed;
 			}
 			memcpy(new_bh->b_data, s->base, new_bh->b_size);
+			ext4_xattr_block_csum_set(inode, new_bh);
 			set_buffer_uptodate(new_bh);
 			unlock_buffer(new_bh);
 			ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
-			error = ext4_handle_dirty_xattr_block(handle,
-							      inode, new_bh);
+			error = ext4_handle_dirty_metadata(handle, inode,
+							   new_bh);
 			if (error)
 				goto cleanup;
 		}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a77df377e2e8..ee2d0a485fc3 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
 	si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
 	si->base_mem += NM_I(sbi)->nat_blocks / 8;
+	si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
 
 get_cache:
 	si->cache_mem = 0;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4650c9b85de7..8d5c62b07b28 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 	dentry_blk = page_address(page);
 	bit_pos = dentry - dentry_blk->dentry;
 	for (i = 0; i < slots; i++)
-		clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+		__clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
 
 	/* Let's check and deallocate this dentry page */
 	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e849f83d6114..0a6e115562f6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -561,6 +561,8 @@ struct f2fs_nm_info {
 	struct mutex build_lock;	/* lock for build free nids */
 	unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
 	unsigned char *nat_block_bitmap;
+	unsigned short *free_nid_count;	/* free nid count of NAT block */
+	spinlock_t free_nid_lock;	/* protect updating of nid count */
 
 	/* for checkpoint */
 	char *nat_bitmap;		/* NAT bitmap pointer */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 94967171dee8..481aa8dc79f4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
 		set_nat_flag(e, IS_CHECKPOINTED, false);
 	__set_nat_cache_dirty(nm_i, e);
 
-	if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
-		clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
-
 	/* update fsync_mark if its inode nat entry is still alive */
 	if (ni->nid != ni->ino)
 		e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1823,7 +1820,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
 		kmem_cache_free(free_nid_slab, i);
 }
 
-void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
+static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
+			bool set, bool build, bool locked)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -1833,9 +1831,18 @@ void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
 		return;
 
 	if (set)
-		set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+		__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
 	else
-		clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+
+	if (!locked)
+		spin_lock(&nm_i->free_nid_lock);
+	if (set)
+		nm_i->free_nid_count[nat_ofs]++;
+	else if (!build)
+		nm_i->free_nid_count[nat_ofs]--;
+	if (!locked)
+		spin_unlock(&nm_i->free_nid_lock);
 }
 
 static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1847,7 +1854,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
 	int i;
 
-	set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
+	if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+		return;
+
+	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
 
 	i = start_nid % NAT_ENTRY_PER_BLOCK;
 
@@ -1861,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
 		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
 		if (blk_addr == NULL_ADDR)
 			freed = add_free_nid(sbi, start_nid, true);
-		update_free_nid_bitmap(sbi, start_nid, freed);
+		update_free_nid_bitmap(sbi, start_nid, freed, true, false);
 	}
 }
 
@@ -1877,6 +1887,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
 	for (i = 0; i < nm_i->nat_blocks; i++) {
 		if (!test_bit_le(i, nm_i->nat_block_bitmap))
 			continue;
+		if (!nm_i->free_nid_count[i])
+			continue;
 		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
 			nid_t nid;
 
@@ -1907,58 +1919,6 @@ out:
 	up_read(&nm_i->nat_tree_lock);
 }
 
-static int scan_nat_bits(struct f2fs_sb_info *sbi)
-{
-	struct f2fs_nm_info *nm_i = NM_I(sbi);
-	struct page *page;
-	unsigned int i = 0;
-	nid_t nid;
-
-	if (!enabled_nat_bits(sbi, NULL))
-		return -EAGAIN;
-
-	down_read(&nm_i->nat_tree_lock);
-check_empty:
-	i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
-	if (i >= nm_i->nat_blocks) {
-		i = 0;
-		goto check_partial;
-	}
-
-	for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
-									nid++) {
-		if (unlikely(nid >= nm_i->max_nid))
-			break;
-		add_free_nid(sbi, nid, true);
-	}
-
-	if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
-		goto out;
-	i++;
-	goto check_empty;
-
-check_partial:
-	i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
-	if (i >= nm_i->nat_blocks) {
-		disable_nat_bits(sbi, true);
-		up_read(&nm_i->nat_tree_lock);
-		return -EINVAL;
-	}
-
-	nid = i * NAT_ENTRY_PER_BLOCK;
-	page = get_current_nat_page(sbi, nid);
-	scan_nat_page(sbi, page, nid);
-	f2fs_put_page(page, 1);
-
-	if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) {
-		i++;
-		goto check_partial;
-	}
-out:
-	up_read(&nm_i->nat_tree_lock);
-	return 0;
-}
-
 static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1980,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 
 		if (nm_i->nid_cnt[FREE_NID_LIST])
 			return;
-
-		/* try to find free nids with nat_bits */
-		if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
-			return;
-	}
-
-	/* find next valid candidate */
-	if (enabled_nat_bits(sbi, NULL)) {
-		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
-					nm_i->nat_blocks, 0);
-
-		if (idx >= nm_i->nat_blocks)
-			set_sbi_flag(sbi, SBI_NEED_FSCK);
-		else
-			nid = idx * NAT_ENTRY_PER_BLOCK;
 	}
 
 	/* readahead nat pages to be scanned */
@@ -2081,7 +2026,7 @@ retry:
 		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
 		nm_i->available_nids--;
 
-		update_free_nid_bitmap(sbi, *nid, false);
+		update_free_nid_bitmap(sbi, *nid, false, false, false);
 
 		spin_unlock(&nm_i->nid_list_lock);
 		return true;
@@ -2137,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
 
 	nm_i->available_nids++;
 
-	update_free_nid_bitmap(sbi, nid, true);
+	update_free_nid_bitmap(sbi, nid, true, false, false);
 
 	spin_unlock(&nm_i->nid_list_lock);
 
@@ -2383,7 +2328,7 @@ add_out:
 	list_add_tail(&nes->set_list, head);
 }
 
-void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
 						struct page *page)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2402,16 +2347,16 @@ void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
 			valid++;
 	}
 	if (valid == 0) {
-		set_bit_le(nat_index, nm_i->empty_nat_bits);
-		clear_bit_le(nat_index, nm_i->full_nat_bits);
+		__set_bit_le(nat_index, nm_i->empty_nat_bits);
+		__clear_bit_le(nat_index, nm_i->full_nat_bits);
 		return;
 	}
 
-	clear_bit_le(nat_index, nm_i->empty_nat_bits);
+	__clear_bit_le(nat_index, nm_i->empty_nat_bits);
 	if (valid == NAT_ENTRY_PER_BLOCK)
-		set_bit_le(nat_index, nm_i->full_nat_bits);
+		__set_bit_le(nat_index, nm_i->full_nat_bits);
 	else
-		clear_bit_le(nat_index, nm_i->full_nat_bits);
+		__clear_bit_le(nat_index, nm_i->full_nat_bits);
 }
 
 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -2467,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 			add_free_nid(sbi, nid, false);
 			spin_lock(&NM_I(sbi)->nid_list_lock);
 			NM_I(sbi)->available_nids++;
-			update_free_nid_bitmap(sbi, nid, true);
+			update_free_nid_bitmap(sbi, nid, true, false, false);
 			spin_unlock(&NM_I(sbi)->nid_list_lock);
 		} else {
 			spin_lock(&NM_I(sbi)->nid_list_lock);
-			update_free_nid_bitmap(sbi, nid, false);
+			update_free_nid_bitmap(sbi, nid, false, false, false);
 			spin_unlock(&NM_I(sbi)->nid_list_lock);
 		}
 	}
@@ -2577,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
 	return 0;
 }
 
+inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned int i = 0;
+	nid_t nid, last_nid;
+
+	if (!enabled_nat_bits(sbi, NULL))
+		return;
+
+	for (i = 0; i < nm_i->nat_blocks; i++) {
+		i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
+		if (i >= nm_i->nat_blocks)
+			break;
+
+		__set_bit_le(i, nm_i->nat_block_bitmap);
+
+		nid = i * NAT_ENTRY_PER_BLOCK;
+		last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
+
+		spin_lock(&nm_i->free_nid_lock);
+		for (; nid < last_nid; nid++)
+			update_free_nid_bitmap(sbi, nid, true, true, true);
+		spin_unlock(&nm_i->free_nid_lock);
+	}
+
+	for (i = 0; i < nm_i->nat_blocks; i++) {
+		i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
+		if (i >= nm_i->nat_blocks)
+			break;
+
+		__set_bit_le(i, nm_i->nat_block_bitmap);
+	}
+}
+
 static int init_node_manager(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
@@ -2638,7 +2617,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	return 0;
 }
 
-int init_free_nid_cache(struct f2fs_sb_info *sbi)
+static int init_free_nid_cache(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 
@@ -2651,6 +2630,14 @@ int init_free_nid_cache(struct f2fs_sb_info *sbi)
 								GFP_KERNEL);
 	if (!nm_i->nat_block_bitmap)
 		return -ENOMEM;
+
+	nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks *
+					sizeof(unsigned short), GFP_KERNEL);
+	if (!nm_i->free_nid_count)
+		return -ENOMEM;
+
+	spin_lock_init(&nm_i->free_nid_lock);
+
 	return 0;
 }
 
@@ -2670,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi)
 	if (err)
 		return err;
 
+	/* load free nid status from nat_bits table */
+	load_free_nid_bitmap(sbi);
+
 	build_free_nids(sbi, true, true);
 	return 0;
 }
@@ -2730,6 +2720,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
 
 	kvfree(nm_i->nat_block_bitmap);
 	kvfree(nm_i->free_nid_bitmap);
+	kvfree(nm_i->free_nid_count);
 
 	kfree(nm_i->nat_bitmap);
 	kfree(nm_i->nat_bits);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4bd7a8b19332..29ef7088c558 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 		if (f2fs_discard_en(sbi) &&
 			!f2fs_test_and_set_bit(offset, se->discard_map))
 			sbi->discard_blks--;
+
+		/* don't overwrite by SSR to keep node chain */
+		if (se->type == CURSEG_WARM_NODE) {
+			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
+				se->ckpt_valid_blocks++;
+		}
 	} else {
 		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
 #ifdef CONFIG_F2FS_CHECK_FS
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8f96461236f6..7163fe014b57 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -695,14 +695,11 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
 
 	inode = new_inode(sb);
 	if (inode) {
-		struct hugetlbfs_inode_info *info;
 		inode->i_ino = get_next_ino();
 		inode->i_mode = S_IFDIR | config->mode;
 		inode->i_uid = config->uid;
 		inode->i_gid = config->gid;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
-		info = HUGETLBFS_I(inode);
-		mpol_shared_policy_init(&info->policy, NULL);
 		inode->i_op = &hugetlbfs_dir_inode_operations;
 		inode->i_fop = &simple_dir_operations;
 		/* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -733,7 +730,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 
 	inode = new_inode(sb);
 	if (inode) {
-		struct hugetlbfs_inode_info *info;
 		inode->i_ino = get_next_ino();
 		inode_init_owner(inode, dir, mode);
 		lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
@@ -741,15 +737,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
 		inode->i_mapping->private_data = resv_map;
-		info = HUGETLBFS_I(inode);
-		/*
-		 * The policy is initialized here even if we are creating a
-		 * private inode because initialization simply creates an
-		 * an empty rb tree and calls rwlock_init(), later when we
-		 * call mpol_free_shared_policy() it will just return because
-		 * the rb tree will still be empty.
-		 */
-		mpol_shared_policy_init(&info->policy, NULL);
 		switch (mode & S_IFMT) {
 		default:
 			init_special_inode(inode, mode, dev);
@@ -937,6 +924,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 		hugetlbfs_inc_free_inodes(sbinfo);
 		return NULL;
 	}
+
+	/*
+	 * Any time after allocation, hugetlbfs_destroy_inode can be called
+	 * for the inode.  mpol_free_shared_policy is unconditionally called
+	 * as part of hugetlbfs_destroy_inode.  So, initialize policy here
+	 * in case of a quick call to destroy.
+	 *
+	 * Note that the policy is initialized even if we are creating a
+	 * private inode.  This simplifies hugetlbfs_destroy_inode.
+	 */
+	mpol_shared_policy_init(&p->policy, NULL);
+
 	return &p->vfs_inode;
 }
 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a1a359bfcc9c..5adc2fb62b0f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1125,10 +1125,8 @@ static journal_t *journal_init_common(struct block_device *bdev,
 
 	/* Set up a default-sized revoke table for the new mount. */
 	err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
-	if (err) {
-		kfree(journal);
-		return NULL;
-	}
+	if (err)
+		goto err_cleanup;
 
 	spin_lock_init(&journal->j_history_lock);
 
@@ -1145,23 +1143,25 @@ static journal_t *journal_init_common(struct block_device *bdev,
 	journal->j_wbufsize = n;
 	journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
 					GFP_KERNEL);
-	if (!journal->j_wbuf) {
-		kfree(journal);
-		return NULL;
-	}
+	if (!journal->j_wbuf)
+		goto err_cleanup;
 
 	bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
 	if (!bh) {
 		pr_err("%s: Cannot get buffer for journal superblock\n",
 			__func__);
-		kfree(journal->j_wbuf);
-		kfree(journal);
-		return NULL;
+		goto err_cleanup;
 	}
 	journal->j_sb_buffer = bh;
 	journal->j_superblock = (journal_superblock_t *)bh->b_data;
 
 	return journal;
+
+err_cleanup:
+	kfree(journal->j_wbuf);
+	jbd2_journal_destroy_revoke(journal);
+	kfree(journal);
+	return NULL;
 }
 
 /* jbd2_journal_init_dev and jbd2_journal_init_inode:
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index cfc38b552118..f9aefcda5854 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -280,6 +280,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
 
 fail1:
 	jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
+	journal->j_revoke_table[0] = NULL;
 fail0:
 	return -ENOMEM;
 }
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 8e4dc7ab584c..ac2dfe0c5a9c 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -809,7 +809,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
 		if (kn->flags & KERNFS_HAS_MMAP)
 			unmap_mapping_range(inode->i_mapping, 0, 0, 1);
 
-		kernfs_release_file(kn, of);
+		if (kn->flags & KERNFS_HAS_RELEASE)
+			kernfs_release_file(kn, of);
 	}
 
 	mutex_unlock(&kernfs_open_file_mutex);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index bb79972dc638..773774531aff 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -232,12 +232,12 @@ static struct svc_serv_ops nfs41_cb_sv_ops = {
 	.svo_module		= THIS_MODULE,
 };
 
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
 	[0] = &nfs40_cb_sv_ops,
 	[1] = &nfs41_cb_sv_ops,
 };
 #else
-struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
 	[0] = &nfs40_cb_sv_ops,
 	[1] = NULL,
 };
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 91a8d610ba0f..390ada8741bc 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -325,10 +325,33 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 	return NULL;
 }
 
-static bool nfs_client_init_is_complete(const struct nfs_client *clp)
+/*
+ * Return true if @clp is done initializing, false if still working on it.
+ *
+ * Use nfs_client_init_status to check if it was successful.
+ */
+bool nfs_client_init_is_complete(const struct nfs_client *clp)
 {
 	return clp->cl_cons_state <= NFS_CS_READY;
 }
+EXPORT_SYMBOL_GPL(nfs_client_init_is_complete);
+
+/*
+ * Return 0 if @clp was successfully initialized, -errno otherwise.
+ *
+ * This must be called *after* nfs_client_init_is_complete() returns true,
+ * otherwise it will pop WARN_ON_ONCE and return -EINVAL
+ */
+int nfs_client_init_status(const struct nfs_client *clp)
+{
+	/* called without checking nfs_client_init_is_complete */
+	if (clp->cl_cons_state > NFS_CS_READY) {
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+	return clp->cl_cons_state;
+}
+EXPORT_SYMBOL_GPL(nfs_client_init_status);
 
 int nfs_wait_client_init_complete(const struct nfs_client *clp)
 {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index fb499a3f21b5..f92ba8d6c556 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2055,7 +2055,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	struct inode *old_inode = d_inode(old_dentry);
 	struct inode *new_inode = d_inode(new_dentry);
-	struct dentry *dentry = NULL, *rehash = NULL;
+	struct dentry *dentry = NULL;
 	struct rpc_task *task;
 	int error = -EBUSY;
 
@@ -2078,10 +2078,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * To prevent any new references to the target during the
 		 * rename, we unhash the dentry in advance.
 		 */
-		if (!d_unhashed(new_dentry)) {
+		if (!d_unhashed(new_dentry))
 			d_drop(new_dentry);
-			rehash = new_dentry;
-		}
 
 		if (d_count(new_dentry) > 2) {
 			int err;
@@ -2098,7 +2096,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 				goto out;
 
 			new_dentry = dentry;
-			rehash = NULL;
 			new_inode = NULL;
 		}
 	}
@@ -2119,8 +2116,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		error = task->tk_status;
 	rpc_put_task(task);
 out:
-	if (rehash)
-		d_rehash(rehash);
 	trace_nfs_rename_exit(old_dir, old_dentry,
 			new_dir, new_dentry, error);
 	/* new dentry created? */
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 44347f4bdc15..acd30baca461 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -202,10 +202,10 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 			task->tk_status);
 		nfs4_mark_deviceid_unavailable(devid);
 		pnfs_error_mark_layout_for_return(inode, lseg);
-		pnfs_set_lo_fail(lseg);
 		rpc_wake_up(&tbl->slot_tbl_waitq);
 		/* fall through */
 	default:
+		pnfs_set_lo_fail(lseg);
 reset:
 		dprintk("%s Retry through MDS. Error %d\n", __func__,
 			task->tk_status);
@@ -560,6 +560,50 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 	return PNFS_ATTEMPTED;
 }
 
+static int
+filelayout_check_deviceid(struct pnfs_layout_hdr *lo,
+			  struct nfs4_filelayout_segment *fl,
+			  gfp_t gfp_flags)
+{
+	struct nfs4_deviceid_node *d;
+	struct nfs4_file_layout_dsaddr *dsaddr;
+	int status = -EINVAL;
+
+	/* find and reference the deviceid */
+	d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid,
+			lo->plh_lc_cred, gfp_flags);
+	if (d == NULL)
+		goto out;
+
+	dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+	/* Found deviceid is unavailable */
+	if (filelayout_test_devid_unavailable(&dsaddr->id_node))
+		goto out_put;
+
+	fl->dsaddr = dsaddr;
+
+	if (fl->first_stripe_index >= dsaddr->stripe_count) {
+		dprintk("%s Bad first_stripe_index %u\n",
+				__func__, fl->first_stripe_index);
+		goto out_put;
+	}
+
+	if ((fl->stripe_type == STRIPE_SPARSE &&
+	    fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
+	    (fl->stripe_type == STRIPE_DENSE &&
+	    fl->num_fh != dsaddr->stripe_count)) {
+		dprintk("%s num_fh %u not valid for given packing\n",
+			__func__, fl->num_fh);
+		goto out_put;
+	}
+	status = 0;
+out:
+	return status;
+out_put:
+	nfs4_fl_put_deviceid(dsaddr);
+	goto out;
+}
+
 /*
  * filelayout_check_layout()
  *
@@ -572,11 +616,8 @@ static int
 filelayout_check_layout(struct pnfs_layout_hdr *lo,
 			struct nfs4_filelayout_segment *fl,
 			struct nfs4_layoutget_res *lgr,
-			struct nfs4_deviceid *id,
 			gfp_t gfp_flags)
 {
-	struct nfs4_deviceid_node *d;
-	struct nfs4_file_layout_dsaddr *dsaddr;
 	int status = -EINVAL;
 
 	dprintk("--> %s\n", __func__);
@@ -601,41 +642,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 		goto out;
 	}
 
-	/* find and reference the deviceid */
-	d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id,
-			lo->plh_lc_cred, gfp_flags);
-	if (d == NULL)
-		goto out;
-
-	dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
-	/* Found deviceid is unavailable */
-	if (filelayout_test_devid_unavailable(&dsaddr->id_node))
-		goto out_put;
-
-	fl->dsaddr = dsaddr;
-
-	if (fl->first_stripe_index >= dsaddr->stripe_count) {
-		dprintk("%s Bad first_stripe_index %u\n",
-				__func__, fl->first_stripe_index);
-		goto out_put;
-	}
-
-	if ((fl->stripe_type == STRIPE_SPARSE &&
-	    fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
-	    (fl->stripe_type == STRIPE_DENSE &&
-	    fl->num_fh != dsaddr->stripe_count)) {
-		dprintk("%s num_fh %u not valid for given packing\n",
-			__func__, fl->num_fh);
-		goto out_put;
-	}
-
 	status = 0;
 out:
 	dprintk("--> %s returns %d\n", __func__, status);
 	return status;
-out_put:
-	nfs4_fl_put_deviceid(dsaddr);
-	goto out;
 }
 
 static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
@@ -657,7 +667,6 @@ static int
 filelayout_decode_layout(struct pnfs_layout_hdr *flo,
 			 struct nfs4_filelayout_segment *fl,
 			 struct nfs4_layoutget_res *lgr,
-			 struct nfs4_deviceid *id,
 			 gfp_t gfp_flags)
 {
 	struct xdr_stream stream;
@@ -682,9 +691,9 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
 	if (unlikely(!p))
 		goto out_err;
 
-	memcpy(id, p, sizeof(*id));
+	memcpy(&fl->deviceid, p, sizeof(fl->deviceid));
 	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
-	nfs4_print_deviceid(id);
+	nfs4_print_deviceid(&fl->deviceid);
 
 	nfl_util = be32_to_cpup(p++);
 	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
@@ -831,15 +840,14 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 {
 	struct nfs4_filelayout_segment *fl;
 	int rc;
-	struct nfs4_deviceid id;
 
 	dprintk("--> %s\n", __func__);
 	fl = kzalloc(sizeof(*fl), gfp_flags);
 	if (!fl)
 		return NULL;
 
-	rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags);
-	if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) {
+	rc = filelayout_decode_layout(layoutid, fl, lgr, gfp_flags);
+	if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, gfp_flags)) {
 		_filelayout_free_lseg(fl);
 		return NULL;
 	}
@@ -888,18 +896,51 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	return min(stripe_unit - (unsigned int)stripe_offset, size);
 }
 
+static struct pnfs_layout_segment *
+fl_pnfs_update_layout(struct inode *ino,
+		      struct nfs_open_context *ctx,
+		      loff_t pos,
+		      u64 count,
+		      enum pnfs_iomode iomode,
+		      bool strict_iomode,
+		      gfp_t gfp_flags)
+{
+	struct pnfs_layout_segment *lseg = NULL;
+	struct pnfs_layout_hdr *lo;
+	struct nfs4_filelayout_segment *fl;
+	int status;
+
+	lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
+				  gfp_flags);
+	if (!lseg)
+		lseg = ERR_PTR(-ENOMEM);
+	if (IS_ERR(lseg))
+		goto out;
+
+	lo = NFS_I(ino)->layout;
+	fl = FILELAYOUT_LSEG(lseg);
+
+	status = filelayout_check_deviceid(lo, fl, gfp_flags);
+	if (status)
+		lseg = ERR_PTR(status);
+out:
+	if (IS_ERR(lseg))
+		pnfs_put_lseg(lseg);
+	return lseg;
+}
+
 static void
 filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 			struct nfs_page *req)
 {
 	if (!pgio->pg_lseg) {
-		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-					   req->wb_context,
-					   0,
-					   NFS4_MAX_UINT64,
-					   IOMODE_READ,
-					   false,
-					   GFP_KERNEL);
+		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
+						      req->wb_context,
+						      0,
+						      NFS4_MAX_UINT64,
+						      IOMODE_READ,
+						      false,
+						      GFP_KERNEL);
 		if (IS_ERR(pgio->pg_lseg)) {
 			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 			pgio->pg_lseg = NULL;
@@ -919,13 +960,13 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 	int status;
 
 	if (!pgio->pg_lseg) {
-		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-					   req->wb_context,
-					   0,
-					   NFS4_MAX_UINT64,
-					   IOMODE_RW,
-					   false,
-					   GFP_NOFS);
+		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
+						      req->wb_context,
+						      0,
+						      NFS4_MAX_UINT64,
+						      IOMODE_RW,
+						      false,
+						      GFP_NOFS);
 		if (IS_ERR(pgio->pg_lseg)) {
 			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 			pgio->pg_lseg = NULL;
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h
index 2896cb833a11..79323b5dab0c 100644
--- a/fs/nfs/filelayout/filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -55,15 +55,16 @@ struct nfs4_file_layout_dsaddr {
 };
 
 struct nfs4_filelayout_segment {
-	struct pnfs_layout_segment generic_hdr;
-	u32 stripe_type;
-	u32 commit_through_mds;
-	u32 stripe_unit;
-	u32 first_stripe_index;
-	u64 pattern_offset;
-	struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
-	unsigned int num_fh;
-	struct nfs_fh **fh_array;
+	struct pnfs_layout_segment	generic_hdr;
+	u32				stripe_type;
+	u32				commit_through_mds;
+	u32				stripe_unit;
+	u32				first_stripe_index;
+	u64				pattern_offset;
+	struct nfs4_deviceid		deviceid;
+	struct nfs4_file_layout_dsaddr	*dsaddr; /* Point to GETDEVINFO data */
+	unsigned int			num_fh;
+	struct nfs_fh			**fh_array;
 };
 
 struct nfs4_filelayout {
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index f956ca20a8a3..d913e818858f 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -266,6 +266,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
 	struct nfs4_pnfs_ds *ret = ds;
 	struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
+	int status;
 
 	if (ds == NULL) {
 		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
@@ -277,9 +278,14 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 	if (ds->ds_clp)
 		goto out_test_devid;
 
-	nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+	status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
 			     dataserver_retrans, 4,
 			     s->nfs_client->cl_minorversion);
+	if (status) {
+		nfs4_mark_deviceid_unavailable(devid);
+		ret = NULL;
+		goto out;
+	}
 
 out_test_devid:
 	if (ret->ds_clp == NULL ||
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index f4f39b0ab09b..98b34c9b0564 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -175,7 +175,19 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
 static inline bool
 ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
 {
-	return nfs4_test_deviceid_unavailable(node);
+	/*
+	 * Flexfiles should never mark a DS unavailable, but if it does
+	 * print a (ratelimited) warning as this can affect performance.
+	 */
+	if (nfs4_test_deviceid_unavailable(node)) {
+		u32 *p = (u32 *)node->deviceid.data;
+
+		pr_warn_ratelimited("NFS: flexfiles layout referencing an "
+				"unavailable device [%x%x%x%x]\n",
+				p[0], p[1], p[2], p[3]);
+		return true;
+	}
+	return false;
 }
 
 static inline int
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e5a6f248697b..457cfeb1d5c1 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -208,6 +208,10 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg,
 		} else
 			goto outerr;
 	}
+
+	if (IS_ERR(mirror->mirror_ds))
+		goto outerr;
+
 	if (mirror->mirror_ds->ds == NULL) {
 		struct nfs4_deviceid_node *devid;
 		devid = &mirror->mirror_ds->id_node;
@@ -384,6 +388,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 	struct inode *ino = lseg->pls_layout->plh_inode;
 	struct nfs_server *s = NFS_SERVER(ino);
 	unsigned int max_payload;
+	int status;
 
 	if (!ff_layout_mirror_valid(lseg, mirror, true)) {
 		pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
@@ -404,7 +409,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 	/* FIXME: For now we assume the server sent only one version of NFS
 	 * to use for the DS.
 	 */
-	nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
+	status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
 			     dataserver_retrans,
 			     mirror->mirror_ds->ds_versions[0].version,
 			     mirror->mirror_ds->ds_versions[0].minor_version);
@@ -420,11 +425,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 			mirror->mirror_ds->ds_versions[0].wsize = max_payload;
 		goto out;
 	}
+out_fail:
 	ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
 				 mirror, lseg->pls_range.offset,
 				 lseg->pls_range.length, NFS4ERR_NXIO,
 				 OP_ILLEGAL, GFP_NOIO);
-out_fail:
 	if (fail_return || !ff_layout_has_available_ds(lseg))
 		pnfs_error_mark_layout_for_return(ino, lseg);
 	ds = NULL;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 09ca5095c04e..7b38fedb7e03 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -186,6 +186,8 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
 					   struct nfs_fh *,
 					   struct nfs_fattr *,
 					   rpc_authflavor_t);
+extern bool nfs_client_init_is_complete(const struct nfs_client *clp);
+extern int nfs_client_init_status(const struct nfs_client *clp);
 extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
 extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
 extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 5ae9d64ea08b..8346ccbf2d52 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1023,9 +1023,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
 	server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
 	server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
 
-	if (server->rsize > server_resp_sz)
+	if (!server->rsize || server->rsize > server_resp_sz)
 		server->rsize = server_resp_sz;
-	if (server->wsize > server_rqst_sz)
+	if (!server->wsize || server->wsize > server_rqst_sz)
 		server->wsize = server_rqst_sz;
 #endif /* CONFIG_NFS_V4_1 */
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1b183686c6d4..201ca3f2c4ba 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2258,8 +2258,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
 	if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0)
 		return 0;
 
-	/* even though OPEN succeeded, access is denied. Close the file */
-	nfs4_close_state(state, fmode);
 	return -EACCES;
 }
 
@@ -2444,17 +2442,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
 	}
 
 	nfs4_stateid_copy(&stateid, &delegation->stateid);
-	if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+	if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) ||
+		!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
+			&delegation->flags)) {
 		rcu_read_unlock();
 		nfs_finish_clear_delegation_stateid(state, &stateid);
 		return;
 	}
 
-	if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) {
-		rcu_read_unlock();
-		return;
-	}
-
 	cred = get_rpccred(delegation->cred);
 	rcu_read_unlock();
 	status = nfs41_test_and_free_expired_stateid(server, &stateid, cred);
@@ -7427,11 +7422,11 @@ static void nfs4_exchange_id_release(void *data)
 	struct nfs41_exchange_id_data *cdata =
 					(struct nfs41_exchange_id_data *)data;
 
-	nfs_put_client(cdata->args.client);
 	if (cdata->xprt) {
 		xprt_put(cdata->xprt);
 		rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient);
 	}
+	nfs_put_client(cdata->args.client);
 	kfree(cdata->res.impl_id);
 	kfree(cdata->res.server_scope);
 	kfree(cdata->res.server_owner);
@@ -7538,10 +7533,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 	task_setup_data.callback_data = calldata;
 
 	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task)) {
-	status = PTR_ERR(task);
-		goto out_impl_id;
-	}
+	if (IS_ERR(task))
+		return PTR_ERR(task);
 
 	if (!xprt) {
 		status = rpc_wait_for_completion_task(task);
@@ -7569,6 +7562,7 @@ out_server_owner:
 	kfree(calldata->res.server_owner);
 out_calldata:
 	kfree(calldata);
+	nfs_put_client(clp);
 	goto out;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f0369e362753..80ce289eea05 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3942,7 +3942,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
 		if (len <= 0)
 			goto out;
 		dprintk("%s: name=%s\n", __func__, group_name->data);
-		return NFS_ATTR_FATTR_OWNER_NAME;
+		return NFS_ATTR_FATTR_GROUP_NAME;
 	} else {
 		len = xdr_stream_decode_opaque_inline(xdr, (void **)&p,
 				XDR_MAX_NETOBJ);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 63f77b49a586..590e1e35781f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -367,7 +367,7 @@ void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
 struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
 				      gfp_t gfp_flags);
 void nfs4_pnfs_v3_ds_connect_unload(void);
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 			  struct nfs4_deviceid_node *devid, unsigned int timeo,
 			  unsigned int retrans, u32 version, u32 minor_version);
 struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 9414b492439f..7250b95549ec 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -745,15 +745,17 @@ out:
 /*
  * Create an rpc connection to the nfs4_pnfs_ds data server.
  * Currently only supports IPv4 and IPv6 addresses.
- * If connection fails, make devid unavailable.
+ * If connection fails, make devid unavailable and return a -errno.
  */
-void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
+int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 			  struct nfs4_deviceid_node *devid, unsigned int timeo,
 			  unsigned int retrans, u32 version, u32 minor_version)
 {
-	if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
-		int err = 0;
+	int err;
 
+again:
+	err = 0;
+	if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
 		if (version == 3) {
 			err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
 						       retrans);
@@ -766,12 +768,29 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 			err = -EPROTONOSUPPORT;
 		}
 
-		if (err)
-			nfs4_mark_deviceid_unavailable(devid);
 		nfs4_clear_ds_conn_bit(ds);
 	} else {
 		nfs4_wait_ds_connect(ds);
+
+		/* what was waited on didn't connect AND didn't mark unavail */
+		if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
+			goto again;
 	}
+
+	/*
+	 * At this point the ds->ds_clp should be ready, but it might have
+	 * hit an error.
+	 */
+	if (!err) {
+		if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
+			WARN_ON_ONCE(ds->ds_clp ||
+				!nfs4_test_deviceid_unavailable(devid));
+			return -EINVAL;
+		}
+		err = nfs_client_init_status(ds->ds_clp);
+	}
+
+	return err;
 }
 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e75b056f46f4..abb2c8a3be42 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1784,7 +1784,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 			(long long)req_offset(req));
 		if (status < 0) {
 			nfs_context_set_write_error(req->wb_context, status);
-			nfs_inode_remove_request(req);
+			if (req->wb_page)
+				nfs_inode_remove_request(req);
 			dprintk_cont(", error = %d\n", status);
 			goto next;
 		}
@@ -1793,7 +1794,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 		 * returned by the server against all stored verfs. */
 		if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
 			/* We have a match */
-			nfs_inode_remove_request(req);
+			if (req->wb_page)
+				nfs_inode_remove_request(req);
 			dprintk_cont(" OK\n");
 			goto next;
 		}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 73e75ac90525..8bf8f667a8cf 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -538,13 +538,21 @@ out_free:
 
 static ssize_t
 nfsd_print_version_support(char *buf, int remaining, const char *sep,
-		unsigned vers, unsigned minor)
+		unsigned vers, int minor)
 {
-	const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";
+	const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u";
 	bool supported = !!nfsd_vers(vers, NFSD_TEST);
 
-	if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST))
+	if (vers == 4 && minor >= 0 &&
+	    !nfsd_minorversion(minor, NFSD_TEST))
 		supported = false;
+	if (minor == 0 && supported)
+		/*
+		 * special case for backward compatability.
+		 * +4.0 is never reported, it is implied by
+		 * +4, unless -4.0 is present.
+		 */
+		return 0;
 	return snprintf(buf, remaining, format, sep,
 			supported ? '+' : '-', vers, minor);
 }
@@ -554,7 +562,6 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 	char *mesg = buf;
 	char *vers, *minorp, sign;
 	int len, num, remaining;
-	unsigned minor;
 	ssize_t tlen = 0;
 	char *sep;
 	struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
@@ -575,6 +582,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 		if (len <= 0) return -EINVAL;
 		do {
 			enum vers_op cmd;
+			unsigned minor;
 			sign = *vers;
 			if (sign == '+' || sign == '-')
 				num = simple_strtol((vers+1), &minorp, 0);
@@ -585,8 +593,8 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 					return -EINVAL;
 				if (kstrtouint(minorp+1, 0, &minor) < 0)
 					return -EINVAL;
-			} else
-				minor = 0;
+			}
+
 			cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
 			switch(num) {
 			case 2:
@@ -594,8 +602,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 				nfsd_vers(num, cmd);
 				break;
 			case 4:
-				if (nfsd_minorversion(minor, cmd) >= 0)
-					break;
+				if (*minorp == '.') {
+					if (nfsd_minorversion(minor, cmd) < 0)
+						return -EINVAL;
+				} else if ((cmd == NFSD_SET) != nfsd_vers(num, NFSD_TEST)) {
+					/*
+					 * Either we have +4 and no minors are enabled,
+					 * or we have -4 and at least one minor is enabled.
+					 * In either case, propagate 'cmd' to all minors.
+					 */
+					minor = 0;
+					while (nfsd_minorversion(minor, cmd) >= 0)
+						minor++;
+				}
+				break;
 			default:
 				return -EINVAL;
 			}
@@ -612,9 +632,11 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 	sep = "";
 	remaining = SIMPLE_TRANSACTION_LIMIT;
 	for (num=2 ; num <= 4 ; num++) {
+		int minor;
 		if (!nfsd_vers(num, NFSD_AVAIL))
 			continue;
-		minor = 0;
+
+		minor = -1;
 		do {
 			len = nfsd_print_version_support(buf, remaining,
 					sep, num, minor);
@@ -624,7 +646,8 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 			buf += len;
 			tlen += len;
 			minor++;
-			sep = " ";
+			if (len)
+				sep = " ";
 		} while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION);
 	}
 out:
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index fa82b7707e85..03a7e9da4da0 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -786,6 +786,7 @@ nfserrno (int errno)
 		{ nfserr_serverfault, -ESERVERFAULT },
 		{ nfserr_serverfault, -ENFILE },
 		{ nfserr_io, -EUCLEAN },
+		{ nfserr_perm, -ENOKEY },
 	};
 	int	i;
 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 786a4a2cb2d7..31e1f9593457 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -167,7 +167,8 @@ nfsd_adjust_nfsd_versions4(void)
 
 int nfsd_minorversion(u32 minorversion, enum vers_op change)
 {
-	if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
+	if (minorversion > NFSD_SUPPORTED_MINOR_VERSION &&
+	    change != NFSD_AVAIL)
 		return -1;
 	switch(change) {
 	case NFSD_SET:
@@ -415,23 +416,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
 
 void nfsd_reset_versions(void)
 {
-	int found_one = 0;
 	int i;
 
-	for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
-		if (nfsd_program.pg_vers[i])
-			found_one = 1;
-	}
+	for (i = 0; i < NFSD_NRVERS; i++)
+		if (nfsd_vers(i, NFSD_TEST))
+			return;
 
-	if (!found_one) {
-		for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
-			nfsd_program.pg_vers[i] = nfsd_version[i];
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
-		for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
-			nfsd_acl_program.pg_vers[i] =
-				nfsd_acl_version[i];
-#endif
-	}
+	for (i = 0; i < NFSD_NRVERS; i++)
+		if (i != 4)
+			nfsd_vers(i, NFSD_SET);
+		else {
+			int minor = 0;
+			while (nfsd_minorversion(minor, NFSD_SET) >= 0)
+				minor++;
+		}
 }
 
 /*
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 67c24351a67f..cd261c8de53a 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -263,8 +263,13 @@ int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb)
 		if (!new_op)
 			return -ENOMEM;
 		new_op->upcall.req.features.features = 0;
-		ret = service_operation(new_op, "orangefs_features", 0);
-		orangefs_features = new_op->downcall.resp.features.features;
+		ret = service_operation(new_op, "orangefs_features",
+		    ORANGEFS_OP_PRIORITY | ORANGEFS_OP_NO_MUTEX);
+		if (!ret)
+			orangefs_features =
+			    new_op->downcall.resp.features.features;
+		else
+			orangefs_features = 0;
 		op_release(new_op);
 	} else {
 		orangefs_features = 0;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 8f91ec66baa3..d04ea4349909 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1074,6 +1074,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
 
 		if ((table->proc_handler == proc_dostring) ||
 		    (table->proc_handler == proc_dointvec) ||
+		    (table->proc_handler == proc_douintvec) ||
 		    (table->proc_handler == proc_dointvec_minmax) ||
 		    (table->proc_handler == proc_dointvec_jiffies) ||
 		    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
diff --git a/fs/stat.c b/fs/stat.c
index fa0be59340cc..c6c963b2546b 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -130,9 +130,13 @@ EXPORT_SYMBOL(vfs_getattr);
 int vfs_statx_fd(unsigned int fd, struct kstat *stat,
 		 u32 request_mask, unsigned int query_flags)
 {
-	struct fd f = fdget_raw(fd);
+	struct fd f;
 	int error = -EBADF;
 
+	if (query_flags & ~KSTAT_QUERY_FLAGS)
+		return -EINVAL;
+
+	f = fdget_raw(fd);
 	if (f.file) {
 		error = vfs_getattr(&f.file->f_path, stat,
 				    request_mask, query_flags);
@@ -155,9 +159,6 @@ EXPORT_SYMBOL(vfs_statx_fd);
  * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink
  * at the given name from being referenced.
  *
- * The caller must have preset stat->request_mask as for vfs_getattr().  The
- * flags are also used to load up stat->query_flags.
- *
  * 0 will be returned on success, and a -ve error code if unsuccessful.
  */
 int vfs_statx(int dfd, const char __user *filename, int flags,
@@ -509,46 +510,38 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
 }
 #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
 
-static inline int __put_timestamp(struct timespec *kts,
-				  struct statx_timestamp __user *uts)
-{
-	return (__put_user(kts->tv_sec,		&uts->tv_sec		) ||
-		__put_user(kts->tv_nsec,	&uts->tv_nsec		) ||
-		__put_user(0,			&uts->__reserved	));
-}
-
-/*
- * Set the statx results.
- */
-static long statx_set_result(struct kstat *stat, struct statx __user *buffer)
+static noinline_for_stack int
+cp_statx(const struct kstat *stat, struct statx __user *buffer)
 {
-	uid_t uid = from_kuid_munged(current_user_ns(), stat->uid);
-	gid_t gid = from_kgid_munged(current_user_ns(), stat->gid);
-
-	if (__put_user(stat->result_mask,	&buffer->stx_mask	) ||
-	    __put_user(stat->mode,		&buffer->stx_mode	) ||
-	    __clear_user(&buffer->__spare0, sizeof(buffer->__spare0))	  ||
-	    __put_user(stat->nlink,		&buffer->stx_nlink	) ||
-	    __put_user(uid,			&buffer->stx_uid	) ||
-	    __put_user(gid,			&buffer->stx_gid	) ||
-	    __put_user(stat->attributes,	&buffer->stx_attributes	) ||
-	    __put_user(stat->blksize,		&buffer->stx_blksize	) ||
-	    __put_user(MAJOR(stat->rdev),	&buffer->stx_rdev_major	) ||
-	    __put_user(MINOR(stat->rdev),	&buffer->stx_rdev_minor	) ||
-	    __put_user(MAJOR(stat->dev),	&buffer->stx_dev_major	) ||
-	    __put_user(MINOR(stat->dev),	&buffer->stx_dev_minor	) ||
-	    __put_timestamp(&stat->atime,	&buffer->stx_atime	) ||
-	    __put_timestamp(&stat->btime,	&buffer->stx_btime	) ||
-	    __put_timestamp(&stat->ctime,	&buffer->stx_ctime	) ||
-	    __put_timestamp(&stat->mtime,	&buffer->stx_mtime	) ||
-	    __put_user(stat->ino,		&buffer->stx_ino	) ||
-	    __put_user(stat->size,		&buffer->stx_size	) ||
-	    __put_user(stat->blocks,		&buffer->stx_blocks	) ||
-	    __clear_user(&buffer->__spare1, sizeof(buffer->__spare1))	  ||
-	    __clear_user(&buffer->__spare2, sizeof(buffer->__spare2)))
-		return -EFAULT;
-
-	return 0;
+	struct statx tmp;
+
+	memset(&tmp, 0, sizeof(tmp));
+
+	tmp.stx_mask = stat->result_mask;
+	tmp.stx_blksize = stat->blksize;
+	tmp.stx_attributes = stat->attributes;
+	tmp.stx_nlink = stat->nlink;
+	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
+	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	tmp.stx_mode = stat->mode;
+	tmp.stx_ino = stat->ino;
+	tmp.stx_size = stat->size;
+	tmp.stx_blocks = stat->blocks;
+	tmp.stx_attributes_mask = stat->attributes_mask;
+	tmp.stx_atime.tv_sec = stat->atime.tv_sec;
+	tmp.stx_atime.tv_nsec = stat->atime.tv_nsec;
+	tmp.stx_btime.tv_sec = stat->btime.tv_sec;
+	tmp.stx_btime.tv_nsec = stat->btime.tv_nsec;
+	tmp.stx_ctime.tv_sec = stat->ctime.tv_sec;
+	tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec;
+	tmp.stx_mtime.tv_sec = stat->mtime.tv_sec;
+	tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec;
+	tmp.stx_rdev_major = MAJOR(stat->rdev);
+	tmp.stx_rdev_minor = MINOR(stat->rdev);
+	tmp.stx_dev_major = MAJOR(stat->dev);
+	tmp.stx_dev_minor = MINOR(stat->dev);
+
+	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
 
 /**
@@ -570,10 +563,10 @@ SYSCALL_DEFINE5(statx,
 	struct kstat stat;
 	int error;
 
+	if (mask & STATX__RESERVED)
+		return -EINVAL;
 	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
 		return -EINVAL;
-	if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer)))
-		return -EFAULT;
 
 	if (filename)
 		error = vfs_statx(dfd, filename, flags, &stat, mask);
@@ -581,7 +574,8 @@ SYSCALL_DEFINE5(statx,
 		error = vfs_statx_fd(dfd, &stat, mask, flags);
 	if (error)
 		return error;
-	return statx_set_result(&stat, buffer);
+
+	return cp_statx(&stat, buffer);
 }
 
 /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b803213d1307..39c75a86c67f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -108,7 +108,7 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
 {
 	const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
 	struct kobject *kobj = of->kn->parent->priv;
-	size_t len;
+	ssize_t len;
 
 	/*
 	 * If buf != of->prealloc_buf, we don't know how
@@ -117,13 +117,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
 	if (WARN_ON_ONCE(buf != of->prealloc_buf))
 		return 0;
 	len = ops->show(kobj, of->kn->priv, buf);
+	if (len < 0)
+		return len;
 	if (pos) {
 		if (len <= pos)
 			return 0;
 		len -= pos;
 		memmove(buf, buf + pos, len);
 	}
-	return min(count, len);
+	return min_t(ssize_t, count, len);
 }
 
 /* kernfs write callback for regular sysfs files */
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 1d227b0fcf49..f7555fc25877 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1756,7 +1756,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 	 *	protocols: aa:... bb:...
 	 */
 	seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n",
-		   pending, total, UFFD_API, UFFD_API_FEATURES,
+		   pending, total, UFFD_API, ctx->features,
 		   UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS);
 }
 #endif
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index d04547fcf274..39f8604f764e 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -125,6 +125,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
 
 /* xfs_dir2_readdir.c */
 extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index c6809ff41197..e84af093b2ab 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -629,6 +629,112 @@ xfs_dir2_sf_check(
 }
 #endif	/* DEBUG */
 
+/* Verify the consistency of an inline directory. */
+int
+xfs_dir2_sf_verify(
+	struct xfs_inode		*ip)
+{
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_dir2_sf_hdr		*sfp;
+	struct xfs_dir2_sf_entry	*sfep;
+	struct xfs_dir2_sf_entry	*next_sfep;
+	char				*endp;
+	const struct xfs_dir_ops	*dops;
+	struct xfs_ifork		*ifp;
+	xfs_ino_t			ino;
+	int				i;
+	int				i8count;
+	int				offset;
+	int				size;
+	int				error;
+	__uint8_t			filetype;
+
+	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+	/*
+	 * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops,
+	 * so we can only trust the mountpoint to have the right pointer.
+	 */
+	dops = xfs_dir_get_ops(mp, NULL);
+
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
+	size = ifp->if_bytes;
+
+	/*
+	 * Give up if the directory is way too short.
+	 */
+	if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) ||
+	    size < xfs_dir2_sf_hdr_size(sfp->i8count))
+		return -EFSCORRUPTED;
+
+	endp = (char *)sfp + size;
+
+	/* Check .. entry */
+	ino = dops->sf_get_parent_ino(sfp);
+	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+	error = xfs_dir_ino_validate(mp, ino);
+	if (error)
+		return error;
+	offset = dops->data_first_offset;
+
+	/* Check all reported entries */
+	sfep = xfs_dir2_sf_firstentry(sfp);
+	for (i = 0; i < sfp->count; i++) {
+		/*
+		 * struct xfs_dir2_sf_entry has a variable length.
+		 * Check the fixed-offset parts of the structure are
+		 * within the data buffer.
+		 */
+		if (((char *)sfep + sizeof(*sfep)) >= endp)
+			return -EFSCORRUPTED;
+
+		/* Don't allow names with known bad length. */
+		if (sfep->namelen == 0)
+			return -EFSCORRUPTED;
+
+		/*
+		 * Check that the variable-length part of the structure is
+		 * within the data buffer.  The next entry starts after the
+		 * name component, so nextentry is an acceptable test.
+		 */
+		next_sfep = dops->sf_nextentry(sfp, sfep);
+		if (endp < (char *)next_sfep)
+			return -EFSCORRUPTED;
+
+		/* Check that the offsets always increase. */
+		if (xfs_dir2_sf_get_offset(sfep) < offset)
+			return -EFSCORRUPTED;
+
+		/* Check the inode number. */
+		ino = dops->sf_get_ino(sfp, sfep);
+		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+		error = xfs_dir_ino_validate(mp, ino);
+		if (error)
+			return error;
+
+		/* Check the file type. */
+		filetype = dops->sf_get_ftype(sfep);
+		if (filetype >= XFS_DIR3_FT_MAX)
+			return -EFSCORRUPTED;
+
+		offset = xfs_dir2_sf_get_offset(sfep) +
+				dops->data_entsize(sfep->namelen);
+
+		sfep = next_sfep;
+	}
+	if (i8count != sfp->i8count)
+		return -EFSCORRUPTED;
+	if ((void *)sfep != (void *)endp)
+		return -EFSCORRUPTED;
+
+	/* Make sure this whole thing ought to be in local format. */
+	if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+	    (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize)
+		return -EFSCORRUPTED;
+
+	return 0;
+}
+
 /*
  * Create a new (shortform) directory.
  */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 25c1e078aef6..8a37efe04de3 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -33,6 +33,8 @@
 #include "xfs_trace.h"
 #include "xfs_attr_sf.h"
 #include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
 
 kmem_zone_t *xfs_ifork_zone;
 
@@ -210,6 +212,16 @@ xfs_iformat_fork(
 	if (error)
 		return error;
 
+	/* Check inline dir contents. */
+	if (S_ISDIR(VFS_I(ip)->i_mode) &&
+	    dip->di_format == XFS_DINODE_FMT_LOCAL) {
+		error = xfs_dir2_sf_verify(ip);
+		if (error) {
+			xfs_idestroy_fork(ip, XFS_DATA_FORK);
+			return error;
+		}
+	}
+
 	if (xfs_is_reflink_inode(ip)) {
 		ASSERT(ip->i_cowfp == NULL);
 		xfs_ifork_init_cow(ip);
@@ -320,7 +332,6 @@ xfs_iformat_local(
 	int		whichfork,
 	int		size)
 {
-
 	/*
 	 * If the size is unreasonable, then something
 	 * is wrong and we just bail out rather than crash in
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 8b75dcea5966..828532ce0adc 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1311,8 +1311,16 @@ xfs_free_file_space(
 	/*
 	 * Now that we've unmap all full blocks we'll have to zero out any
 	 * partial block at the beginning and/or end.  xfs_zero_range is
-	 * smart enough to skip any holes, including those we just created.
+	 * smart enough to skip any holes, including those we just created,
+	 * but we must take care not to zero beyond EOF and enlarge i_size.
 	 */
+
+	if (offset >= XFS_ISIZE(ip))
+		return 0;
+
+	if (offset + len > XFS_ISIZE(ip))
+		len = XFS_ISIZE(ip) - offset;
+
 	return xfs_zero_range(ip, offset, len, NULL);
 }
 
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 003a99b83bd8..ad9396e516f6 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -71,22 +71,11 @@ xfs_dir2_sf_getdents(
 	struct xfs_da_geometry	*geo = args->geo;
 
 	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-	/*
-	 * Give up if the directory is way too short.
-	 */
-	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-		return -EIO;
-	}
-
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 
 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
 
-	if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
-		return -EFSCORRUPTED;
-
 	/*
 	 * If the block number in the offset is out of range, we're done.
 	 */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7eaf1ef74e3c..7605d8396596 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -50,6 +50,7 @@
 #include "xfs_log.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
+#include "xfs_dir2_priv.h"
 
 kmem_zone_t *xfs_inode_zone;
 
@@ -3546,6 +3547,12 @@ xfs_iflush_int(
 	if (ip->i_d.di_version < 3)
 		ip->i_d.di_flushiter++;
 
+	/* Check the inline directory data. */
+	if (S_ISDIR(VFS_I(ip)->i_mode) &&
+	    ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
+	    xfs_dir2_sf_verify(ip))
+		goto corrupt_out;
+
 	/*
 	 * Copy the dirty parts of the inode into the on-disk inode.  We always
 	 * copy out the core of the inode, because if the inode is dirty at all
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 229cc6a6d8ef..ebfc13350f9a 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -516,6 +516,20 @@ xfs_vn_getattr(
 	stat->blocks =
 		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
 
+	if (ip->i_d.di_version == 3) {
+		if (request_mask & STATX_BTIME) {
+			stat->result_mask |= STATX_BTIME;
+			stat->btime.tv_sec = ip->i_d.di_crtime.t_sec;
+			stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec;
+		}
+	}
+
+	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+		stat->attributes |= STATX_ATTR_APPEND;
+	if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP)
+		stat->attributes |= STATX_ATTR_NODUMP;
 
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFBLK:
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 2a6d9b1558e0..26d67ce3c18d 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -583,7 +583,7 @@ xfs_inumbers(
 		return error;
 
 	bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
-	buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
+	buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP);
 	do {
 		struct xfs_inobt_rec_incore	r;
 		int				stat;
author	Ingo Molnar <mingo@kernel.org>	2017-04-11 10:05:36 +0300
committer	Ingo Molnar <mingo@kernel.org>	2017-04-11 10:05:36 +0300
commit	36a4dfc37856cae40e4d864950f768d8b7d4f9f5 (patch)
tree	3aa6a1459a699b986632f64b740e8e98e3ce6500 /fs
parent	a481db34b9beb7a9647c23f2320dd38a2b1d681f (diff)
parent	39da7c509acff13fc8cb12ec1bb20337c988ed36 (diff)
download	linux-36a4dfc37856cae40e4d864950f768d8b7d4f9f5.tar.xz