24 files changed, 318 insertions, 212 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 2d3e32ebfd15..8729cf68d2fe 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1552,7 +1552,6 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 {
 	int ret;
 	int type;
-	struct btrfs_tree_block_info *info;
 	struct btrfs_extent_inline_ref *eiref;
 
 	if (*ptr == (unsigned long)-1)
@@ -1573,9 +1572,17 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 	}
 
 	/* we can treat both ref types equally here */
-	info = (struct btrfs_tree_block_info *)(ei + 1);
 	*out_root = btrfs_extent_inline_ref_offset(eb, eiref);
-	*out_level = btrfs_tree_block_level(eb, info);
+
+	if (key->type == BTRFS_EXTENT_ITEM_KEY) {
+		struct btrfs_tree_block_info *info;
+
+		info = (struct btrfs_tree_block_info *)(ei + 1);
+		*out_level = btrfs_tree_block_level(eb, info);
+	} else {
+		ASSERT(key->type == BTRFS_METADATA_ITEM_KEY);
+		*out_level = (u8)key->offset;
+	}
 
 	if (ret == 1)
 		*ptr = (unsigned long)-1;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 054577bddaf2..de4e70fb3cbb 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1857,6 +1857,14 @@ int btrfs_delayed_delete_inode_ref(struct inode *inode)
 {
 	struct btrfs_delayed_node *delayed_node;
 
+	/*
+	 * we don't do delayed inode updates during log recovery because it
+	 * leads to enospc problems.  This means we also can't do
+	 * delayed inode refs
+	 */
+	if (BTRFS_I(inode)->root->fs_info->log_root_recovering)
+		return -EAGAIN;
+
 	delayed_node = btrfs_get_or_create_delayed_node(inode);
 	if (IS_ERR(delayed_node))
 		return PTR_ERR(delayed_node);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a80b97100d90..15116585e714 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3139,9 +3139,11 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 	struct extent_buffer *leaf;
 
 	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
-	if (ret < 0)
+	if (ret) {
+		if (ret > 0)
+			ret = -ENOENT;
 		goto fail;
-	BUG_ON(ret); /* Corruption */
+	}
 
 	leaf = path->nodes[0];
 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
@@ -3149,11 +3151,9 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(path);
 fail:
-	if (ret) {
+	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
-		return ret;
-	}
-	return 0;
+	return ret;
 
 }
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e687bb0dc73a..8bf326affb94 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6255,8 +6255,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 out_fail:
 	btrfs_end_transaction(trans, root);
-	if (drop_on_err)
+	if (drop_on_err) {
+		inode_dec_link_count(inode);
 		iput(inode);
+	}
 	btrfs_balance_delayed_items(root);
 	btrfs_btree_balance_dirty(root);
 	return err;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f2bb13a23f86..9e1569ffbf6e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2607,9 +2607,9 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
 		ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
 					     flags, gen, mirror_num,
 					     have_csum ? csum : NULL);
-skip:
 		if (ret)
 			return ret;
+skip:
 		len -= l;
 		logical += l;
 		physical += l;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index f5013d92a7e6..c81c0e004588 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1416,7 +1416,7 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
 		}
 	}
 
-	dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n",
+	dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n",
 	     inode, ceph_vinop(inode), len, locked_page);
 
 	if (len > 0) {
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e5d3eadf47b1..bed43081720f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5166,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
 	/* fallback to generic here if not in extents fmt */
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
-		return __generic_block_fiemap(inode, fieinfo, start, len,
-					      ext4_get_block);
+		return generic_block_fiemap(inode, fieinfo, start, len,
+			ext4_get_block);
 
 	if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
 		return -EBADR;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 513c12cf444c..8131be8c0af3 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -273,19 +273,24 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
  * we determine this extent as a data or a hole according to whether the
  * page cache has data or not.
  */
-static int ext4_find_unwritten_pgoff(struct inode *inode, int whence,
-				     loff_t endoff, loff_t *offset)
+static int ext4_find_unwritten_pgoff(struct inode *inode,
+				     int whence,
+				     struct ext4_map_blocks *map,
+				     loff_t *offset)
 {
 	struct pagevec pvec;
+	unsigned int blkbits;
 	pgoff_t index;
 	pgoff_t end;
+	loff_t endoff;
 	loff_t startoff;
 	loff_t lastoff;
 	int found = 0;
 
+	blkbits = inode->i_sb->s_blocksize_bits;
 	startoff = *offset;
 	lastoff = startoff;
-
+	endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
 
 	index = startoff >> PAGE_CACHE_SHIFT;
 	end = endoff >> PAGE_CACHE_SHIFT;
@@ -403,144 +408,147 @@ out:
 static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 {
 	struct inode *inode = file->f_mapping->host;
-	struct fiemap_extent_info fie;
-	struct fiemap_extent ext[2];
-	loff_t next;
-	int i, ret = 0;
+	struct ext4_map_blocks map;
+	struct extent_status es;
+	ext4_lblk_t start, last, end;
+	loff_t dataoff, isize;
+	int blkbits;
+	int ret = 0;
 
 	mutex_lock(&inode->i_mutex);
-	if (offset >= inode->i_size) {
+
+	isize = i_size_read(inode);
+	if (offset >= isize) {
 		mutex_unlock(&inode->i_mutex);
 		return -ENXIO;
 	}
-	fie.fi_flags = 0;
-	fie.fi_extents_max = 2;
-	fie.fi_extents_start = (struct fiemap_extent __user *) &ext;
-	while (1) {
-		mm_segment_t old_fs = get_fs();
-
-		fie.fi_extents_mapped = 0;
-		memset(ext, 0, sizeof(*ext) * fie.fi_extents_max);
-
-		set_fs(get_ds());
-		ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
-		set_fs(old_fs);
-		if (ret)
+
+	blkbits = inode->i_sb->s_blocksize_bits;
+	start = offset >> blkbits;
+	last = start;
+	end = isize >> blkbits;
+	dataoff = offset;
+
+	do {
+		map.m_lblk = last;
+		map.m_len = end - last + 1;
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
+			if (last != start)
+				dataoff = (loff_t)last << blkbits;
 			break;
+		}
 
-		/* No extents found, EOF */
-		if (!fie.fi_extents_mapped) {
-			ret = -ENXIO;
+		/*
+		 * If there is a delay extent at this offset,
+		 * it will be as a data.
+		 */
+		ext4_es_find_delayed_extent_range(inode, last, last, &es);
+		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
+			if (last != start)
+				dataoff = (loff_t)last << blkbits;
 			break;
 		}
-		for (i = 0; i < fie.fi_extents_mapped; i++) {
-			next = (loff_t)(ext[i].fe_length + ext[i].fe_logical);
 
-			if (offset < (loff_t)ext[i].fe_logical)
-				offset = (loff_t)ext[i].fe_logical;
-			/*
-			 * If extent is not unwritten, then it contains valid
-			 * data, mapped or delayed.
-			 */
-			if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN))
-				goto out;
+		/*
+		 * If there is a unwritten extent at this offset,
+		 * it will be as a data or a hole according to page
+		 * cache that has data or not.
+		 */
+		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+			int unwritten;
+			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+							      &map, &dataoff);
+			if (unwritten)
+				break;
+		}
 
-			/*
-			 * If there is a unwritten extent at this offset,
-			 * it will be as a data or a hole according to page
-			 * cache that has data or not.
-			 */
-			if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
-						      next, &offset))
-				goto out;
+		last++;
+		dataoff = (loff_t)last << blkbits;
+	} while (last <= end);
 
-			if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) {
-				ret = -ENXIO;
-				goto out;
-			}
-			offset = next;
-		}
-	}
-	if (offset > inode->i_size)
-		offset = inode->i_size;
-out:
 	mutex_unlock(&inode->i_mutex);
-	if (ret)
-		return ret;
 
-	return vfs_setpos(file, offset, maxsize);
+	if (dataoff > isize)
+		return -ENXIO;
+
+	return vfs_setpos(file, dataoff, maxsize);
 }
 
 /*
- * ext4_seek_hole() retrieves the offset for SEEK_HOLE
+ * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
  */
 static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
 {
 	struct inode *inode = file->f_mapping->host;
-	struct fiemap_extent_info fie;
-	struct fiemap_extent ext[2];
-	loff_t next;
-	int i, ret = 0;
+	struct ext4_map_blocks map;
+	struct extent_status es;
+	ext4_lblk_t start, last, end;
+	loff_t holeoff, isize;
+	int blkbits;
+	int ret = 0;
 
 	mutex_lock(&inode->i_mutex);
-	if (offset >= inode->i_size) {
+
+	isize = i_size_read(inode);
+	if (offset >= isize) {
 		mutex_unlock(&inode->i_mutex);
 		return -ENXIO;
 	}
 
-	fie.fi_flags = 0;
-	fie.fi_extents_max = 2;
-	fie.fi_extents_start = (struct fiemap_extent __user *)&ext;
-	while (1) {
-		mm_segment_t old_fs = get_fs();
-
-		fie.fi_extents_mapped = 0;
-		memset(ext, 0, sizeof(*ext));
+	blkbits = inode->i_sb->s_blocksize_bits;
+	start = offset >> blkbits;
+	last = start;
+	end = isize >> blkbits;
+	holeoff = offset;
 
-		set_fs(get_ds());
-		ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
-		set_fs(old_fs);
-		if (ret)
-			break;
+	do {
+		map.m_lblk = last;
+		map.m_len = end - last + 1;
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
+			last += ret;
+			holeoff = (loff_t)last << blkbits;
+			continue;
+		}
 
-		/* No extents found */
-		if (!fie.fi_extents_mapped)
-			break;
+		/*
+		 * If there is a delay extent at this offset,
+		 * we will skip this extent.
+		 */
+		ext4_es_find_delayed_extent_range(inode, last, last, &es);
+		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
+			last = es.es_lblk + es.es_len;
+			holeoff = (loff_t)last << blkbits;
+			continue;
+		}
 
-		for (i = 0; i < fie.fi_extents_mapped; i++) {
-			next = (loff_t)(ext[i].fe_logical + ext[i].fe_length);
-			/*
-			 * If extent is not unwritten, then it contains valid
-			 * data, mapped or delayed.
-			 */
-			if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
-				if (offset < (loff_t)ext[i].fe_logical)
-					goto out;
-				offset = next;
+		/*
+		 * If there is a unwritten extent at this offset,
+		 * it will be as a data or a hole according to page
+		 * cache that has data or not.
+		 */
+		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+			int unwritten;
+			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+							      &map, &holeoff);
+			if (!unwritten) {
+				last += ret;
+				holeoff = (loff_t)last << blkbits;
 				continue;
 			}
-			/*
-			 * If there is a unwritten extent at this offset,
-			 * it will be as a data or a hole according to page
-			 * cache that has data or not.
-			 */
-			if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
-						      next, &offset))
-				goto out;
-
-			offset = next;
-			if (ext[i].fe_flags & FIEMAP_EXTENT_LAST)
-				goto out;
 		}
-	}
-	if (offset > inode->i_size)
-		offset = inode->i_size;
-out:
+
+		/* find a hole */
+		break;
+	} while (last <= end);
+
 	mutex_unlock(&inode->i_mutex);
-	if (ret)
-		return ret;
 
-	return vfs_setpos(file, offset, maxsize);
+	if (holeoff > isize)
+		holeoff = isize;
+
+	return vfs_setpos(file, holeoff, maxsize);
 }
 
 /*
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index bf76f405a5f9..8a8ec6293b19 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -24,6 +24,18 @@ int ext4_resize_begin(struct super_block *sb)
 		return -EPERM;
 
 	/*
+	 * If we are not using the primary superblock/GDT copy don't resize,
+         * because the user tools have no way of handling this.  Probably a
+         * bad time to do it anyways.
+         */
+	if (EXT4_SB(sb)->s_sbh->b_blocknr !=
+	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
+		ext4_warning(sb, "won't resize using backup superblock at %llu",
+			(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
+		return -EPERM;
+	}
+
+	/*
 	 * We are not allowed to do online-resizing on a filesystem mounted
 	 * with error, because it can destroy the filesystem easily.
 	 */
@@ -758,18 +770,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 		       "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
 		       gdb_num);
 
-	/*
-	 * If we are not using the primary superblock/GDT copy don't resize,
-         * because the user tools have no way of handling this.  Probably a
-         * bad time to do it anyways.
-         */
-	if (EXT4_SB(sb)->s_sbh->b_blocknr !=
-	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
-		ext4_warning(sb, "won't resize using backup superblock at %llu",
-			(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
-		return -EPERM;
-	}
-
 	gdb_bh = sb_bread(sb, gdblock);
 	if (!gdb_bh)
 		return -EIO;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 43c92b1685cb..74c5f53595fb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3482,7 +3482,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
 	    EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
-		ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are "
+		ext4_warning(sb, "metadata_csum and uninit_bg are "
 			     "redundant flags; please run fsck.");
 
 	/* Check for a known checksum algorithm */
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 99d440a4a6ba..ee85cd4e136a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -740,14 +740,15 @@ static int __init fcntl_init(void)
 	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
 	 * is defined as O_NONBLOCK on some platforms and not on others.
 	 */
-	BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
 		O_RDONLY	| O_WRONLY	| O_RDWR	|
 		O_CREAT		| O_EXCL	| O_NOCTTY	|
 		O_TRUNC		| O_APPEND	| /* O_NONBLOCK	| */
 		__O_SYNC	| O_DSYNC	| FASYNC	|
 		O_DIRECT	| O_LARGEFILE	| O_DIRECTORY	|
 		O_NOFOLLOW	| O_NOATIME	| O_CLOEXEC	|
-		__FMODE_EXEC	| O_PATH	| __O_TMPFILE
+		__FMODE_EXEC	| O_PATH	| __O_TMPFILE	|
+		__FMODE_NONOTIFY
 		));
 
 	fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ba1107977f2e..ed19a7d622fa 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -131,6 +131,13 @@ static void fuse_req_init_context(struct fuse_req *req)
 	req->in.h.pid = current->pid;
 }
 
+void fuse_set_initialized(struct fuse_conn *fc)
+{
+	/* Make sure stores before this are seen on another CPU */
+	smp_wmb();
+	fc->initialized = 1;
+}
+
 static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
 {
 	return !fc->initialized || (for_background && fc->blocked);
@@ -155,6 +162,8 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
 		if (intr)
 			goto out;
 	}
+	/* Matches smp_wmb() in fuse_set_initialized() */
+	smp_rmb();
 
 	err = -ENOTCONN;
 	if (!fc->connected)
@@ -253,6 +262,8 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
 
 	atomic_inc(&fc->num_waiting);
 	wait_event(fc->blocked_waitq, fc->initialized);
+	/* Matches smp_wmb() in fuse_set_initialized() */
+	smp_rmb();
 	req = fuse_request_alloc(0);
 	if (!req)
 		req = get_reserved_req(fc, file);
@@ -511,6 +522,39 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 }
 EXPORT_SYMBOL_GPL(fuse_request_send);
 
+static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
+{
+	if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS)
+		args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE;
+
+	if (fc->minor < 9) {
+		switch (args->in.h.opcode) {
+		case FUSE_LOOKUP:
+		case FUSE_CREATE:
+		case FUSE_MKNOD:
+		case FUSE_MKDIR:
+		case FUSE_SYMLINK:
+		case FUSE_LINK:
+			args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+			break;
+		case FUSE_GETATTR:
+		case FUSE_SETATTR:
+			args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+			break;
+		}
+	}
+	if (fc->minor < 12) {
+		switch (args->in.h.opcode) {
+		case FUSE_CREATE:
+			args->in.args[0].size = sizeof(struct fuse_open_in);
+			break;
+		case FUSE_MKNOD:
+			args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
+			break;
+		}
+	}
+}
+
 ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
 {
 	struct fuse_req *req;
@@ -520,6 +564,9 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	/* Needs to be done after fuse_get_req() so that fc->minor is valid */
+	fuse_adjust_compat(fc, args);
+
 	req->in.h.opcode = args->in.h.opcode;
 	req->in.h.nodeid = args->in.h.nodeid;
 	req->in.numargs = args->in.numargs;
@@ -2127,7 +2174,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
 	if (fc->connected) {
 		fc->connected = 0;
 		fc->blocked = 0;
-		fc->initialized = 1;
+		fuse_set_initialized(fc);
 		end_io_requests(fc);
 		end_queued_requests(fc);
 		end_polls(fc);
@@ -2146,7 +2193,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
 		spin_lock(&fc->lock);
 		fc->connected = 0;
 		fc->blocked = 0;
-		fc->initialized = 1;
+		fuse_set_initialized(fc);
 		end_queued_requests(fc);
 		end_polls(fc);
 		wake_up_all(&fc->blocked_waitq);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 252b8a5de8b5..08e7b1a9d5d0 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -156,10 +156,7 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
 	args->in.args[0].size = name->len + 1;
 	args->in.args[0].value = name->name;
 	args->out.numargs = 1;
-	if (fc->minor < 9)
-		args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
-	else
-		args->out.args[0].size = sizeof(struct fuse_entry_out);
+	args->out.args[0].size = sizeof(struct fuse_entry_out);
 	args->out.args[0].value = outarg;
 }
 
@@ -422,16 +419,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	args.in.h.opcode = FUSE_CREATE;
 	args.in.h.nodeid = get_node_id(dir);
 	args.in.numargs = 2;
-	args.in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
-						sizeof(inarg);
+	args.in.args[0].size = sizeof(inarg);
 	args.in.args[0].value = &inarg;
 	args.in.args[1].size = entry->d_name.len + 1;
 	args.in.args[1].value = entry->d_name.name;
 	args.out.numargs = 2;
-	if (fc->minor < 9)
-		args.out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
-	else
-		args.out.args[0].size = sizeof(outentry);
+	args.out.args[0].size = sizeof(outentry);
 	args.out.args[0].value = &outentry;
 	args.out.args[1].size = sizeof(outopen);
 	args.out.args[1].value = &outopen;
@@ -539,10 +532,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
 	memset(&outarg, 0, sizeof(outarg));
 	args->in.h.nodeid = get_node_id(dir);
 	args->out.numargs = 1;
-	if (fc->minor < 9)
-		args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
-	else
-		args->out.args[0].size = sizeof(outarg);
+	args->out.args[0].size = sizeof(outarg);
 	args->out.args[0].value = &outarg;
 	err = fuse_simple_request(fc, args);
 	if (err)
@@ -592,8 +582,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
 	inarg.umask = current_umask();
 	args.in.h.opcode = FUSE_MKNOD;
 	args.in.numargs = 2;
-	args.in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
-						sizeof(inarg);
+	args.in.args[0].size = sizeof(inarg);
 	args.in.args[0].value = &inarg;
 	args.in.args[1].size = entry->d_name.len + 1;
 	args.in.args[1].value = entry->d_name.name;
@@ -899,10 +888,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
 	args.in.args[0].size = sizeof(inarg);
 	args.in.args[0].value = &inarg;
 	args.out.numargs = 1;
-	if (fc->minor < 9)
-		args.out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
-	else
-		args.out.args[0].size = sizeof(outarg);
+	args.out.args[0].size = sizeof(outarg);
 	args.out.args[0].value = &outarg;
 	err = fuse_simple_request(fc, &args);
 	if (!err) {
@@ -1574,10 +1560,7 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
 	args->in.args[0].size = sizeof(*inarg_p);
 	args->in.args[0].value = inarg_p;
 	args->out.numargs = 1;
-	if (fc->minor < 9)
-		args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
-	else
-		args->out.args[0].size = sizeof(*outarg_p);
+	args->out.args[0].size = sizeof(*outarg_p);
 	args->out.args[0].value = outarg_p;
 }
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e0fc6725d1d0..1cdfb07c1376 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -906,4 +906,6 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
 int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 		    struct file *file);
 
+void fuse_set_initialized(struct fuse_conn *fc);
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6749109f255d..f38256e4476e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -424,8 +424,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
 	args.in.h.opcode = FUSE_STATFS;
 	args.in.h.nodeid = get_node_id(dentry->d_inode);
 	args.out.numargs = 1;
-	args.out.args[0].size =
-		fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
+	args.out.args[0].size = sizeof(outarg);
 	args.out.args[0].value = &outarg;
 	err = fuse_simple_request(fc, &args);
 	if (!err)
@@ -898,7 +897,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 		fc->max_write = max_t(unsigned, 4096, fc->max_write);
 		fc->conn_init = 1;
 	}
-	fc->initialized = 1;
+	fuse_set_initialized(fc);
 	wake_up_all(&fc->blocked_waitq);
 }
 
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 37989f02a226..2d881b381d2b 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -201,10 +201,14 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns)
 static int kernfs_name_compare(unsigned int hash, const char *name,
 			       const void *ns, const struct kernfs_node *kn)
 {
-	if (hash != kn->hash)
-		return hash - kn->hash;
-	if (ns != kn->ns)
-		return ns - kn->ns;
+	if (hash < kn->hash)
+		return -1;
+	if (hash > kn->hash)
+		return 1;
+	if (ns < kn->ns)
+		return -1;
+	if (ns > kn->ns)
+		return 1;
 	return strcmp(name, kn->name);
 }
 
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index e94c887da2d7..55505cbe11af 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -138,10 +138,6 @@ lockd(void *vrqstp)
 
 	dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
 
-	if (!nlm_timeout)
-		nlm_timeout = LOCKD_DFLT_TIMEO;
-	nlmsvc_timeout = nlm_timeout * HZ;
-
 	/*
 	 * The main request loop. We don't terminate until the last
 	 * NFS mount or NFS daemon has gone away.
@@ -350,6 +346,10 @@ static struct svc_serv *lockd_create_svc(void)
 		printk(KERN_WARNING
 			"lockd_up: no pid, %d users??\n", nlmsvc_users);
 
+	if (!nlm_timeout)
+		nlm_timeout = LOCKD_DFLT_TIMEO;
+	nlmsvc_timeout = nlm_timeout * HZ;
+
 	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, svc_rpcb_cleanup);
 	if (!serv) {
 		printk(KERN_WARNING "lockd_up: create service failed\n");
diff --git a/fs/locks.c b/fs/locks.c
index 735b8d3fa78c..59e2f905e4ff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1702,7 +1702,7 @@ static int generic_delete_lease(struct file *filp)
 			break;
 	}
 	trace_generic_delete_lease(inode, fl);
-	if (fl)
+	if (fl && IS_LEASE(fl))
 		error = fl->fl_lmops->lm_change(before, F_UNLCK, &dispose);
 	spin_unlock(&inode->i_lock);
 	locks_dispose_list(&dispose);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 03311259b0c4..953daa44a282 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -228,6 +228,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 	kfree(clp->cl_serverowner);
 	kfree(clp->cl_serverscope);
 	kfree(clp->cl_implid);
+	kfree(clp->cl_owner_id);
 }
 
 void nfs4_free_client(struct nfs_client *clp)
@@ -452,6 +453,14 @@ static void nfs4_swap_callback_idents(struct nfs_client *keep,
 	spin_unlock(&nn->nfs_client_lock);
 }
 
+static bool nfs4_match_client_owner_id(const struct nfs_client *clp1,
+		const struct nfs_client *clp2)
+{
+	if (clp1->cl_owner_id == NULL || clp2->cl_owner_id == NULL)
+		return true;
+	return strcmp(clp1->cl_owner_id, clp2->cl_owner_id) == 0;
+}
+
 /**
  * nfs40_walk_client_list - Find server that recognizes a client ID
  *
@@ -483,9 +492,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
 		if (pos->rpc_ops != new->rpc_ops)
 			continue;
 
-		if (pos->cl_proto != new->cl_proto)
-			continue;
-
 		if (pos->cl_minorversion != new->cl_minorversion)
 			continue;
 
@@ -510,6 +516,9 @@ int nfs40_walk_client_list(struct nfs_client *new,
 		if (pos->cl_clientid != new->cl_clientid)
 			continue;
 
+		if (!nfs4_match_client_owner_id(pos, new))
+			continue;
+
 		atomic_inc(&pos->cl_count);
 		spin_unlock(&nn->nfs_client_lock);
 
@@ -566,20 +575,14 @@ static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b)
 }
 
 /*
- * Returns true if the server owners match
+ * Returns true if the server major ids match
  */
 static bool
-nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b)
+nfs4_check_clientid_trunking(struct nfs_client *a, struct nfs_client *b)
 {
 	struct nfs41_server_owner *o1 = a->cl_serverowner;
 	struct nfs41_server_owner *o2 = b->cl_serverowner;
 
-	if (o1->minor_id != o2->minor_id) {
-		dprintk("NFS: --> %s server owner minor IDs do not match\n",
-			__func__);
-		return false;
-	}
-
 	if (o1->major_id_sz != o2->major_id_sz)
 		goto out_major_mismatch;
 	if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0)
@@ -621,9 +624,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
 		if (pos->rpc_ops != new->rpc_ops)
 			continue;
 
-		if (pos->cl_proto != new->cl_proto)
-			continue;
-
 		if (pos->cl_minorversion != new->cl_minorversion)
 			continue;
 
@@ -654,7 +654,19 @@ int nfs41_walk_client_list(struct nfs_client *new,
 		if (!nfs4_match_clientids(pos, new))
 			continue;
 
-		if (!nfs4_match_serverowners(pos, new))
+		/*
+		 * Note that session trunking is just a special subcase of
+		 * client id trunking. In either case, we want to fall back
+		 * to using the existing nfs_client.
+		 */
+		if (!nfs4_check_clientid_trunking(pos, new))
+			continue;
+
+		/* Unlike NFSv4.0, we know that NFSv4.1 always uses the
+		 * uniform string, however someone might switch the
+		 * uniquifier string on us.
+		 */
+		if (!nfs4_match_client_owner_id(pos, new))
 			continue;
 
 		atomic_inc(&pos->cl_count);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e7f8d5ff2581..c347705b0161 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1117,8 +1117,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
 		return 0;
 	if ((delegation->type & fmode) != fmode)
 		return 0;
-	if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
-		return 0;
 	if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
 		return 0;
 	nfs_mark_delegation_referenced(delegation);
@@ -4917,11 +4915,14 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
 }
 
 static unsigned int
-nfs4_init_nonuniform_client_string(const struct nfs_client *clp,
+nfs4_init_nonuniform_client_string(struct nfs_client *clp,
 				   char *buf, size_t len)
 {
 	unsigned int result;
 
+	if (clp->cl_owner_id != NULL)
+		return strlcpy(buf, clp->cl_owner_id, len);
+
 	rcu_read_lock();
 	result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s",
 				clp->cl_ipaddr,
@@ -4930,24 +4931,32 @@ nfs4_init_nonuniform_client_string(const struct nfs_client *clp,
 				rpc_peeraddr2str(clp->cl_rpcclient,
 							RPC_DISPLAY_PROTO));
 	rcu_read_unlock();
+	clp->cl_owner_id = kstrdup(buf, GFP_KERNEL);
 	return result;
 }
 
 static unsigned int
-nfs4_init_uniform_client_string(const struct nfs_client *clp,
+nfs4_init_uniform_client_string(struct nfs_client *clp,
 				char *buf, size_t len)
 {
 	const char *nodename = clp->cl_rpcclient->cl_nodename;
+	unsigned int result;
+
+	if (clp->cl_owner_id != NULL)
+		return strlcpy(buf, clp->cl_owner_id, len);
 
 	if (nfs4_client_id_uniquifier[0] != '\0')
-		return scnprintf(buf, len, "Linux NFSv%u.%u %s/%s",
+		result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s",
 				clp->rpc_ops->version,
 				clp->cl_minorversion,
 				nfs4_client_id_uniquifier,
 				nodename);
-	return scnprintf(buf, len, "Linux NFSv%u.%u %s",
+	else
+		result = scnprintf(buf, len, "Linux NFSv%u.%u %s",
 				clp->rpc_ops->version, clp->cl_minorversion,
 				nodename);
+	clp->cl_owner_id = kstrdup(buf, GFP_KERNEL);
+	return result;
 }
 
 /*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3550a9c87616..c06a1ba80d73 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3897,11 +3897,11 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 		status = nfs4_setlease(dp);
 		goto out;
 	}
-	atomic_inc(&fp->fi_delegees);
 	if (fp->fi_had_conflict) {
 		status = -EAGAIN;
 		goto out_unlock;
 	}
+	atomic_inc(&fp->fi_delegees);
 	hash_delegation_locked(dp, fp);
 	status = 0;
 out_unlock:
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index c991616acca9..bff8567aa42d 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -259,16 +259,15 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 	struct fsnotify_event *kevent;
 	char __user *start;
 	int ret;
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	start = buf;
 	group = file->private_data;
 
 	pr_debug("%s: group=%p\n", __func__, group);
 
+	add_wait_queue(&group->notification_waitq, &wait);
 	while (1) {
-		prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
-
 		mutex_lock(&group->notification_mutex);
 		kevent = get_one_event(group, count);
 		mutex_unlock(&group->notification_mutex);
@@ -289,7 +288,8 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 
 			if (start != buf)
 				break;
-			schedule();
+
+			wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 			continue;
 		}
 
@@ -318,8 +318,8 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 		buf += ret;
 		count -= ret;
 	}
+	remove_wait_queue(&group->notification_waitq, &wait);
 
-	finish_wait(&group->notification_waitq, &wait);
 	if (start != buf && ret != -EFAULT)
 		ret = buf - start;
 	return ret;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 79b5af5e6a7b..cecd875653e4 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2023,11 +2023,8 @@ leave:
 	dlm_lockres_drop_inflight_ref(dlm, res);
 	spin_unlock(&res->spinlock);
 
-	if (ret < 0) {
+	if (ret < 0)
 		mlog_errno(ret);
-		if (newlock)
-			dlm_lock_put(newlock);
-	}
 
 	return ret;
 }
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b931e04e3388..914c121ec890 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -94,6 +94,14 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 				     struct inode *inode,
 				     const char *symname);
 
+static int ocfs2_double_lock(struct ocfs2_super *osb,
+			     struct buffer_head **bh1,
+			     struct inode *inode1,
+			     struct buffer_head **bh2,
+			     struct inode *inode2,
+			     int rename);
+
+static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2);
 /* An orphan dir name is an 8 byte value, printed as a hex string */
 #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
 
@@ -678,8 +686,10 @@ static int ocfs2_link(struct dentry *old_dentry,
 {
 	handle_t *handle;
 	struct inode *inode = old_dentry->d_inode;
+	struct inode *old_dir = old_dentry->d_parent->d_inode;
 	int err;
 	struct buffer_head *fe_bh = NULL;
+	struct buffer_head *old_dir_bh = NULL;
 	struct buffer_head *parent_fe_bh = NULL;
 	struct ocfs2_dinode *fe = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
@@ -696,19 +706,33 @@ static int ocfs2_link(struct dentry *old_dentry,
 
 	dquot_initialize(dir);
 
-	err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
+	err = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
+			&parent_fe_bh, dir, 0);
 	if (err < 0) {
 		if (err != -ENOENT)
 			mlog_errno(err);
 		return err;
 	}
 
+	/* make sure both dirs have bhs
+	 * get an extra ref on old_dir_bh if old==new */
+	if (!parent_fe_bh) {
+		if (old_dir_bh) {
+			parent_fe_bh = old_dir_bh;
+			get_bh(parent_fe_bh);
+		} else {
+			mlog(ML_ERROR, "%s: no old_dir_bh!\n", osb->uuid_str);
+			err = -EIO;
+			goto out;
+		}
+	}
+
 	if (!dir->i_nlink) {
 		err = -ENOENT;
 		goto out;
 	}
 
-	err = ocfs2_lookup_ino_from_name(dir, old_dentry->d_name.name,
+	err = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
 			old_dentry->d_name.len, &old_de_ino);
 	if (err) {
 		err = -ENOENT;
@@ -801,10 +825,11 @@ out_unlock_inode:
 	ocfs2_inode_unlock(inode, 1);
 
 out:
-	ocfs2_inode_unlock(dir, 1);
+	ocfs2_double_unlock(old_dir, dir);
 
 	brelse(fe_bh);
 	brelse(parent_fe_bh);
+	brelse(old_dir_bh);
 
 	ocfs2_free_dir_lookup_result(&lookup);
 
@@ -1072,14 +1097,15 @@ static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
 }
 
 /*
- * The only place this should be used is rename!
+ * The only place this should be used is rename and link!
  * if they have the same id, then the 1st one is the only one locked.
  */
 static int ocfs2_double_lock(struct ocfs2_super *osb,
 			     struct buffer_head **bh1,
 			     struct inode *inode1,
 			     struct buffer_head **bh2,
-			     struct inode *inode2)
+			     struct inode *inode2,
+			     int rename)
 {
 	int status;
 	int inode1_is_ancestor, inode2_is_ancestor;
@@ -1127,7 +1153,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 		}
 		/* lock id2 */
 		status = ocfs2_inode_lock_nested(inode2, bh2, 1,
-						 OI_LS_RENAME1);
+				rename == 1 ? OI_LS_RENAME1 : OI_LS_PARENT);
 		if (status < 0) {
 			if (status != -ENOENT)
 				mlog_errno(status);
@@ -1136,7 +1162,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 	}
 
 	/* lock id1 */
-	status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
+	status = ocfs2_inode_lock_nested(inode1, bh1, 1,
+			rename == 1 ?  OI_LS_RENAME2 : OI_LS_PARENT);
 	if (status < 0) {
 		/*
 		 * An error return must mean that no cluster locks
@@ -1252,7 +1279,7 @@ static int ocfs2_rename(struct inode *old_dir,
 
 	/* if old and new are the same, this'll just do one lock. */
 	status = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
-				   &new_dir_bh, new_dir);
+				   &new_dir_bh, new_dir, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;