164 files changed, 7869 insertions, 3470 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index d78e950402c1..a97263be6a91 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -236,10 +236,12 @@ source "fs/nfsd/Kconfig"
 
 config LOCKD
 	tristate
+	depends on FILE_LOCKING
 
 config LOCKD_V4
 	bool
 	depends on NFSD_V3 || NFS_V3
+	depends on FILE_LOCKING
 	default y
 
 config EXPORTFS
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 210acafe4a9b..3ff8bdd18fb3 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -432,7 +432,6 @@ vfs_rejected_lock:
 	list_del_init(&fl->fl_u.afs.link);
 	if (list_empty(&vnode->granted_locks))
 		afs_defer_unlock(vnode, key);
-	spin_unlock(&vnode->lock);
 	goto abort_attempt;
 }
 
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index c52be53f6946..5ffb570cd3a8 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -17,7 +17,6 @@
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
-#include <linux/mnt_namespace.h>
 #include "internal.h"
 
 
diff --git a/fs/aio.c b/fs/aio.c
index 76da12537956..d065b2c3273e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 {
 	assert_spin_locked(&ctx->ctx_lock);
 
+	if (req->ki_eventfd != NULL)
+		eventfd_ctx_put(req->ki_eventfd);
 	if (req->ki_dtor)
 		req->ki_dtor(req);
 	if (req->ki_iovec != &req->ki_inline_vec)
@@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data)
 		/* Complete the fput(s) */
 		if (req->ki_filp != NULL)
 			__fput(req->ki_filp);
-		if (req->ki_eventfd != NULL)
-			__fput(req->ki_eventfd);
 
 		/* Link the iocb into the context's free list */
 		spin_lock_irq(&ctx->ctx_lock);
@@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data)
  */
 static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 {
-	int schedule_putreq = 0;
-
 	dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
 		req, atomic_long_read(&req->ki_filp->f_count));
 
@@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	 * we would not be holding the last reference to the file*, so
 	 * this function will be executed w/out any aio kthread wakeup.
 	 */
-	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
-		schedule_putreq++;
-	else
-		req->ki_filp = NULL;
-	if (req->ki_eventfd != NULL) {
-		if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
-			schedule_putreq++;
-		else
-			req->ki_eventfd = NULL;
-	}
-	if (unlikely(schedule_putreq)) {
+	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
 		spin_unlock(&fput_lock);
 		queue_work(aio_wq, &fput_work);
-	} else
+	} else {
+		req->ki_filp = NULL;
 		really_put_req(ctx, req);
+	}
 	return 1;
 }
 
@@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		 * an eventfd() fd, and will be signaled for each completed
 		 * event using the eventfd_signal() function.
 		 */
-		req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
+		req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
 		if (IS_ERR(req->ki_eventfd)) {
 			ret = PTR_ERR(req->ki_eventfd);
 			req->ki_eventfd = NULL;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9fa212b014a5..b7c1603cd4bd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1522,11 +1522,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	info->thread = NULL;
 
 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
-	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
-
 	if (psinfo == NULL)
 		return 0;
 
+	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
+
 	/*
 	 * Figure out how many notes we're going to need for each thread.
 	 */
@@ -1929,7 +1929,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
 	if (!elf)
 		goto out;
-	
+	/*
+	 * The number of segs are recored into ELF header as 16bit value.
+	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
+	 */
 	segs = current->mm->map_count;
 #ifdef ELF_CORE_EXTRA_PHDRS
 	segs += ELF_CORE_EXTRA_PHDRS;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 31c46a241bac..49a34e7f7306 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -1,7 +1,7 @@
 /*
  * bio-integrity.c - bio data integrity extensions
  *
- * Copyright (C) 2007, 2008 Oracle Corporation
+ * Copyright (C) 2007, 2008, 2009 Oracle Corporation
  * Written by: Martin K. Petersen <martin.petersen@oracle.com>
  *
  * This program is free software; you can redistribute it and/or
@@ -25,63 +25,121 @@
 #include <linux/bio.h>
 #include <linux/workqueue.h>
 
-static struct kmem_cache *bio_integrity_slab __read_mostly;
-static mempool_t *bio_integrity_pool;
-static struct bio_set *integrity_bio_set;
+struct integrity_slab {
+	struct kmem_cache *slab;
+	unsigned short nr_vecs;
+	char name[8];
+};
+
+#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
+struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
+	IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
+};
+#undef IS
+
 static struct workqueue_struct *kintegrityd_wq;
 
+static inline unsigned int vecs_to_idx(unsigned int nr)
+{
+	switch (nr) {
+	case 1:
+		return 0;
+	case 2 ... 4:
+		return 1;
+	case 5 ... 16:
+		return 2;
+	case 17 ... 64:
+		return 3;
+	case 65 ... 128:
+		return 4;
+	case 129 ... BIO_MAX_PAGES:
+		return 5;
+	default:
+		BUG();
+	}
+}
+
+static inline int use_bip_pool(unsigned int idx)
+{
+	if (idx == BIOVEC_NR_POOLS)
+		return 1;
+
+	return 0;
+}
+
 /**
- * bio_integrity_alloc - Allocate integrity payload and attach it to bio
+ * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
  * @bio:	bio to attach integrity metadata to
  * @gfp_mask:	Memory allocation mask
  * @nr_vecs:	Number of integrity metadata scatter-gather elements
+ * @bs:		bio_set to allocate from
  *
  * Description: This function prepares a bio for attaching integrity
  * metadata.  nr_vecs specifies the maximum number of pages containing
  * integrity metadata that can be attached.
  */
-struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
-						  gfp_t gfp_mask,
-						  unsigned int nr_vecs)
+struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
+							 gfp_t gfp_mask,
+							 unsigned int nr_vecs,
+							 struct bio_set *bs)
 {
 	struct bio_integrity_payload *bip;
-	struct bio_vec *iv;
-	unsigned long idx;
+	unsigned int idx = vecs_to_idx(nr_vecs);
 
 	BUG_ON(bio == NULL);
+	bip = NULL;
 
-	bip = mempool_alloc(bio_integrity_pool, gfp_mask);
-	if (unlikely(bip == NULL)) {
-		printk(KERN_ERR "%s: could not alloc bip\n", __func__);
-		return NULL;
-	}
+	/* Lower order allocations come straight from slab */
+	if (!use_bip_pool(idx))
+		bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);
 
-	memset(bip, 0, sizeof(*bip));
+	/* Use mempool if lower order alloc failed or max vecs were requested */
+	if (bip == NULL) {
+		bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
 
-	iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set);
-	if (unlikely(iv == NULL)) {
-		printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
-		mempool_free(bip, bio_integrity_pool);
-		return NULL;
+		if (unlikely(bip == NULL)) {
+			printk(KERN_ERR "%s: could not alloc bip\n", __func__);
+			return NULL;
+		}
 	}
 
-	bip->bip_pool = idx;
-	bip->bip_vec = iv;
+	memset(bip, 0, sizeof(*bip));
+
+	bip->bip_slab = idx;
 	bip->bip_bio = bio;
 	bio->bi_integrity = bip;
 
 	return bip;
 }
+EXPORT_SYMBOL(bio_integrity_alloc_bioset);
+
+/**
+ * bio_integrity_alloc - Allocate integrity payload and attach it to bio
+ * @bio:	bio to attach integrity metadata to
+ * @gfp_mask:	Memory allocation mask
+ * @nr_vecs:	Number of integrity metadata scatter-gather elements
+ *
+ * Description: This function prepares a bio for attaching integrity
+ * metadata.  nr_vecs specifies the maximum number of pages containing
+ * integrity metadata that can be attached.
+ */
+struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
+						  gfp_t gfp_mask,
+						  unsigned int nr_vecs)
+{
+	return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
+}
 EXPORT_SYMBOL(bio_integrity_alloc);
 
 /**
  * bio_integrity_free - Free bio integrity payload
  * @bio:	bio containing bip to be freed
+ * @bs:		bio_set this bio was allocated from
  *
  * Description: Used to free the integrity portion of a bio. Usually
  * called from bio_free().
  */
-void bio_integrity_free(struct bio *bio)
+void bio_integrity_free(struct bio *bio, struct bio_set *bs)
 {
 	struct bio_integrity_payload *bip = bio->bi_integrity;
 
@@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio)
 	    && bip->bip_buf != NULL)
 		kfree(bip->bip_buf);
 
-	bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool);
-	mempool_free(bip, bio_integrity_pool);
+	if (use_bip_pool(bip->bip_slab))
+		mempool_free(bip, bs->bio_integrity_pool);
+	else
+		kmem_cache_free(bip_slab[bip->bip_slab].slab, bip);
 
 	bio->bi_integrity = NULL;
 }
@@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 	struct bio_integrity_payload *bip = bio->bi_integrity;
 	struct bio_vec *iv;
 
-	if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) {
+	if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
 		printk(KERN_ERR "%s: bip_vec full\n", __func__);
 		return 0;
 	}
@@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
 	bp->iv1 = bip->bip_vec[0];
 	bp->iv2 = bip->bip_vec[0];
 
-	bp->bip1.bip_vec = &bp->iv1;
-	bp->bip2.bip_vec = &bp->iv2;
+	bp->bip1.bip_vec[0] = bp->iv1;
+	bp->bip2.bip_vec[0] = bp->iv2;
 
 	bp->iv1.bv_len = sectors * bi->tuple_size;
 	bp->iv2.bv_offset += sectors * bi->tuple_size;
@@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split);
  * @bio:	New bio
  * @bio_src:	Original bio
  * @gfp_mask:	Memory allocation mask
+ * @bs:		bio_set to allocate bip from
  *
  * Description:	Called to allocate a bip when cloning a bio
  */
-int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
+int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
+			gfp_t gfp_mask, struct bio_set *bs)
 {
 	struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
 	struct bio_integrity_payload *bip;
 
 	BUG_ON(bip_src == NULL);
 
-	bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
+	bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs);
 
 	if (bip == NULL)
 		return -EIO;
@@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(bio_integrity_clone);
 
-static int __init bio_integrity_init(void)
+int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
-	kintegrityd_wq = create_workqueue("kintegrityd");
+	unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
+
+	bs->bio_integrity_pool =
+		mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
 
+	if (!bs->bio_integrity_pool)
+		return -1;
+
+	return 0;
+}
+EXPORT_SYMBOL(bioset_integrity_create);
+
+void bioset_integrity_free(struct bio_set *bs)
+{
+	if (bs->bio_integrity_pool)
+		mempool_destroy(bs->bio_integrity_pool);
+}
+EXPORT_SYMBOL(bioset_integrity_free);
+
+void __init bio_integrity_init(void)
+{
+	unsigned int i;
+
+	kintegrityd_wq = create_workqueue("kintegrityd");
 	if (!kintegrityd_wq)
 		panic("Failed to create kintegrityd\n");
 
-	bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
-					SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) {
+		unsigned int size;
 
-	bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE,
-						      bio_integrity_slab);
-	if (!bio_integrity_pool)
-		panic("bio_integrity: can't allocate bip pool\n");
+		size = sizeof(struct bio_integrity_payload)
+			+ bip_slab[i].nr_vecs * sizeof(struct bio_vec);
 
-	integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0);
-	if (!integrity_bio_set)
-		panic("bio_integrity: can't allocate bio_set\n");
-
-	return 0;
+		bip_slab[i].slab =
+			kmem_cache_create(bip_slab[i].name, size, 0,
+					  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+	}
 }
-subsys_initcall(bio_integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 24c914043532..76738005c8e8 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
 		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
 	if (bio_integrity(bio))
-		bio_integrity_free(bio);
+		bio_integrity_free(bio, bs);
 
 	/*
 	 * If we have front padding, adjust the bio pointer before freeing
@@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
 static void bio_kmalloc_destructor(struct bio *bio)
 {
 	if (bio_integrity(bio))
-		bio_integrity_free(bio);
+		bio_integrity_free(bio, fs_bio_set);
 	kfree(bio);
 }
 
@@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
 	if (bio_integrity(bio)) {
 		int ret;
 
-		ret = bio_integrity_clone(b, bio, gfp_mask);
+		ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
 
 		if (ret < 0) {
 			bio_put(b);
@@ -705,14 +705,13 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
 }
 
 static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
-			  struct sg_iovec *iov, int iov_count, int uncopy,
-			  int do_free_page)
+			  struct sg_iovec *iov, int iov_count,
+			  int to_user, int from_user, int do_free_page)
 {
 	int ret = 0, i;
 	struct bio_vec *bvec;
 	int iov_idx = 0;
 	unsigned int iov_off = 0;
-	int read = bio_data_dir(bio) == READ;
 
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		char *bv_addr = page_address(bvec->bv_page);
@@ -727,13 +726,14 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
 			iov_addr = iov[iov_idx].iov_base + iov_off;
 
 			if (!ret) {
-				if (!read && !uncopy)
-					ret = copy_from_user(bv_addr, iov_addr,
-							     bytes);
-				if (read && uncopy)
+				if (to_user)
 					ret = copy_to_user(iov_addr, bv_addr,
 							   bytes);
 
+				if (from_user)
+					ret = copy_from_user(bv_addr, iov_addr,
+							     bytes);
+
 				if (ret)
 					ret = -EFAULT;
 			}
@@ -770,7 +770,8 @@ int bio_uncopy_user(struct bio *bio)
 
 	if (!bio_flagged(bio, BIO_NULL_MAPPED))
 		ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
-				     bmd->nr_sgvecs, 1, bmd->is_our_pages);
+				     bmd->nr_sgvecs, bio_data_dir(bio) == READ,
+				     0, bmd->is_our_pages);
 	bio_free_map_data(bmd);
 	bio_put(bio);
 	return ret;
@@ -875,8 +876,9 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	/*
 	 * success
 	 */
-	if (!write_to_vm && (!map_data || !map_data->null_mapped)) {
-		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0);
+	if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
+	    (map_data && map_data->from_user)) {
+		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
 		if (ret)
 			goto cleanup;
 	}
@@ -1539,6 +1541,7 @@ void bioset_free(struct bio_set *bs)
 	if (bs->bio_pool)
 		mempool_destroy(bs->bio_pool);
 
+	bioset_integrity_free(bs);
 	biovec_free_pools(bs);
 	bio_put_slab(bs);
 
@@ -1579,6 +1582,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 	if (!bs->bio_pool)
 		goto bad;
 
+	if (bioset_integrity_create(bs, pool_size))
+		goto bad;
+
 	if (!biovec_create_pools(bs, pool_size))
 		return bs;
 
@@ -1616,6 +1622,7 @@ static int __init init_bio(void)
 	if (!bio_slabs)
 		panic("bio: can't allocate bios\n");
 
+	bio_integrity_init();
 	biovec_init_slabs();
 
 	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 603972576f0f..f128427b995b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -29,51 +29,28 @@
 
 #ifdef CONFIG_FS_POSIX_ACL
 
-static void btrfs_update_cached_acl(struct inode *inode,
-				    struct posix_acl **p_acl,
-				    struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
-		posix_acl_release(*p_acl);
-	*p_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 {
 	int size;
 	const char *name;
 	char *value = NULL;
-	struct posix_acl *acl = NULL, **p_acl;
+	struct posix_acl *acl;
+
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &BTRFS_I(inode)->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &BTRFS_I(inode)->i_default_acl;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 
-	/* Handle the cached NULL acl case without locking */
-	acl = ACCESS_ONCE(*p_acl);
-	if (!acl)
-		return acl;
-
-	spin_lock(&inode->i_lock);
-	acl = *p_acl;
-	if (acl != BTRFS_ACL_NOT_CACHED)
-		acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-
-	if (acl != BTRFS_ACL_NOT_CACHED)
-		return acl;
-
 	size = __btrfs_getxattr(inode, name, "", 0);
 	if (size > 0) {
 		value = kzalloc(size, GFP_NOFS);
@@ -82,13 +59,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		size = __btrfs_getxattr(inode, name, value, size);
 		if (size > 0) {
 			acl = posix_acl_from_xattr(value, size);
-			btrfs_update_cached_acl(inode, p_acl, acl);
+			set_cached_acl(inode, type, acl);
 		}
 		kfree(value);
 	} else if (size == -ENOENT || size == -ENODATA || size == 0) {
 		/* FIXME, who returns -ENOENT?  I think nobody */
 		acl = NULL;
-		btrfs_update_cached_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 	} else {
 		acl = ERR_PTR(-EIO);
 	}
@@ -121,7 +98,6 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
 	int ret, size = 0;
 	const char *name;
-	struct posix_acl **p_acl;
 	char *value = NULL;
 	mode_t mode;
 
@@ -141,13 +117,11 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 		ret = 0;
 		inode->i_mode = mode;
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &BTRFS_I(inode)->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EINVAL : 0;
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &BTRFS_I(inode)->i_default_acl;
 		break;
 	default:
 		return -EINVAL;
@@ -172,7 +146,7 @@ out:
 	kfree(value);
 
 	if (!ret)
-		btrfs_update_cached_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 
 	return ret;
 }
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7f88628a1a72..6e4f6c50a120 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
 					   "btrfs-%s-%d", workers->name,
 					   workers->num_workers + i);
 		if (IS_ERR(worker->task)) {
-			kfree(worker);
 			ret = PTR_ERR(worker->task);
+			kfree(worker);
 			goto fail;
 		}
 
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index acb4f3517582..ea1ea0af8c0e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -53,10 +53,6 @@ struct btrfs_inode {
 	/* used to order data wrt metadata */
 	struct btrfs_ordered_inode_tree ordered_tree;
 
-	/* standard acl pointers */
-	struct posix_acl *i_acl;
-	struct posix_acl *i_default_acl;
-
 	/* for keeping track of orphaned inodes */
 	struct list_head i_orphan;
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 03441a99ea38..98a873838717 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -41,8 +41,6 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_MAGIC "_BHRfS_M"
 
-#define BTRFS_ACL_NOT_CACHED    ((void *)-1)
-
 #define BTRFS_MAX_LEVEL 8
 
 #define BTRFS_COMPAT_EXTENT_TREE_V0
@@ -2076,8 +2074,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
-			*root);
+int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct extent_buffer *node,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edc7d208c5ce..a5aca3997d42 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner)
 	return type;
 }
 
-static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
+static int find_next_key(struct btrfs_path *path, int level,
+			 struct btrfs_key *key)
 
 {
-	int level;
-	BUG_ON(!path->keep_locks);
-	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+	for (; level < BTRFS_MAX_LEVEL; level++) {
 		if (!path->nodes[level])
 			break;
-		btrfs_assert_tree_locked(path->nodes[level]);
 		if (path->slots[level] + 1 >=
 		    btrfs_header_nritems(path->nodes[level]))
 			continue;
@@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 		 * For simplicity, we just do not add new inline back
 		 * ref if there is any kind of item for this block
 		 */
-		if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
+		if (find_next_key(path, 0, &key) == 0 &&
+		    key.objectid == bytenr &&
 		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
 			err = -EAGAIN;
 			goto out;
@@ -2697,7 +2696,7 @@ again:
 
 		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
 		       ", %llu bytes_used, %llu bytes_reserved, "
-		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
+		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
 		       "%llu total\n", (unsigned long long)bytes,
 		       (unsigned long long)data_sinfo->bytes_delalloc,
 		       (unsigned long long)data_sinfo->bytes_used,
@@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 	return buf;
 }
 
+#if 0
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf)
 {
@@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-#if 0
-
 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
 					struct btrfs_root *root,
 					struct btrfs_leaf_ref *ref)
@@ -4553,262 +4551,471 @@ out:
 }
 #endif
 
+struct walk_control {
+	u64 refs[BTRFS_MAX_LEVEL];
+	u64 flags[BTRFS_MAX_LEVEL];
+	struct btrfs_key update_progress;
+	int stage;
+	int level;
+	int shared_level;
+	int update_ref;
+	int keep_locks;
+};
+
+#define DROP_REFERENCE	1
+#define UPDATE_BACKREF	2
+
 /*
- * helper function for drop_subtree, this function is similar to
- * walk_down_tree. The main difference is that it checks reference
- * counts while tree blocks are locked.
+ * hepler to process tree block while walking down the tree.
+ *
+ * when wc->stage == DROP_REFERENCE, this function checks
+ * reference count of the block. if the block is shared and
+ * we need update back refs for the subtree rooted at the
+ * block, this function changes wc->stage to UPDATE_BACKREF
+ *
+ * when wc->stage == UPDATE_BACKREF, this function updates
+ * back refs for pointers in the block.
+ *
+ * NOTE: return value 1 means we should stop walking down.
  */
-static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
-				   struct btrfs_path *path, int *level)
+				   struct btrfs_path *path,
+				   struct walk_control *wc)
 {
-	struct extent_buffer *next;
-	struct extent_buffer *cur;
-	struct extent_buffer *parent;
-	u64 bytenr;
-	u64 ptr_gen;
-	u64 refs;
-	u64 flags;
-	u32 blocksize;
+	int level = wc->level;
+	struct extent_buffer *eb = path->nodes[level];
+	struct btrfs_key key;
+	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
 	int ret;
 
-	cur = path->nodes[*level];
-	ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
-				       &refs, &flags);
-	BUG_ON(ret);
-	if (refs > 1)
-		goto out;
+	if (wc->stage == UPDATE_BACKREF &&
+	    btrfs_header_owner(eb) != root->root_key.objectid)
+		return 1;
 
-	BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+	/*
+	 * when reference count of tree block is 1, it won't increase
+	 * again. once full backref flag is set, we never clear it.
+	 */
+	if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+	    (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+		BUG_ON(!path->locks[level]);
+		ret = btrfs_lookup_extent_info(trans, root,
+					       eb->start, eb->len,
+					       &wc->refs[level],
+					       &wc->flags[level]);
+		BUG_ON(ret);
+		BUG_ON(wc->refs[level] == 0);
+	}
 
-	while (*level >= 0) {
-		cur = path->nodes[*level];
-		if (*level == 0) {
-			ret = btrfs_drop_leaf_ref(trans, root, cur);
-			BUG_ON(ret);
-			clean_tree_block(trans, root, cur);
-			break;
-		}
-		if (path->slots[*level] >= btrfs_header_nritems(cur)) {
-			clean_tree_block(trans, root, cur);
-			break;
+	if (wc->stage == DROP_REFERENCE &&
+	    wc->update_ref && wc->refs[level] > 1) {
+		BUG_ON(eb == root->node);
+		BUG_ON(path->slots[level] > 0);
+		if (level == 0)
+			btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
+		else
+			btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
+		if (btrfs_header_owner(eb) == root->root_key.objectid &&
+		    btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
+			wc->stage = UPDATE_BACKREF;
+			wc->shared_level = level;
 		}
+	}
 
-		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
-		blocksize = btrfs_level_size(root, *level - 1);
-		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+	if (wc->stage == DROP_REFERENCE) {
+		if (wc->refs[level] > 1)
+			return 1;
 
-		next = read_tree_block(root, bytenr, blocksize, ptr_gen);
-		btrfs_tree_lock(next);
-		btrfs_set_lock_blocking(next);
+		if (path->locks[level] && !wc->keep_locks) {
+			btrfs_tree_unlock(eb);
+			path->locks[level] = 0;
+		}
+		return 0;
+	}
 
-		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-					       &refs, &flags);
+	/* wc->stage == UPDATE_BACKREF */
+	if (!(wc->flags[level] & flag)) {
+		BUG_ON(!path->locks[level]);
+		ret = btrfs_inc_ref(trans, root, eb, 1);
 		BUG_ON(ret);
-		if (refs > 1) {
-			parent = path->nodes[*level];
-			ret = btrfs_free_extent(trans, root, bytenr,
-						blocksize, parent->start,
-						btrfs_header_owner(parent),
-						*level - 1, 0);
+		ret = btrfs_dec_ref(trans, root, eb, 0);
+		BUG_ON(ret);
+		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
+						  eb->len, flag, 0);
+		BUG_ON(ret);
+		wc->flags[level] |= flag;
+	}
+
+	/*
+	 * the block is shared by multiple trees, so it's not good to
+	 * keep the tree lock
+	 */
+	if (path->locks[level] && level > 0) {
+		btrfs_tree_unlock(eb);
+		path->locks[level] = 0;
+	}
+	return 0;
+}
+
+/*
+ * hepler to process tree block while walking up the tree.
+ *
+ * when wc->stage == DROP_REFERENCE, this function drops
+ * reference count on the block.
+ *
+ * when wc->stage == UPDATE_BACKREF, this function changes
+ * wc->stage back to DROP_REFERENCE if we changed wc->stage
+ * to UPDATE_BACKREF previously while processing the block.
+ *
+ * NOTE: return value 1 means we should stop walking up.
+ */
+static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct walk_control *wc)
+{
+	int ret = 0;
+	int level = wc->level;
+	struct extent_buffer *eb = path->nodes[level];
+	u64 parent = 0;
+
+	if (wc->stage == UPDATE_BACKREF) {
+		BUG_ON(wc->shared_level < level);
+		if (level < wc->shared_level)
+			goto out;
+
+		BUG_ON(wc->refs[level] <= 1);
+		ret = find_next_key(path, level + 1, &wc->update_progress);
+		if (ret > 0)
+			wc->update_ref = 0;
+
+		wc->stage = DROP_REFERENCE;
+		wc->shared_level = -1;
+		path->slots[level] = 0;
+
+		/*
+		 * check reference count again if the block isn't locked.
+		 * we should start walking down the tree again if reference
+		 * count is one.
+		 */
+		if (!path->locks[level]) {
+			BUG_ON(level == 0);
+			btrfs_tree_lock(eb);
+			btrfs_set_lock_blocking(eb);
+			path->locks[level] = 1;
+
+			ret = btrfs_lookup_extent_info(trans, root,
+						       eb->start, eb->len,
+						       &wc->refs[level],
+						       &wc->flags[level]);
 			BUG_ON(ret);
-			path->slots[*level]++;
-			btrfs_tree_unlock(next);
-			free_extent_buffer(next);
-			continue;
+			BUG_ON(wc->refs[level] == 0);
+			if (wc->refs[level] == 1) {
+				btrfs_tree_unlock(eb);
+				path->locks[level] = 0;
+				return 1;
+			}
+		} else {
+			BUG_ON(level != 0);
 		}
+	}
 
-		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+	/* wc->stage == DROP_REFERENCE */
+	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
 
-		*level = btrfs_header_level(next);
-		path->nodes[*level] = next;
-		path->slots[*level] = 0;
-		path->locks[*level] = 1;
-		cond_resched();
+	if (wc->refs[level] == 1) {
+		if (level == 0) {
+			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+				ret = btrfs_dec_ref(trans, root, eb, 1);
+			else
+				ret = btrfs_dec_ref(trans, root, eb, 0);
+			BUG_ON(ret);
+		}
+		/* make block locked assertion in clean_tree_block happy */
+		if (!path->locks[level] &&
+		    btrfs_header_generation(eb) == trans->transid) {
+			btrfs_tree_lock(eb);
+			btrfs_set_lock_blocking(eb);
+			path->locks[level] = 1;
+		}
+		clean_tree_block(trans, root, eb);
+	}
+
+	if (eb == root->node) {
+		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+			parent = eb->start;
+		else
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(eb));
+	} else {
+		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+			parent = path->nodes[level + 1]->start;
+		else
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(path->nodes[level + 1]));
 	}
-out:
-	if (path->nodes[*level] == root->node)
-		parent = path->nodes[*level];
-	else
-		parent = path->nodes[*level + 1];
-	bytenr = path->nodes[*level]->start;
-	blocksize = path->nodes[*level]->len;
 
-	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
-				btrfs_header_owner(parent), *level, 0);
+	ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
+				root->root_key.objectid, level, 0);
 	BUG_ON(ret);
+out:
+	wc->refs[level] = 0;
+	wc->flags[level] = 0;
+	return ret;
+}
+
+static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   struct walk_control *wc)
+{
+	struct extent_buffer *next;
+	struct extent_buffer *cur;
+	u64 bytenr;
+	u64 ptr_gen;
+	u32 blocksize;
+	int level = wc->level;
+	int ret;
+
+	while (level >= 0) {
+		cur = path->nodes[level];
+		BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
 
-	if (path->locks[*level]) {
-		btrfs_tree_unlock(path->nodes[*level]);
-		path->locks[*level] = 0;
+		ret = walk_down_proc(trans, root, path, wc);
+		if (ret > 0)
+			break;
+
+		if (level == 0)
+			break;
+
+		bytenr = btrfs_node_blockptr(cur, path->slots[level]);
+		blocksize = btrfs_level_size(root, level - 1);
+		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
+
+		next = read_tree_block(root, bytenr, blocksize, ptr_gen);
+		btrfs_tree_lock(next);
+		btrfs_set_lock_blocking(next);
+
+		level--;
+		BUG_ON(level != btrfs_header_level(next));
+		path->nodes[level] = next;
+		path->slots[level] = 0;
+		path->locks[level] = 1;
+		wc->level = level;
 	}
-	free_extent_buffer(path->nodes[*level]);
-	path->nodes[*level] = NULL;
-	*level += 1;
-	cond_resched();
 	return 0;
 }
 
-/*
- * helper for dropping snapshots.  This walks back up the tree in the path
- * to find the first node higher up where we haven't yet gone through
- * all the slots
- */
 static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path,
-				 int *level, int max_level)
+				 struct walk_control *wc, int max_level)
 {
-	struct btrfs_root_item *root_item = &root->root_item;
-	int i;
-	int slot;
+	int level = wc->level;
 	int ret;
 
-	for (i = *level; i < max_level && path->nodes[i]; i++) {
-		slot = path->slots[i];
-		if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
-			/*
-			 * there is more work to do in this level.
-			 * Update the drop_progress marker to reflect
-			 * the work we've done so far, and then bump
-			 * the slot number
-			 */
-			path->slots[i]++;
-			WARN_ON(*level == 0);
-			if (max_level == BTRFS_MAX_LEVEL) {
-				btrfs_node_key(path->nodes[i],
-					       &root_item->drop_progress,
-					       path->slots[i]);
-				root_item->drop_level = i;
-			}
-			*level = i;
+	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
+	while (level < max_level && path->nodes[level]) {
+		wc->level = level;
+		if (path->slots[level] + 1 <
+		    btrfs_header_nritems(path->nodes[level])) {
+			path->slots[level]++;
 			return 0;
 		} else {
-			struct extent_buffer *parent;
-
-			/*
-			 * this whole node is done, free our reference
-			 * on it and go up one level
-			 */
-			if (path->nodes[*level] == root->node)
-				parent = path->nodes[*level];
-			else
-				parent = path->nodes[*level + 1];
+			ret = walk_up_proc(trans, root, path, wc);
+			if (ret > 0)
+				return 0;
 
-			clean_tree_block(trans, root, path->nodes[i]);
-			ret = btrfs_free_extent(trans, root,
-						path->nodes[i]->start,
-						path->nodes[i]->len,
-						parent->start,
-						btrfs_header_owner(parent),
-						*level, 0);
-			BUG_ON(ret);
-			if (path->locks[*level]) {
-				btrfs_tree_unlock(path->nodes[i]);
-				path->locks[i] = 0;
+			if (path->locks[level]) {
+				btrfs_tree_unlock(path->nodes[level]);
+				path->locks[level] = 0;
 			}
-			free_extent_buffer(path->nodes[i]);
-			path->nodes[i] = NULL;
-			*level = i + 1;
+			free_extent_buffer(path->nodes[level]);
+			path->nodes[level] = NULL;
+			level++;
 		}
 	}
 	return 1;
 }
 
 /*
- * drop the reference count on the tree rooted at 'snap'.  This traverses
- * the tree freeing any blocks that have a ref count of zero after being
- * decremented.
+ * drop a subvolume tree.
+ *
+ * this function traverses the tree freeing any blocks that only
+ * referenced by the tree.
+ *
+ * when a shared tree block is found. this function decreases its
+ * reference count by one. if update_ref is true, this function
+ * also make sure backrefs for the shared block and all lower level
+ * blocks are properly updated.
  */
-int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
-			*root)
+int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
 {
-	int ret = 0;
-	int wret;
-	int level;
 	struct btrfs_path *path;
-	int update_count;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	struct btrfs_root_item *root_item = &root->root_item;
+	struct walk_control *wc;
+	struct btrfs_key key;
+	int err = 0;
+	int ret;
+	int level;
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
-	level = btrfs_header_level(root->node);
+	wc = kzalloc(sizeof(*wc), GFP_NOFS);
+	BUG_ON(!wc);
+
+	trans = btrfs_start_transaction(tree_root, 1);
+
 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+		level = btrfs_header_level(root->node);
 		path->nodes[level] = btrfs_lock_root_node(root);
 		btrfs_set_lock_blocking(path->nodes[level]);
 		path->slots[level] = 0;
 		path->locks[level] = 1;
+		memset(&wc->update_progress, 0,
+		       sizeof(wc->update_progress));
 	} else {
-		struct btrfs_key key;
-		struct btrfs_disk_key found_key;
-		struct extent_buffer *node;
-
 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+		memcpy(&wc->update_progress, &key,
+		       sizeof(wc->update_progress));
+
 		level = root_item->drop_level;
+		BUG_ON(level == 0);
 		path->lowest_level = level;
-		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-		if (wret < 0) {
-			ret = wret;
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		path->lowest_level = 0;
+		if (ret < 0) {
+			err = ret;
 			goto out;
 		}
-		node = path->nodes[level];
-		btrfs_node_key(node, &found_key, path->slots[level]);
-		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
-			       sizeof(found_key)));
+		btrfs_node_key_to_cpu(path->nodes[level], &key,
+				      path->slots[level]);
+		WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
+
 		/*
 		 * unlock our path, this is safe because only this
 		 * function is allowed to delete this snapshot
 		 */
 		btrfs_unlock_up_safe(path, 0);
+
+		level = btrfs_header_level(root->node);
+		while (1) {
+			btrfs_tree_lock(path->nodes[level]);
+			btrfs_set_lock_blocking(path->nodes[level]);
+
+			ret = btrfs_lookup_extent_info(trans, root,
+						path->nodes[level]->start,
+						path->nodes[level]->len,
+						&wc->refs[level],
+						&wc->flags[level]);
+			BUG_ON(ret);
+			BUG_ON(wc->refs[level] == 0);
+
+			if (level == root_item->drop_level)
+				break;
+
+			btrfs_tree_unlock(path->nodes[level]);
+			WARN_ON(wc->refs[level] != 1);
+			level--;
+		}
 	}
+
+	wc->level = level;
+	wc->shared_level = -1;
+	wc->stage = DROP_REFERENCE;
+	wc->update_ref = update_ref;
+	wc->keep_locks = 0;
+
 	while (1) {
-		unsigned long update;
-		wret = walk_down_tree(trans, root, path, &level);
-		if (wret > 0)
+		ret = walk_down_tree(trans, root, path, wc);
+		if (ret < 0) {
+			err = ret;
 			break;
-		if (wret < 0)
-			ret = wret;
+		}
 
-		wret = walk_up_tree(trans, root, path, &level,
-				    BTRFS_MAX_LEVEL);
-		if (wret > 0)
+		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
+		if (ret < 0) {
+			err = ret;
 			break;
-		if (wret < 0)
-			ret = wret;
-		if (trans->transaction->in_commit ||
-		    trans->transaction->delayed_refs.flushing) {
-			ret = -EAGAIN;
+		}
+
+		if (ret > 0) {
+			BUG_ON(wc->stage != DROP_REFERENCE);
 			break;
 		}
-		for (update_count = 0; update_count < 16; update_count++) {
+
+		if (wc->stage == DROP_REFERENCE) {
+			level = wc->level;
+			btrfs_node_key(path->nodes[level],
+				       &root_item->drop_progress,
+				       path->slots[level]);
+			root_item->drop_level = level;
+		}
+
+		BUG_ON(wc->level == 0);
+		if (trans->transaction->in_commit ||
+		    trans->transaction->delayed_refs.flushing) {
+			ret = btrfs_update_root(trans, tree_root,
+						&root->root_key,
+						root_item);
+			BUG_ON(ret);
+
+			btrfs_end_transaction(trans, tree_root);
+			trans = btrfs_start_transaction(tree_root, 1);
+		} else {
+			unsigned long update;
 			update = trans->delayed_ref_updates;
 			trans->delayed_ref_updates = 0;
 			if (update)
-				btrfs_run_delayed_refs(trans, root, update);
-			else
-				break;
+				btrfs_run_delayed_refs(trans, tree_root,
+						       update);
 		}
 	}
+	btrfs_release_path(root, path);
+	BUG_ON(err);
+
+	ret = btrfs_del_root(trans, tree_root, &root->root_key);
+	BUG_ON(ret);
+
+	free_extent_buffer(root->node);
+	free_extent_buffer(root->commit_root);
+	kfree(root);
 out:
+	btrfs_end_transaction(trans, tree_root);
+	kfree(wc);
 	btrfs_free_path(path);
-	return ret;
+	return err;
 }
 
+/*
+ * drop subtree rooted at tree block 'node'.
+ *
+ * NOTE: this function will unlock and release tree block 'node'
+ */
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct extent_buffer *node,
 			struct extent_buffer *parent)
 {
 	struct btrfs_path *path;
+	struct walk_control *wc;
 	int level;
 	int parent_level;
 	int ret = 0;
 	int wret;
 
+	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
+
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
+	wc = kzalloc(sizeof(*wc), GFP_NOFS);
+	BUG_ON(!wc);
+
 	btrfs_assert_tree_locked(parent);
 	parent_level = btrfs_header_level(parent);
 	extent_buffer_get(parent);
@@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 
 	btrfs_assert_tree_locked(node);
 	level = btrfs_header_level(node);
-	extent_buffer_get(node);
 	path->nodes[level] = node;
 	path->slots[level] = 0;
+	path->locks[level] = 1;
+
+	wc->refs[parent_level] = 1;
+	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+	wc->level = level;
+	wc->shared_level = -1;
+	wc->stage = DROP_REFERENCE;
+	wc->update_ref = 0;
+	wc->keep_locks = 1;
 
 	while (1) {
-		wret = walk_down_tree(trans, root, path, &level);
-		if (wret < 0)
+		wret = walk_down_tree(trans, root, path, wc);
+		if (wret < 0) {
 			ret = wret;
-		if (wret != 0)
 			break;
+		}
 
-		wret = walk_up_tree(trans, root, path, &level, parent_level);
+		wret = walk_up_tree(trans, root, path, wc, parent_level);
 		if (wret < 0)
 			ret = wret;
 		if (wret != 0)
 			break;
 	}
 
+	kfree(wc);
 	btrfs_free_path(path);
 	return ret;
 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 126477eaecf5..7c3cd248d8d6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -151,7 +151,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
 	}
 	if (end_pos > isize) {
 		i_size_write(inode, end_pos);
-		btrfs_update_inode(trans, root, inode);
+		/* we've only changed i_size in ram, and we haven't updated
+		 * the disk i_size.  There is no need to log the inode
+		 * at this time.
+		 */
 	}
 	err = btrfs_end_transaction(trans, root);
 out_unlock:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8612b3a09811..7ffa3d34ea19 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2122,10 +2122,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	 * any xattrs or acls
 	 */
 	maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
-	if (!maybe_acls) {
-		BTRFS_I(inode)->i_acl = NULL;
-		BTRFS_I(inode)->i_default_acl = NULL;
-	}
+	if (!maybe_acls)
+		cache_no_acl(inode);
 
 	BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
 						alloc_group_block, 0);
@@ -3141,9 +3139,6 @@ static noinline void init_btrfs_i(struct inode *inode)
 {
 	struct btrfs_inode *bi = BTRFS_I(inode);
 
-	bi->i_acl = BTRFS_ACL_NOT_CACHED;
-	bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
-
 	bi->generation = 0;
 	bi->sequence = 0;
 	bi->last_trans = 0;
@@ -3585,12 +3580,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 		owner = 1;
 	BTRFS_I(inode)->block_group =
 			btrfs_find_block_group(root, 0, alloc_hint, owner);
-	if ((mode & S_IFREG)) {
-		if (btrfs_test_opt(root, NODATASUM))
-			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
-		if (btrfs_test_opt(root, NODATACOW))
-			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
-	}
 
 	key[0].objectid = objectid;
 	btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -3645,6 +3634,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 
 	btrfs_inherit_iflags(inode, dir);
 
+	if ((mode & S_IFREG)) {
+		if (btrfs_test_opt(root, NODATASUM))
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
+		if (btrfs_test_opt(root, NODATACOW))
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
+	}
+
 	insert_inode_hash(inode);
 	inode_tree_add(inode);
 	return inode;
@@ -4640,8 +4636,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->last_trans = 0;
 	ei->logged_trans = 0;
 	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
-	ei->i_acl = BTRFS_ACL_NOT_CACHED;
-	ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
 	INIT_LIST_HEAD(&ei->i_orphan);
 	INIT_LIST_HEAD(&ei->ordered_operations);
 	return &ei->vfs_inode;
@@ -4655,13 +4649,6 @@ void btrfs_destroy_inode(struct inode *inode)
 	WARN_ON(!list_empty(&inode->i_dentry));
 	WARN_ON(inode->i_data.nrpages);
 
-	if (BTRFS_I(inode)->i_acl &&
-	    BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
-		posix_acl_release(BTRFS_I(inode)->i_acl);
-	if (BTRFS_I(inode)->i_default_acl &&
-	    BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
-		posix_acl_release(BTRFS_I(inode)->i_default_acl);
-
 	/*
 	 * Make sure we're properly removed from the ordered operation
 	 * lists.
@@ -5096,6 +5083,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
 	struct extent_map *em;
 	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
 	int ret;
 
 	alloc_start = offset & ~mask;
@@ -5114,6 +5102,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 			goto out;
 	}
 
+	root = BTRFS_I(inode)->root;
+
+	ret = btrfs_check_data_free_space(root, inode,
+					  alloc_end - alloc_start);
+	if (ret)
+		goto out;
+
 	locked_end = alloc_end - 1;
 	while (1) {
 		struct btrfs_ordered_extent *ordered;
@@ -5121,7 +5116,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 		trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
 		if (!trans) {
 			ret = -EIO;
-			goto out;
+			goto out_free;
 		}
 
 		/* the extent lock is ordered inside the running
@@ -5182,6 +5177,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 		      GFP_NOFS);
 
 	btrfs_end_transaction(trans, BTRFS_I(inode)->root);
+out_free:
+	btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
 out:
 	mutex_unlock(&inode->i_mutex);
 	return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index eff18f5b5362..9f4db848db10 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1028,7 +1028,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 						struct btrfs_file_extent_item);
 			comp = btrfs_file_extent_compression(leaf, extent);
 			type = btrfs_file_extent_type(leaf, extent);
-			if (type == BTRFS_FILE_EXTENT_REG) {
+			if (type == BTRFS_FILE_EXTENT_REG ||
+			    type == BTRFS_FILE_EXTENT_PREALLOC) {
 				disko = btrfs_file_extent_disk_bytenr(leaf,
 								      extent);
 				diskl = btrfs_file_extent_disk_num_bytes(leaf,
@@ -1051,7 +1052,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 			new_key.objectid = inode->i_ino;
 			new_key.offset = key.offset + destoff - off;
 
-			if (type == BTRFS_FILE_EXTENT_REG) {
+			if (type == BTRFS_FILE_EXTENT_REG ||
+			    type == BTRFS_FILE_EXTENT_PREALLOC) {
 				ret = btrfs_insert_empty_item(trans, root, path,
 							      &new_key, size);
 				if (ret)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b23dc209ae10..008397934778 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work)
 		btrfs_end_transaction(trans, root);
 	}
 
-	btrfs_drop_dead_root(reloc_root);
+	btrfs_drop_snapshot(reloc_root, 0);
 
 	if (atomic_dec_and_test(async->num_pending))
 		complete(async->done);
@@ -2075,9 +2075,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 
 			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
 			BUG_ON(ret);
-
-			btrfs_tree_unlock(eb);
-			free_extent_buffer(eb);
 		}
 		if (!lowest) {
 			btrfs_tree_unlock(upper->eb);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4e83457ea253..2dbf1c1f56ee 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
 	return 0;
 }
 
+#if 0
 /*
  * when dropping snapshots, we generate a ton of delayed refs, and it makes
  * sense not to join the transaction while it is trying to flush the current
@@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root)
 	btrfs_btree_balance_dirty(tree_root, nr);
 	return ret;
 }
+#endif
 
 /*
  * new snapshots need to be created at a very specific time in the
@@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
 	while (!list_empty(&list)) {
 		root = list_entry(list.next, struct btrfs_root, root_list);
 		list_del_init(&root->root_list);
-		btrfs_drop_dead_root(root);
+		btrfs_drop_snapshot(root, 0);
 	}
 	return 0;
 }
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index b48689839428..92888aa90749 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -5,7 +5,11 @@ client generated ones by default (mount option "serverino" turned
 on by default if server supports it).  Add forceuid and forcegid
 mount options (so that when negotiating unix extensions specifying
 which uid mounted does not immediately force the server's reported
-uids to be overridden).
+uids to be overridden).  Add support for scope mount parm. Improve
+hard link detection to use same inode for both.  Do not set
+read-only dos attribute on directories (for chmod) since Windows
+explorer special cases this attribute bit for directories for
+a different purpose.
 
 Version 1.58
 ------------
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 1b09f1670061..20692fbfdb24 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -49,6 +49,7 @@
 #define ASN1_OJI	6	/* Object Identifier  */
 #define ASN1_OJD	7	/* Object Description */
 #define ASN1_EXT	8	/* External */
+#define ASN1_ENUM	10	/* Enumerated */
 #define ASN1_SEQ	16	/* Sequence */
 #define ASN1_SET	17	/* Set */
 #define ASN1_NUMSTR	18	/* Numerical String */
@@ -78,10 +79,12 @@
 #define SPNEGO_OID_LEN 7
 #define NTLMSSP_OID_LEN  10
 #define KRB5_OID_LEN  7
+#define KRB5U2U_OID_LEN  8
 #define MSKRB5_OID_LEN  7
 static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
 static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
 static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
+static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
 static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
 
 /*
@@ -122,6 +125,28 @@ asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
 	return 1;
 }
 
+#if 0 /* will be needed later by spnego decoding/encoding of ntlmssp */
+static unsigned char
+asn1_enum_decode(struct asn1_ctx *ctx, __le32 *val)
+{
+	unsigned char ch;
+
+	if (ctx->pointer >= ctx->end) {
+		ctx->error = ASN1_ERR_DEC_EMPTY;
+		return 0;
+	}
+
+	ch = *(ctx->pointer)++; /* ch has 0xa, ptr points to lenght octet */
+	if ((ch) == ASN1_ENUM)  /* if ch value is ENUM, 0xa */
+		*val = *(++(ctx->pointer)); /* value has enum value */
+	else
+		return 0;
+
+	ctx->pointer++;
+	return 1;
+}
+#endif
+
 static unsigned char
 asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
 {
@@ -476,10 +501,9 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 	unsigned int cls, con, tag, oidlen, rc;
 	bool use_ntlmssp = false;
 	bool use_kerberos = false;
+	bool use_kerberosu2u = false;
 	bool use_mskerberos = false;
 
-	*secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
-
 	/* cifs_dump_mem(" Received SecBlob ", security_blob, length); */
 
 	asn1_open(&ctx, security_blob, length);
@@ -515,6 +539,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		return 0;
 	}
 
+	/* SPNEGO */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding negTokenInit"));
 		return 0;
@@ -526,6 +551,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		return 0;
 	}
 
+	/* negTokenInit */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding negTokenInit"));
 		return 0;
@@ -537,6 +563,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		return 0;
 	}
 
+	/* sequence */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding 2nd part of negTokenInit"));
 		return 0;
@@ -548,6 +575,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		return 0;
 	}
 
+	/* sequence of */
 	if (asn1_header_decode
 	    (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding 2nd part of negTokenInit"));
@@ -560,6 +588,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		return 0;
 	}
 
+	/* list of security mechanisms */
 	while (!asn1_eoc_decode(&ctx, sequence_end)) {
 		rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
 		if (!rc) {
@@ -576,11 +605,15 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 
 				if (compare_oid(oid, oidlen, MSKRB5_OID,
 						MSKRB5_OID_LEN) &&
-						!use_kerberos)
+						!use_mskerberos)
 					use_mskerberos = true;
+				else if (compare_oid(oid, oidlen, KRB5U2U_OID,
+						     KRB5U2U_OID_LEN) &&
+						     !use_kerberosu2u)
+					use_kerberosu2u = true;
 				else if (compare_oid(oid, oidlen, KRB5_OID,
 						     KRB5_OID_LEN) &&
-						     !use_mskerberos)
+						     !use_kerberos)
 					use_kerberos = true;
 				else if (compare_oid(oid, oidlen, NTLMSSP_OID,
 						     NTLMSSP_OID_LEN))
@@ -593,7 +626,12 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 		}
 	}
 
+	/* mechlistMIC */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+		/* Check if we have reached the end of the blob, but with
+		   no mechListMic (e.g. NTLMSSP instead of KRB5) */
+		if (ctx.error == ASN1_ERR_DEC_EMPTY)
+			goto decode_negtoken_exit;
 		cFYI(1, ("Error decoding last part negTokenInit exit3"));
 		return 0;
 	} else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
@@ -602,6 +640,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 			 cls, con, tag, end, *end));
 		return 0;
 	}
+
+	/* sequence */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding last part negTokenInit exit5"));
 		return 0;
@@ -611,6 +651,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 			cls, con, tag, end, *end));
 	}
 
+	/* sequence of */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding last part negTokenInit exit 7"));
 		return 0;
@@ -619,6 +660,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 			 cls, con, tag, end, *end));
 		return 0;
 	}
+
+	/* general string */
 	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
 		cFYI(1, ("Error decoding last part negTokenInit exit9"));
 		return 0;
@@ -630,13 +673,13 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 	}
 	cFYI(1, ("Need to call asn1_octets_decode() function for %s",
 		 ctx.pointer));	/* is this UTF-8 or ASCII? */
-
+decode_negtoken_exit:
 	if (use_kerberos)
 		*secType = Kerberos;
 	else if (use_mskerberos)
 		*secType = MSKerberos;
 	else if (use_ntlmssp)
-		*secType = NTLMSSP;
+		*secType = RawNTLMSSP;
 
 	return 1;
 }
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 4a4581cb2b5e..051caecf7d67 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -86,6 +86,9 @@ struct key_type cifs_spnego_key_type = {
 /* strlen of ";user=" */
 #define USER_KEY_LEN		6
 
+/* strlen of ";pid=0x" */
+#define PID_KEY_LEN		7
+
 /* get a key struct with a SPNEGO security blob, suitable for session setup */
 struct key *
 cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
@@ -103,7 +106,8 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 		   IP_KEY_LEN + INET6_ADDRSTRLEN +
 		   MAX_MECH_STR_LEN +
 		   UID_KEY_LEN + (sizeof(uid_t) * 2) +
-		   USER_KEY_LEN + strlen(sesInfo->userName) + 1;
+		   USER_KEY_LEN + strlen(sesInfo->userName) +
+		   PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
 
 	spnego_key = ERR_PTR(-ENOMEM);
 	description = kzalloc(desc_len, GFP_KERNEL);
@@ -141,6 +145,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 	dp = description + strlen(description);
 	sprintf(dp, ";user=%s", sesInfo->userName);
 
+	dp = description + strlen(description);
+	sprintf(dp, ";pid=0x%x", current->pid);
+
 	cFYI(1, ("key description = %s", description));
 	spnego_key = request_key(&cifs_spnego_key_type, description, "");
 
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1403b5d86a73..6941c22398a6 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -327,7 +327,7 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl)
 
 static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 		       struct cifs_sid *pownersid, struct cifs_sid *pgrpsid,
-		       struct inode *inode)
+		       struct cifs_fattr *fattr)
 {
 	int i;
 	int num_aces = 0;
@@ -340,7 +340,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 	if (!pdacl) {
 		/* no DACL in the security descriptor, set
 		   all the permissions for user/group/other */
-		inode->i_mode |= S_IRWXUGO;
+		fattr->cf_mode |= S_IRWXUGO;
 		return;
 	}
 
@@ -357,7 +357,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 	/* reset rwx permissions for user/group/other.
 	   Also, if num_aces is 0 i.e. DACL has no ACEs,
 	   user/group/other have no permissions */
-	inode->i_mode &= ~(S_IRWXUGO);
+	fattr->cf_mode &= ~(S_IRWXUGO);
 
 	acl_base = (char *)pdacl;
 	acl_size = sizeof(struct cifs_acl);
@@ -379,17 +379,17 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 			if (compare_sids(&(ppace[i]->sid), pownersid))
 				access_flags_to_mode(ppace[i]->access_req,
 						     ppace[i]->type,
-						     &(inode->i_mode),
+						     &fattr->cf_mode,
 						     &user_mask);
 			if (compare_sids(&(ppace[i]->sid), pgrpsid))
 				access_flags_to_mode(ppace[i]->access_req,
 						     ppace[i]->type,
-						     &(inode->i_mode),
+						     &fattr->cf_mode,
 						     &group_mask);
 			if (compare_sids(&(ppace[i]->sid), &sid_everyone))
 				access_flags_to_mode(ppace[i]->access_req,
 						     ppace[i]->type,
-						     &(inode->i_mode),
+						     &fattr->cf_mode,
 						     &other_mask);
 
 /*			memcpy((void *)(&(cifscred->aces[i])),
@@ -464,7 +464,7 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
 
 /* Convert CIFS ACL to POSIX form */
 static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
-			  struct inode *inode)
+			  struct cifs_fattr *fattr)
 {
 	int rc;
 	struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
@@ -472,7 +472,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
 	char *end_of_acl = ((char *)pntsd) + acl_len;
 	__u32 dacloffset;
 
-	if ((inode == NULL) || (pntsd == NULL))
+	if (pntsd == NULL)
 		return -EIO;
 
 	owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
@@ -497,7 +497,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
 
 	if (dacloffset)
 		parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
-			   group_sid_ptr, inode);
+			   group_sid_ptr, fattr);
 	else
 		cFYI(1, ("no ACL")); /* BB grant all or default perms? */
 
@@ -508,7 +508,6 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
 	memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
 			sizeof(struct cifs_sid)); */
 
-
 	return 0;
 }
 
@@ -671,8 +670,9 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 }
 
 /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
-void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
-		     const char *path, const __u16 *pfid)
+void
+cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
+		  struct inode *inode, const char *path, const __u16 *pfid)
 {
 	struct cifs_ntsd *pntsd = NULL;
 	u32 acllen = 0;
@@ -687,7 +687,7 @@ void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
 
 	/* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
 	if (pntsd)
-		rc = parse_sec_desc(pntsd, acllen, inode);
+		rc = parse_sec_desc(pntsd, acllen, fattr);
 	if (rc)
 		cFYI(1, ("parse sec desc failed rc = %d", rc));
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0d92114195ab..44f30504b82d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -308,7 +308,6 @@ cifs_alloc_inode(struct super_block *sb)
 	if (!cifs_inode)
 		return NULL;
 	cifs_inode->cifsAttrs = 0x20;	/* default */
-	atomic_set(&cifs_inode->inUse, 0);
 	cifs_inode->time = 0;
 	cifs_inode->write_behind_rc = 0;
 	/* Until the file is open and we have gotten oplock
@@ -333,6 +332,27 @@ cifs_destroy_inode(struct inode *inode)
 	kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
 }
 
+static void
+cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
+{
+	seq_printf(s, ",addr=");
+
+	switch (server->addr.sockAddr.sin_family) {
+	case AF_INET:
+		seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr);
+		break;
+	case AF_INET6:
+		seq_printf(s, "%pI6",
+			   &server->addr.sockAddr6.sin6_addr.s6_addr);
+		if (server->addr.sockAddr6.sin6_scope_id)
+			seq_printf(s, "%%%u",
+				   server->addr.sockAddr6.sin6_scope_id);
+		break;
+	default:
+		seq_printf(s, "(unknown)");
+	}
+}
+
 /*
  * cifs_show_options() is for displaying mount options in /proc/mounts.
  * Not all settable options are displayed but most of the important
@@ -343,83 +363,64 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 {
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *tcon;
-	struct TCP_Server_Info *server;
 
 	cifs_sb = CIFS_SB(m->mnt_sb);
+	tcon = cifs_sb->tcon;
 
-	if (cifs_sb) {
-		tcon = cifs_sb->tcon;
-		if (tcon) {
-			seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName);
-			if (tcon->ses) {
-				if (tcon->ses->userName)
-					seq_printf(s, ",username=%s",
-					   tcon->ses->userName);
-				if (tcon->ses->domainName)
-					seq_printf(s, ",domain=%s",
-					   tcon->ses->domainName);
-				server = tcon->ses->server;
-				if (server) {
-					seq_printf(s, ",addr=");
-					switch (server->addr.sockAddr6.
-						sin6_family) {
-					case AF_INET6:
-						seq_printf(s, "%pI6",
-							   &server->addr.sockAddr6.sin6_addr);
-						break;
-					case AF_INET:
-						seq_printf(s, "%pI4",
-							   &server->addr.sockAddr.sin_addr.s_addr);
-						break;
-					}
-				}
-			}
-			if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
-			   !(tcon->unix_ext))
-				seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
-			if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
-			   !(tcon->unix_ext))
-				seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
-			if (!tcon->unix_ext) {
-				seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
+	seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName);
+	if (tcon->ses->userName)
+		seq_printf(s, ",username=%s", tcon->ses->userName);
+	if (tcon->ses->domainName)
+		seq_printf(s, ",domain=%s", tcon->ses->domainName);
+
+	seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
+		seq_printf(s, ",forceuid");
+
+	seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
+		seq_printf(s, ",forcegid");
+
+	cifs_show_address(s, tcon->ses->server);
+
+	if (!tcon->unix_ext)
+		seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
 					   cifs_sb->mnt_file_mode,
 					   cifs_sb->mnt_dir_mode);
-			}
-			if (tcon->seal)
-				seq_printf(s, ",seal");
-			if (tcon->nocase)
-				seq_printf(s, ",nocase");
-			if (tcon->retry)
-				seq_printf(s, ",hard");
-		}
-		if (cifs_sb->prepath)
-			seq_printf(s, ",prepath=%s", cifs_sb->prepath);
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
-			seq_printf(s, ",posixpaths");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
-			seq_printf(s, ",setuids");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
-			seq_printf(s, ",serverino");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
-			seq_printf(s, ",directio");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
-			seq_printf(s, ",nouser_xattr");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
-			seq_printf(s, ",mapchars");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
-			seq_printf(s, ",sfu");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-			seq_printf(s, ",nobrl");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
-			seq_printf(s, ",cifsacl");
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
-			seq_printf(s, ",dynperm");
-		if (m->mnt_sb->s_flags & MS_POSIXACL)
-			seq_printf(s, ",acl");
-
-		seq_printf(s, ",rsize=%d", cifs_sb->rsize);
-		seq_printf(s, ",wsize=%d", cifs_sb->wsize);
-	}
+	if (tcon->seal)
+		seq_printf(s, ",seal");
+	if (tcon->nocase)
+		seq_printf(s, ",nocase");
+	if (tcon->retry)
+		seq_printf(s, ",hard");
+	if (cifs_sb->prepath)
+		seq_printf(s, ",prepath=%s", cifs_sb->prepath);
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
+		seq_printf(s, ",posixpaths");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
+		seq_printf(s, ",setuids");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+		seq_printf(s, ",serverino");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
+		seq_printf(s, ",directio");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
+		seq_printf(s, ",nouser_xattr");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
+		seq_printf(s, ",mapchars");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
+		seq_printf(s, ",sfu");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
+		seq_printf(s, ",nobrl");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
+		seq_printf(s, ",cifsacl");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
+		seq_printf(s, ",dynperm");
+	if (m->mnt_sb->s_flags & MS_POSIXACL)
+		seq_printf(s, ",acl");
+
+	seq_printf(s, ",rsize=%d", cifs_sb->rsize);
+	seq_printf(s, ",wsize=%d", cifs_sb->wsize);
+
 	return 0;
 }
 
@@ -535,9 +536,14 @@ static void cifs_umount_begin(struct super_block *sb)
 	if (tcon == NULL)
 		return;
 
-	lock_kernel();
 	read_lock(&cifs_tcp_ses_lock);
-	if (tcon->tc_count == 1)
+	if ((tcon->tc_count > 1) || (tcon->tidStatus == CifsExiting)) {
+		/* we have other mounts to same share or we have
+		   already tried to force umount this and woken up
+		   all waiting network requests, nothing to do */
+		read_unlock(&cifs_tcp_ses_lock);
+		return;
+	} else if (tcon->tc_count == 1)
 		tcon->tidStatus = CifsExiting;
 	read_unlock(&cifs_tcp_ses_lock);
 
@@ -552,9 +558,7 @@ static void cifs_umount_begin(struct super_block *sb)
 		wake_up_all(&tcon->ses->server->response_q);
 		msleep(1);
 	}
-/* BB FIXME - finish add checks for tidStatus BB */
 
-	unlock_kernel();
 	return;
 }
 
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 9570a0e8023f..6c170948300d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -24,6 +24,19 @@
 
 #define ROOT_I 2
 
+/*
+ * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
+ * so that it will fit.
+ */
+static inline ino_t
+cifs_uniqueid_to_ino_t(u64 fileid)
+{
+	ino_t ino = (ino_t) fileid;
+	if (sizeof(ino_t) < sizeof(u64))
+		ino ^= fileid >> (sizeof(u64)-sizeof(ino_t)) * 8;
+	return ino;
+}
+
 extern struct file_system_type cifs_fs_type;
 extern const struct address_space_operations cifs_addr_ops;
 extern const struct address_space_operations cifs_addr_ops_smallbuf;
@@ -100,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
 
-#define CIFS_VERSION   "1.59"
+#define CIFS_VERSION   "1.60"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a61ab772c6f6..63f6cdfa5638 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -83,7 +83,7 @@ enum securityEnum {
 	NTLM,			/* Legacy NTLM012 auth with NTLM hash */
 	NTLMv2,			/* Legacy NTLM auth with NTLMv2 hash */
 	RawNTLMSSP,		/* NTLMSSP without SPNEGO, NTLMv2 hash */
-	NTLMSSP,		/* NTLMSSP via SPNEGO, NTLMv2 hash */
+/*	NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */
 	Kerberos,		/* Kerberos via SPNEGO */
 	MSKerberos,		/* MS Kerberos via SPNEGO */
 };
@@ -364,13 +364,13 @@ struct cifsInodeInfo {
 	struct list_head openFileList;
 	int write_behind_rc;
 	__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
-	atomic_t inUse;	 /* num concurrent users (local openers cifs) of file*/
 	unsigned long time;	/* jiffies of last update/check of inode */
 	bool clientCanCacheRead:1;	/* read oplock */
 	bool clientCanCacheAll:1;	/* read and writebehind oplock */
 	bool oplockPending:1;
 	bool delete_pending:1;		/* DELETE_ON_CLOSE is set */
 	u64  server_eof;		/* current file size on server */
+	u64  uniqueid;			/* server inode number */
 	struct inode vfs_inode;
 };
 
@@ -472,6 +472,32 @@ struct dfs_info3_param {
 	char *node_name;
 };
 
+/*
+ * common struct for holding inode info when searching for or updating an
+ * inode with new info
+ */
+
+#define CIFS_FATTR_DFS_REFERRAL		0x1
+#define CIFS_FATTR_DELETE_PENDING	0x2
+#define CIFS_FATTR_NEED_REVAL		0x4
+
+struct cifs_fattr {
+	u32		cf_flags;
+	u32		cf_cifsattrs;
+	u64		cf_uniqueid;
+	u64		cf_eof;
+	u64		cf_bytes;
+	uid_t		cf_uid;
+	gid_t		cf_gid;
+	umode_t		cf_mode;
+	dev_t		cf_rdev;
+	unsigned int	cf_nlink;
+	unsigned int	cf_dtype;
+	struct timespec	cf_atime;
+	struct timespec	cf_mtime;
+	struct timespec	cf_ctime;
+};
+
 static inline void free_dfs_info_param(struct dfs_info3_param *param)
 {
 	if (param) {
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index a785f69dbc9f..2d07f890a842 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2328,19 +2328,7 @@ struct file_attrib_tag {
 typedef struct {
 	__le32 NextEntryOffset;
 	__u32 ResumeKey; /* as with FileIndex - no need to convert */
-	__le64 EndOfFile;
-	__le64 NumOfBytes;
-	__le64 LastStatusChange; /*SNIA specs DCE time for the 3 time fields */
-	__le64 LastAccessTime;
-	__le64 LastModificationTime;
-	__le64 Uid;
-	__le64 Gid;
-	__le32 Type;
-	__le64 DevMajor;
-	__le64 DevMinor;
-	__le64 UniqueId;
-	__le64 Permissions;
-	__le64 Nlinks;
+	FILE_UNIX_BASIC_INFO basic;
 	char FileName[1];
 } __attribute__((packed)) FILE_UNIX_INFO; /* level 0x202 */
 
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f9452329bcce..da8fbf565991 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -74,7 +74,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr);
 extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
 			enum securityEnum *secType);
-extern int cifs_inet_pton(const int, const char *source, void *dst);
+extern int cifs_convert_address(char *src, void *dst);
 extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
 extern void header_assemble(struct smb_hdr *, char /* command */ ,
 			    const struct cifsTconInfo *, int /* length of
@@ -98,9 +98,13 @@ extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
 extern int cifs_posix_open(char *full_path, struct inode **pinode,
 			   struct super_block *sb, int mode, int oflags,
 			   int *poplock, __u16 *pnetfid, int xid);
-extern void posix_fill_in_inode(struct inode *tmp_inode,
-				FILE_UNIX_BASIC_INFO *pData, int isNewInode);
-extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum);
+extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
+				     FILE_UNIX_BASIC_INFO *info,
+				     struct cifs_sb_info *cifs_sb);
+extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
+extern struct inode *cifs_iget(struct super_block *sb,
+			       struct cifs_fattr *fattr);
+
 extern int cifs_get_inode_info(struct inode **pinode,
 			const unsigned char *search_path,
 			FILE_ALL_INFO *pfile_info,
@@ -108,8 +112,9 @@ extern int cifs_get_inode_info(struct inode **pinode,
 extern int cifs_get_inode_info_unix(struct inode **pinode,
 			const unsigned char *search_path,
 			struct super_block *sb, int xid);
-extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
-			    const char *path, const __u16 *pfid);
+extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
+			      struct cifs_fattr *fattr, struct inode *inode,
+			      const char *path, const __u16 *pfid);
 extern int mode_to_acl(struct inode *inode, const char *path, __u64);
 
 extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
@@ -215,7 +220,11 @@ struct cifs_unix_set_info_args {
 	dev_t	device;
 };
 
-extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon,
+extern int CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
+				  const struct cifs_unix_set_info_args *args,
+				  u16 fid, u32 pid_of_opener);
+
+extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *pTcon,
 			char *fileName,
 			const struct cifs_unix_set_info_args *args,
 			const struct nls_table *nls_codepage,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b84c61d5bca4..922f5fe2084c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -594,7 +594,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	else if (secFlags & CIFSSEC_MAY_KRB5)
 		server->secType = Kerberos;
 	else if (secFlags & CIFSSEC_MAY_NTLMSSP)
-		server->secType = NTLMSSP;
+		server->secType = RawNTLMSSP;
 	else if (secFlags & CIFSSEC_MAY_LANMAN)
 		server->secType = LANMAN;
 /* #ifdef CONFIG_CIFS_EXPERIMENTAL
@@ -729,7 +729,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
 	 * the tcon is no longer on the list, so no need to take lock before
 	 * checking this.
 	 */
-	if (tcon->need_reconnect)
+	if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
 		return 0;
 
 	rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon,
@@ -5074,10 +5074,114 @@ SetAttrLgcyRetry:
 }
 #endif /* temporarily unneeded SetAttr legacy function */
 
+static void
+cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
+			const struct cifs_unix_set_info_args *args)
+{
+	u64 mode = args->mode;
+
+	/*
+	 * Samba server ignores set of file size to zero due to bugs in some
+	 * older clients, but we should be precise - we use SetFileSize to
+	 * set file size and do not want to truncate file size to zero
+	 * accidently as happened on one Samba server beta by putting
+	 * zero instead of -1 here
+	 */
+	data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
+	data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
+	data_offset->LastStatusChange = cpu_to_le64(args->ctime);
+	data_offset->LastAccessTime = cpu_to_le64(args->atime);
+	data_offset->LastModificationTime = cpu_to_le64(args->mtime);
+	data_offset->Uid = cpu_to_le64(args->uid);
+	data_offset->Gid = cpu_to_le64(args->gid);
+	/* better to leave device as zero when it is  */
+	data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
+	data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
+	data_offset->Permissions = cpu_to_le64(mode);
+
+	if (S_ISREG(mode))
+		data_offset->Type = cpu_to_le32(UNIX_FILE);
+	else if (S_ISDIR(mode))
+		data_offset->Type = cpu_to_le32(UNIX_DIR);
+	else if (S_ISLNK(mode))
+		data_offset->Type = cpu_to_le32(UNIX_SYMLINK);
+	else if (S_ISCHR(mode))
+		data_offset->Type = cpu_to_le32(UNIX_CHARDEV);
+	else if (S_ISBLK(mode))
+		data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV);
+	else if (S_ISFIFO(mode))
+		data_offset->Type = cpu_to_le32(UNIX_FIFO);
+	else if (S_ISSOCK(mode))
+		data_offset->Type = cpu_to_le32(UNIX_SOCKET);
+}
+
 int
-CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
-		   const struct cifs_unix_set_info_args *args,
-		   const struct nls_table *nls_codepage, int remap)
+CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
+		       const struct cifs_unix_set_info_args *args,
+		       u16 fid, u32 pid_of_opener)
+{
+	struct smb_com_transaction2_sfi_req *pSMB  = NULL;
+	FILE_UNIX_BASIC_INFO *data_offset;
+	int rc = 0;
+	u16 params, param_offset, offset, byte_count, count;
+
+	cFYI(1, ("Set Unix Info (via SetFileInfo)"));
+	rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
+
+	if (rc)
+		return rc;
+
+	pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
+	pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
+
+	params = 6;
+	pSMB->MaxSetupCount = 0;
+	pSMB->Reserved = 0;
+	pSMB->Flags = 0;
+	pSMB->Timeout = 0;
+	pSMB->Reserved2 = 0;
+	param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
+	offset = param_offset + params;
+
+	data_offset = (FILE_UNIX_BASIC_INFO *)
+				((char *)(&pSMB->hdr.Protocol) + offset);
+	count = sizeof(FILE_UNIX_BASIC_INFO);
+
+	pSMB->MaxParameterCount = cpu_to_le16(2);
+	/* BB find max SMB PDU from sess */
+	pSMB->MaxDataCount = cpu_to_le16(1000);
+	pSMB->SetupCount = 1;
+	pSMB->Reserved3 = 0;
+	pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION);
+	byte_count = 3 /* pad */  + params + count;
+	pSMB->DataCount = cpu_to_le16(count);
+	pSMB->ParameterCount = cpu_to_le16(params);
+	pSMB->TotalDataCount = pSMB->DataCount;
+	pSMB->TotalParameterCount = pSMB->ParameterCount;
+	pSMB->ParameterOffset = cpu_to_le16(param_offset);
+	pSMB->DataOffset = cpu_to_le16(offset);
+	pSMB->Fid = fid;
+	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
+	pSMB->Reserved4 = 0;
+	pSMB->hdr.smb_buf_length += byte_count;
+	pSMB->ByteCount = cpu_to_le16(byte_count);
+
+	cifs_fill_unix_set_info(data_offset, args);
+
+	rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
+	if (rc)
+		cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc));
+
+	/* Note: On -EAGAIN error only caller can retry on handle based calls
+		since file handle passed in no longer valid */
+
+	return rc;
+}
+
+int
+CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
+		       const struct cifs_unix_set_info_args *args,
+		       const struct nls_table *nls_codepage, int remap)
 {
 	TRANSACTION2_SPI_REQ *pSMB = NULL;
 	TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5086,7 +5190,6 @@ CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
 	int bytes_returned = 0;
 	FILE_UNIX_BASIC_INFO *data_offset;
 	__u16 params, param_offset, offset, count, byte_count;
-	__u64 mode = args->mode;
 
 	cFYI(1, ("In SetUID/GID/Mode"));
 setPermsRetry:
@@ -5137,38 +5240,8 @@ setPermsRetry:
 	pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
 	pSMB->Reserved4 = 0;
 	pSMB->hdr.smb_buf_length += byte_count;
-	/* Samba server ignores set of file size to zero due to bugs in some
-	older clients, but we should be precise - we use SetFileSize to
-	set file size and do not want to truncate file size to zero
-	accidently as happened on one Samba server beta by putting
-	zero instead of -1 here */
-	data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
-	data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
-	data_offset->LastStatusChange = cpu_to_le64(args->ctime);
-	data_offset->LastAccessTime = cpu_to_le64(args->atime);
-	data_offset->LastModificationTime = cpu_to_le64(args->mtime);
-	data_offset->Uid = cpu_to_le64(args->uid);
-	data_offset->Gid = cpu_to_le64(args->gid);
-	/* better to leave device as zero when it is  */
-	data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
-	data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
-	data_offset->Permissions = cpu_to_le64(mode);
-
-	if (S_ISREG(mode))
-		data_offset->Type = cpu_to_le32(UNIX_FILE);
-	else if (S_ISDIR(mode))
-		data_offset->Type = cpu_to_le32(UNIX_DIR);
-	else if (S_ISLNK(mode))
-		data_offset->Type = cpu_to_le32(UNIX_SYMLINK);
-	else if (S_ISCHR(mode))
-		data_offset->Type = cpu_to_le32(UNIX_CHARDEV);
-	else if (S_ISBLK(mode))
-		data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV);
-	else if (S_ISFIFO(mode))
-		data_offset->Type = cpu_to_le32(UNIX_FIFO);
-	else if (S_ISSOCK(mode))
-		data_offset->Type = cpu_to_le32(UNIX_SOCKET);
 
+	cifs_fill_unix_set_info(data_offset, args);
 
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 97f4311b9a8e..e16d7592116a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -70,7 +70,6 @@ struct smb_vol {
 	mode_t file_mode;
 	mode_t dir_mode;
 	unsigned secFlg;
-	bool rw:1;
 	bool retry:1;
 	bool intr:1;
 	bool setuids:1;
@@ -832,7 +831,6 @@ cifs_parse_mount_options(char *options, const char *devname,
 	vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
 
 	/* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
-	vol->rw = true;
 	/* default is always to request posix paths. */
 	vol->posix_paths = 1;
 	/* default to using server inode numbers where available */
@@ -1199,7 +1197,9 @@ cifs_parse_mount_options(char *options, const char *devname,
 		} else if (strnicmp(data, "guest", 5) == 0) {
 			/* ignore */
 		} else if (strnicmp(data, "rw", 2) == 0) {
-			vol->rw = true;
+			/* ignore */
+		} else if (strnicmp(data, "ro", 2) == 0) {
+			/* ignore */
 		} else if (strnicmp(data, "noblocksend", 11) == 0) {
 			vol->noblocksnd = 1;
 		} else if (strnicmp(data, "noautotune", 10) == 0) {
@@ -1218,8 +1218,6 @@ cifs_parse_mount_options(char *options, const char *devname,
 			    parse these options again and set anything and it
 			    is ok to just ignore them */
 			continue;
-		} else if (strnicmp(data, "ro", 2) == 0) {
-			vol->rw = false;
 		} else if (strnicmp(data, "hard", 4) == 0) {
 			vol->retry = 1;
 		} else if (strnicmp(data, "soft", 4) == 0) {
@@ -1386,8 +1384,10 @@ cifs_find_tcp_session(struct sockaddr_storage *addr)
 		     server->addr.sockAddr.sin_addr.s_addr))
 			continue;
 		else if (addr->ss_family == AF_INET6 &&
-			 !ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr,
-					  &addr6->sin6_addr))
+			 (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr,
+					   &addr6->sin6_addr) ||
+			  server->addr.sockAddr6.sin6_scope_id !=
+					   addr6->sin6_scope_id))
 			continue;
 
 		++server->srv_count;
@@ -1433,28 +1433,15 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 
 	memset(&addr, 0, sizeof(struct sockaddr_storage));
 
-	if (volume_info->UNCip && volume_info->UNC) {
-		rc = cifs_inet_pton(AF_INET, volume_info->UNCip,
-				    &sin_server->sin_addr.s_addr);
-
-		if (rc <= 0) {
-			/* not ipv4 address, try ipv6 */
-			rc = cifs_inet_pton(AF_INET6, volume_info->UNCip,
-					    &sin_server6->sin6_addr.in6_u);
-			if (rc > 0)
-				addr.ss_family = AF_INET6;
-		} else {
-			addr.ss_family = AF_INET;
-		}
+	cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip));
 
-		if (rc <= 0) {
+	if (volume_info->UNCip && volume_info->UNC) {
+		rc = cifs_convert_address(volume_info->UNCip, &addr);
+		if (!rc) {
 			/* we failed translating address */
 			rc = -EINVAL;
 			goto out_err;
 		}
-
-		cFYI(1, ("UNC: %s ip: %s", volume_info->UNC,
-			 volume_info->UNCip));
 	} else if (volume_info->UNCip) {
 		/* BB using ip addr as tcp_ses name to connect to the
 		   DFS root below */
@@ -1513,14 +1500,14 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 		cFYI(1, ("attempting ipv6 connect"));
 		/* BB should we allow ipv6 on port 139? */
 		/* other OS never observed in Wild doing 139 with v6 */
+		sin_server6->sin6_port = htons(volume_info->port);
 		memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
 			sizeof(struct sockaddr_in6));
-		sin_server6->sin6_port = htons(volume_info->port);
 		rc = ipv6_connect(tcp_ses);
 	} else {
+		sin_server->sin_port = htons(volume_info->port);
 		memcpy(&tcp_ses->addr.sockAddr, sin_server,
 			sizeof(struct sockaddr_in));
-		sin_server->sin_port = htons(volume_info->port);
 		rc = ipv4_connect(tcp_ses);
 	}
 	if (rc < 0) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3758965d73d5..4326ffd90fa9 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -188,6 +188,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
 	FILE_UNIX_BASIC_INFO *presp_data;
 	__u32 posix_flags = 0;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+	struct cifs_fattr fattr;
 
 	cFYI(1, ("posix open %s", full_path));
 
@@ -236,22 +237,21 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
 	if (presp_data->Type == cpu_to_le32(-1))
 		goto posix_open_ret; /* open ok, caller does qpathinfo */
 
-	/* get new inode and set it up */
 	if (!pinode)
 		goto posix_open_ret; /* caller does not need info */
 
+	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
+
+	/* get new inode and set it up */
 	if (*pinode == NULL) {
-		__u64 unique_id = le64_to_cpu(presp_data->UniqueId);
-		*pinode = cifs_new_inode(sb, &unique_id);
+		*pinode = cifs_iget(sb, &fattr);
+		if (!*pinode) {
+			rc = -ENOMEM;
+			goto posix_open_ret;
+		}
+	} else {
+		cifs_fattr_to_inode(*pinode, &fattr);
 	}
-	/* else an inode was passed in. Update its info, don't create one */
-
-	/* We do not need to close the file if new_inode fails since
-	   the caller will retry qpathinfo as long as inode is null */
-	if (*pinode == NULL)
-		goto posix_open_ret;
-
-	posix_fill_in_inode(*pinode, presp_data, 1);
 
 	cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only);
 
@@ -307,8 +307,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	if (oplockEnabled)
@@ -424,9 +425,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 			args.uid = NO_CHANGE_64;
 			args.gid = NO_CHANGE_64;
 		}
-		CIFSSMBUnixSetInfo(xid, tcon, full_path, &args,
-			cifs_sb->local_nls,
-			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+		CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
+					cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
 	} else {
 		/* BB implement mode setting via Windows security
 		   descriptors e.g. */
@@ -514,10 +516,10 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 			args.uid = NO_CHANGE_64;
 			args.gid = NO_CHANGE_64;
 		}
-		rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path,
-			&args, cifs_sb->local_nls,
-			cifs_sb->mnt_cifs_flags &
-				CIFS_MOUNT_MAP_SPECIAL_CHR);
+		rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
+					    cifs_sb->local_nls,
+					    cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
 
 		if (!rc) {
 			rc = cifs_get_inode_info_unix(&newinode, full_path,
@@ -540,8 +542,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 			buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
 			if (buf == NULL) {
 				kfree(full_path);
+				rc = -ENOMEM;
 				FreeXid(xid);
-				return -ENOMEM;
+				return rc;
 			}
 
 			rc = CIFSSMBOpen(xid, pTcon, full_path,
@@ -641,6 +644,15 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 			}
 	}
 
+	/*
+	 * O_EXCL: optimize away the lookup, but don't hash the dentry. Let
+	 * the VFS handle the create.
+	 */
+	if (nd->flags & LOOKUP_EXCL) {
+		d_instantiate(direntry, NULL);
+		return 0;
+	}
+
 	/* can not grab the rename sem here since it would
 	deadlock in the cases (beginning of sys_rename itself)
 	in which we already have the sb rename sem */
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index df4a306f697e..87948147d7ec 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -35,26 +35,11 @@
  * 		0 - name is not IP
  */
 static int
-is_ip(const char *name)
+is_ip(char *name)
 {
-	int rc;
-	struct sockaddr_in sin_server;
-	struct sockaddr_in6 sin_server6;
-
-	rc = cifs_inet_pton(AF_INET, name,
-			&sin_server.sin_addr.s_addr);
-
-	if (rc <= 0) {
-		/* not ipv4 address, try ipv6 */
-		rc = cifs_inet_pton(AF_INET6, name,
-				&sin_server6.sin6_addr.in6_u);
-		if (rc > 0)
-			return 1;
-	} else {
-		return 1;
-	}
-	/* we failed translating address */
-	return 0;
+	struct sockaddr_storage ss;
+
+	return cifs_convert_address(name, &ss);
 }
 
 static int
@@ -72,7 +57,7 @@ dns_resolver_instantiate(struct key *key, const void *data,
 	ip[datalen] = '\0';
 
 	/* make sure this looks like an address */
-	if (!is_ip((const char *) ip)) {
+	if (!is_ip(ip)) {
 		kfree(ip);
 		return -EINVAL;
 	}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 06866841b97f..c34b7f8a217b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -300,14 +300,16 @@ int cifs_open(struct inode *inode, struct file *file)
 	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
 	pCifsFile = cifs_fill_filedata(file);
 	if (pCifsFile) {
+		rc = 0;
 		FreeXid(xid);
-		return 0;
+		return rc;
 	}
 
 	full_path = build_path_from_dentry(file->f_path.dentry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
@@ -446,9 +448,9 @@ int cifs_open(struct inode *inode, struct file *file)
 				.mtime	= NO_CHANGE_64,
 				.device	= 0,
 			};
-			CIFSSMBUnixSetInfo(xid, tcon, full_path, &args,
-					    cifs_sb->local_nls,
-					    cifs_sb->mnt_cifs_flags &
+			CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
+					       cifs_sb->local_nls,
+					       cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 		}
 	}
@@ -491,11 +493,12 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
 		return -EBADF;
 
 	xid = GetXid();
-	mutex_unlock(&pCifsFile->fh_mutex);
+	mutex_lock(&pCifsFile->fh_mutex);
 	if (!pCifsFile->invalidHandle) {
-		mutex_lock(&pCifsFile->fh_mutex);
+		mutex_unlock(&pCifsFile->fh_mutex);
+		rc = 0;
 		FreeXid(xid);
-		return 0;
+		return rc;
 	}
 
 	if (file->f_path.dentry == NULL) {
@@ -524,7 +527,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
 	if (full_path == NULL) {
 		rc = -ENOMEM;
 reopen_error_exit:
-		mutex_lock(&pCifsFile->fh_mutex);
+		mutex_unlock(&pCifsFile->fh_mutex);
 		FreeXid(xid);
 		return rc;
 	}
@@ -566,14 +569,14 @@ reopen_error_exit:
 			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
 				CIFS_MOUNT_MAP_SPECIAL_CHR);
 	if (rc) {
-		mutex_lock(&pCifsFile->fh_mutex);
+		mutex_unlock(&pCifsFile->fh_mutex);
 		cFYI(1, ("cifs_open returned 0x%x", rc));
 		cFYI(1, ("oplock: %d", oplock));
 	} else {
 reopen_success:
 		pCifsFile->netfid = netfid;
 		pCifsFile->invalidHandle = false;
-		mutex_lock(&pCifsFile->fh_mutex);
+		mutex_unlock(&pCifsFile->fh_mutex);
 		pCifsInode = CIFS_I(inode);
 		if (pCifsInode) {
 			if (can_flush) {
@@ -845,8 +848,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 	tcon = cifs_sb->tcon;
 
 	if (file->private_data == NULL) {
+		rc = -EBADF;
 		FreeXid(xid);
-		return -EBADF;
+		return rc;
 	}
 	netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
 
@@ -1805,8 +1809,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 	pTcon = cifs_sb->tcon;
 
 	if (file->private_data == NULL) {
+		rc = -EBADF;
 		FreeXid(xid);
-		return -EBADF;
+		return rc;
 	}
 	open_file = (struct cifsFileInfo *)file->private_data;
 
@@ -1885,8 +1890,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 	pTcon = cifs_sb->tcon;
 
 	if (file->private_data == NULL) {
+		rc = -EBADF;
 		FreeXid(xid);
-		return -EBADF;
+		return rc;
 	}
 	open_file = (struct cifsFileInfo *)file->private_data;
 
@@ -2019,8 +2025,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 
 	xid = GetXid();
 	if (file->private_data == NULL) {
+		rc = -EBADF;
 		FreeXid(xid);
-		return -EBADF;
+		return rc;
 	}
 	open_file = (struct cifsFileInfo *)file->private_data;
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -2185,8 +2192,9 @@ static int cifs_readpage(struct file *file, struct page *page)
 	xid = GetXid();
 
 	if (file->private_data == NULL) {
+		rc = -EBADF;
 		FreeXid(xid);
-		return -EBADF;
+		return rc;
 	}
 
 	cFYI(1, ("readpage %p at offset %d 0x%x\n",
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index fad882b075ba..18afe57b2461 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -77,239 +77,202 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
 	}
 }
 
-static void cifs_unix_info_to_inode(struct inode *inode,
-		FILE_UNIX_BASIC_INFO *info, int force_uid_gid)
+/* populate an inode with info from a cifs_fattr struct */
+void
+cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
 {
+	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-	struct cifsInodeInfo *cifsInfo = CIFS_I(inode);
-	__u64 num_of_bytes = le64_to_cpu(info->NumOfBytes);
-	__u64 end_of_file = le64_to_cpu(info->EndOfFile);
+	unsigned long oldtime = cifs_i->time;
+
+	inode->i_atime = fattr->cf_atime;
+	inode->i_mtime = fattr->cf_mtime;
+	inode->i_ctime = fattr->cf_ctime;
+	inode->i_rdev = fattr->cf_rdev;
+	inode->i_nlink = fattr->cf_nlink;
+	inode->i_uid = fattr->cf_uid;
+	inode->i_gid = fattr->cf_gid;
+
+	/* if dynperm is set, don't clobber existing mode */
+	if (inode->i_state & I_NEW ||
+	    !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM))
+		inode->i_mode = fattr->cf_mode;
+
+	cifs_i->cifsAttrs = fattr->cf_cifsattrs;
+	cifs_i->uniqueid = fattr->cf_uniqueid;
+
+	if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)
+		cifs_i->time = 0;
+	else
+		cifs_i->time = jiffies;
+
+	cFYI(1, ("inode 0x%p old_time=%ld new_time=%ld", inode,
+		 oldtime, cifs_i->time));
 
-	inode->i_atime = cifs_NTtimeToUnix(info->LastAccessTime);
-	inode->i_mtime =
-		cifs_NTtimeToUnix(info->LastModificationTime);
-	inode->i_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
-	inode->i_mode = le64_to_cpu(info->Permissions);
+	cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
+
+	/*
+	 * Can't safely change the file size here if the client is writing to
+	 * it due to potential races.
+	 */
+	spin_lock(&inode->i_lock);
+	if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) {
+		i_size_write(inode, fattr->cf_eof);
+
+		/*
+		 * i_blocks is not related to (i_size / i_blksize),
+		 * but instead 512 byte (2**9) size is required for
+		 * calculating num blocks.
+		 */
+		inode->i_blocks = (512 - 1 + fattr->cf_bytes) >> 9;
+	}
+	spin_unlock(&inode->i_lock);
+
+	cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL);
+}
+
+/* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */
+void
+cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
+			 struct cifs_sb_info *cifs_sb)
+{
+	memset(fattr, 0, sizeof(*fattr));
+	fattr->cf_uniqueid = le64_to_cpu(info->UniqueId);
+	fattr->cf_bytes = le64_to_cpu(info->NumOfBytes);
+	fattr->cf_eof = le64_to_cpu(info->EndOfFile);
+
+	fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
+	fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime);
+	fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
+	fattr->cf_mode = le64_to_cpu(info->Permissions);
 
 	/*
 	 * Since we set the inode type below we need to mask off
 	 * to avoid strange results if bits set above.
 	 */
-	inode->i_mode &= ~S_IFMT;
+	fattr->cf_mode &= ~S_IFMT;
 	switch (le32_to_cpu(info->Type)) {
 	case UNIX_FILE:
-		inode->i_mode |= S_IFREG;
+		fattr->cf_mode |= S_IFREG;
+		fattr->cf_dtype = DT_REG;
 		break;
 	case UNIX_SYMLINK:
-		inode->i_mode |= S_IFLNK;
+		fattr->cf_mode |= S_IFLNK;
+		fattr->cf_dtype = DT_LNK;
 		break;
 	case UNIX_DIR:
-		inode->i_mode |= S_IFDIR;
+		fattr->cf_mode |= S_IFDIR;
+		fattr->cf_dtype = DT_DIR;
 		break;
 	case UNIX_CHARDEV:
-		inode->i_mode |= S_IFCHR;
-		inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor),
-				      le64_to_cpu(info->DevMinor) & MINORMASK);
+		fattr->cf_mode |= S_IFCHR;
+		fattr->cf_dtype = DT_CHR;
+		fattr->cf_rdev = MKDEV(le64_to_cpu(info->DevMajor),
+				       le64_to_cpu(info->DevMinor) & MINORMASK);
 		break;
 	case UNIX_BLOCKDEV:
-		inode->i_mode |= S_IFBLK;
-		inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor),
-				      le64_to_cpu(info->DevMinor) & MINORMASK);
+		fattr->cf_mode |= S_IFBLK;
+		fattr->cf_dtype = DT_BLK;
+		fattr->cf_rdev = MKDEV(le64_to_cpu(info->DevMajor),
+				       le64_to_cpu(info->DevMinor) & MINORMASK);
 		break;
 	case UNIX_FIFO:
-		inode->i_mode |= S_IFIFO;
+		fattr->cf_mode |= S_IFIFO;
+		fattr->cf_dtype = DT_FIFO;
 		break;
 	case UNIX_SOCKET:
-		inode->i_mode |= S_IFSOCK;
+		fattr->cf_mode |= S_IFSOCK;
+		fattr->cf_dtype = DT_SOCK;
 		break;
 	default:
 		/* safest to call it a file if we do not know */
-		inode->i_mode |= S_IFREG;
+		fattr->cf_mode |= S_IFREG;
+		fattr->cf_dtype = DT_REG;
 		cFYI(1, ("unknown type %d", le32_to_cpu(info->Type)));
 		break;
 	}
 
-	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) &&
-	    !force_uid_gid)
-		inode->i_uid = cifs_sb->mnt_uid;
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
+		fattr->cf_uid = cifs_sb->mnt_uid;
 	else
-		inode->i_uid = le64_to_cpu(info->Uid);
+		fattr->cf_uid = le64_to_cpu(info->Uid);
 
-	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) &&
-	    !force_uid_gid)
-		inode->i_gid = cifs_sb->mnt_gid;
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
+		fattr->cf_gid = cifs_sb->mnt_gid;
 	else
-		inode->i_gid = le64_to_cpu(info->Gid);
-
-	inode->i_nlink = le64_to_cpu(info->Nlinks);
-
-	cifsInfo->server_eof = end_of_file;
-	spin_lock(&inode->i_lock);
-	if (is_size_safe_to_change(cifsInfo, end_of_file)) {
-		/*
-		 * We can not safely change the file size here if the client
-		 * is writing to it due to potential races.
-		 */
-		i_size_write(inode, end_of_file);
+		fattr->cf_gid = le64_to_cpu(info->Gid);
 
-		/*
-		 * i_blocks is not related to (i_size / i_blksize),
-		 * but instead 512 byte (2**9) size is required for
-		 * calculating num blocks.
-		 */
-		inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
-	}
-	spin_unlock(&inode->i_lock);
+	fattr->cf_nlink = le64_to_cpu(info->Nlinks);
 }
 
-
 /*
- *	Needed to setup inode data for the directory which is the
- *	junction to the new submount (ie to setup the fake directory
- *      which represents a DFS referral)
- */
-static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat,
-			       struct super_block *sb)
-{
-	struct inode *pinode = NULL;
-
-	memset(pfnd_dat, 0, sizeof(FILE_UNIX_BASIC_INFO));
-
-/*	__le64 pfnd_dat->EndOfFile = cpu_to_le64(0);
-	__le64 pfnd_dat->NumOfBytes = cpu_to_le64(0);
-	__u64 UniqueId = 0;  */
-	pfnd_dat->LastStatusChange =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->LastAccessTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->LastModificationTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->Type = cpu_to_le32(UNIX_DIR);
-	pfnd_dat->Permissions = cpu_to_le64(S_IXUGO | S_IRWXU);
-	pfnd_dat->Nlinks = cpu_to_le64(2);
-	if (sb->s_root)
-		pinode = sb->s_root->d_inode;
-	if (pinode == NULL)
-		return;
-
-	/* fill in default values for the remaining based on root
-	   inode since we can not query the server for this inode info */
-	pfnd_dat->DevMajor = cpu_to_le64(MAJOR(pinode->i_rdev));
-	pfnd_dat->DevMinor = cpu_to_le64(MINOR(pinode->i_rdev));
-	pfnd_dat->Uid = cpu_to_le64(pinode->i_uid);
-	pfnd_dat->Gid = cpu_to_le64(pinode->i_gid);
-}
-
-/**
- * cifs_new inode - create new inode, initialize, and hash it
- * @sb - pointer to superblock
- * @inum - if valid pointer and serverino is enabled, replace i_ino with val
- *
- * Create a new inode, initialize it for CIFS and hash it. Returns the new
- * inode or NULL if one couldn't be allocated.
+ * Fill a cifs_fattr struct with fake inode info.
  *
- * If the share isn't mounted with "serverino" or inum is a NULL pointer then
- * we'll just use the inode number assigned by new_inode(). Note that this can
- * mean i_ino collisions since the i_ino assigned by new_inode is not
- * guaranteed to be unique.
+ * Needed to setup cifs_fattr data for the directory which is the
+ * junction to the new submount (ie to setup the fake directory
+ * which represents a DFS referral).
  */
-struct inode *
-cifs_new_inode(struct super_block *sb, __u64 *inum)
+void
+cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
 {
-	struct inode *inode;
-
-	inode = new_inode(sb);
-	if (inode == NULL)
-		return NULL;
-
-	/*
-	 * BB: Is i_ino == 0 legal? Here, we assume that it is. If it isn't we
-	 *     stop passing inum as ptr. Are there sanity checks we can use to
-	 *     ensure that the server is really filling in that field? Also,
-	 *     if serverino is disabled, perhaps we should be using iunique()?
-	 */
-	if (inum && (CIFS_SB(sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM))
-		inode->i_ino = (unsigned long) *inum;
-
-	/*
-	 * must set this here instead of cifs_alloc_inode since VFS will
-	 * clobber i_flags
-	 */
-	if (sb->s_flags & MS_NOATIME)
-		inode->i_flags |= S_NOATIME | S_NOCMTIME;
-
-	insert_inode_hash(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 
-	return inode;
+	cFYI(1, ("creating fake fattr for DFS referral"));
+
+	memset(fattr, 0, sizeof(*fattr));
+	fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
+	fattr->cf_uid = cifs_sb->mnt_uid;
+	fattr->cf_gid = cifs_sb->mnt_gid;
+	fattr->cf_atime = CURRENT_TIME;
+	fattr->cf_ctime = CURRENT_TIME;
+	fattr->cf_mtime = CURRENT_TIME;
+	fattr->cf_nlink = 2;
+	fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL;
 }
 
 int cifs_get_inode_info_unix(struct inode **pinode,
-	const unsigned char *full_path, struct super_block *sb, int xid)
+			     const unsigned char *full_path,
+			     struct super_block *sb, int xid)
 {
-	int rc = 0;
+	int rc;
 	FILE_UNIX_BASIC_INFO find_data;
-	struct cifsTconInfo *pTcon;
-	struct inode *inode;
+	struct cifs_fattr fattr;
+	struct cifsTconInfo *tcon;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
-	bool is_dfs_referral = false;
-	struct cifsInodeInfo *cifsInfo;
-	__u64 num_of_bytes;
-	__u64 end_of_file;
 
-	pTcon = cifs_sb->tcon;
+	tcon = cifs_sb->tcon;
 	cFYI(1, ("Getting info on %s", full_path));
 
 	/* could have done a find first instead but this returns more info */
-	rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data,
+	rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
 				  cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
-	if (rc == -EREMOTE && !is_dfs_referral) {
-		is_dfs_referral = true;
-		cFYI(DBG2, ("DFS ref"));
-		/* for DFS, server does not give us real inode data */
-		fill_fake_finddataunix(&find_data, sb);
-		rc = 0;
-	} else if (rc)
-		goto cgiiu_exit;
 
-	num_of_bytes = le64_to_cpu(find_data.NumOfBytes);
-	end_of_file = le64_to_cpu(find_data.EndOfFile);
+	if (!rc) {
+		cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb);
+	} else if (rc == -EREMOTE) {
+		cifs_create_dfs_fattr(&fattr, sb);
+		rc = 0;
+	} else {
+		return rc;
+	}
 
-	/* get new inode */
 	if (*pinode == NULL) {
-		__u64 unique_id = le64_to_cpu(find_data.UniqueId);
-		*pinode = cifs_new_inode(sb, &unique_id);
-		if (*pinode == NULL) {
+		/* get new inode */
+		*pinode = cifs_iget(sb, &fattr);
+		if (!*pinode)
 			rc = -ENOMEM;
-			goto cgiiu_exit;
-		}
+	} else {
+		/* we already have inode, update it */
+		cifs_fattr_to_inode(*pinode, &fattr);
 	}
 
-	inode = *pinode;
-	cifsInfo = CIFS_I(inode);
-
-	cFYI(1, ("Old time %ld", cifsInfo->time));
-	cifsInfo->time = jiffies;
-	cFYI(1, ("New time %ld", cifsInfo->time));
-	/* this is ok to set on every inode revalidate */
-	atomic_set(&cifsInfo->inUse, 1);
-
-	cifs_unix_info_to_inode(inode, &find_data, 0);
-
-	if (num_of_bytes < end_of_file)
-		cFYI(1, ("allocation size less than end of file"));
-	cFYI(1, ("Size %ld and blocks %llu",
-		(unsigned long) inode->i_size,
-		(unsigned long long)inode->i_blocks));
-
-	cifs_set_ops(inode, is_dfs_referral);
-cgiiu_exit:
 	return rc;
 }
 
-static int decode_sfu_inode(struct inode *inode, __u64 size,
-			    const unsigned char *path,
-			    struct cifs_sb_info *cifs_sb, int xid)
+static int
+cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
+	      struct cifs_sb_info *cifs_sb, int xid)
 {
 	int rc;
 	int oplock = 0;
@@ -321,10 +284,15 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
 
 	pbuf = buf;
 
-	if (size == 0) {
-		inode->i_mode |= S_IFIFO;
+	fattr->cf_mode &= ~S_IFMT;
+
+	if (fattr->cf_eof == 0) {
+		fattr->cf_mode |= S_IFIFO;
+		fattr->cf_dtype = DT_FIFO;
 		return 0;
-	} else if (size < 8) {
+	} else if (fattr->cf_eof < 8) {
+		fattr->cf_mode |= S_IFREG;
+		fattr->cf_dtype = DT_REG;
 		return -EINVAL;	 /* EOPNOTSUPP? */
 	}
 
@@ -336,42 +304,46 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
 	if (rc == 0) {
 		int buf_type = CIFS_NO_BUFFER;
 			/* Read header */
-		rc = CIFSSMBRead(xid, pTcon,
-				 netfid,
+		rc = CIFSSMBRead(xid, pTcon, netfid,
 				 24 /* length */, 0 /* offset */,
 				 &bytes_read, &pbuf, &buf_type);
 		if ((rc == 0) && (bytes_read >= 8)) {
 			if (memcmp("IntxBLK", pbuf, 8) == 0) {
 				cFYI(1, ("Block device"));
-				inode->i_mode |= S_IFBLK;
+				fattr->cf_mode |= S_IFBLK;
+				fattr->cf_dtype = DT_BLK;
 				if (bytes_read == 24) {
 					/* we have enough to decode dev num */
 					__u64 mjr; /* major */
 					__u64 mnr; /* minor */
 					mjr = le64_to_cpu(*(__le64 *)(pbuf+8));
 					mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
-					inode->i_rdev = MKDEV(mjr, mnr);
+					fattr->cf_rdev = MKDEV(mjr, mnr);
 				}
 			} else if (memcmp("IntxCHR", pbuf, 8) == 0) {
 				cFYI(1, ("Char device"));
-				inode->i_mode |= S_IFCHR;
+				fattr->cf_mode |= S_IFCHR;
+				fattr->cf_dtype = DT_CHR;
 				if (bytes_read == 24) {
 					/* we have enough to decode dev num */
 					__u64 mjr; /* major */
 					__u64 mnr; /* minor */
 					mjr = le64_to_cpu(*(__le64 *)(pbuf+8));
 					mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
-					inode->i_rdev = MKDEV(mjr, mnr);
+					fattr->cf_rdev = MKDEV(mjr, mnr);
 				}
 			} else if (memcmp("IntxLNK", pbuf, 7) == 0) {
 				cFYI(1, ("Symlink"));
-				inode->i_mode |= S_IFLNK;
+				fattr->cf_mode |= S_IFLNK;
+				fattr->cf_dtype = DT_LNK;
 			} else {
-				inode->i_mode |= S_IFREG; /* file? */
+				fattr->cf_mode |= S_IFREG; /* file? */
+				fattr->cf_dtype = DT_REG;
 				rc = -EOPNOTSUPP;
 			}
 		} else {
-			inode->i_mode |= S_IFREG; /* then it is a file */
+			fattr->cf_mode |= S_IFREG; /* then it is a file */
+			fattr->cf_dtype = DT_REG;
 			rc = -EOPNOTSUPP; /* or some unknown SFU type */
 		}
 		CIFSSMBClose(xid, pTcon, netfid);
@@ -381,9 +353,13 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
 
 #define SFBITS_MASK (S_ISVTX | S_ISGID | S_ISUID)  /* SETFILEBITS valid bits */
 
-static int get_sfu_mode(struct inode *inode,
-			const unsigned char *path,
-			struct cifs_sb_info *cifs_sb, int xid)
+/*
+ * Fetch mode bits as provided by SFU.
+ *
+ * FIXME: Doesn't this clobber the type bit we got from cifs_sfu_type ?
+ */
+static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
+			 struct cifs_sb_info *cifs_sb, int xid)
 {
 #ifdef CONFIG_CIFS_XATTR
 	ssize_t rc;
@@ -391,68 +367,80 @@ static int get_sfu_mode(struct inode *inode,
 	__u32 mode;
 
 	rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS",
-			ea_value, 4 /* size of buf */, cifs_sb->local_nls,
-		cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+			    ea_value, 4 /* size of buf */, cifs_sb->local_nls,
+			    cifs_sb->mnt_cifs_flags &
+				CIFS_MOUNT_MAP_SPECIAL_CHR);
 	if (rc < 0)
 		return (int)rc;
 	else if (rc > 3) {
 		mode = le32_to_cpu(*((__le32 *)ea_value));
-		inode->i_mode &= ~SFBITS_MASK;
-		cFYI(1, ("special bits 0%o org mode 0%o", mode, inode->i_mode));
-		inode->i_mode = (mode &  SFBITS_MASK) | inode->i_mode;
+		fattr->cf_mode &= ~SFBITS_MASK;
+		cFYI(1, ("special bits 0%o org mode 0%o", mode,
+			 fattr->cf_mode));
+		fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode;
 		cFYI(1, ("special mode bits 0%o", mode));
-		return 0;
-	} else {
-		return 0;
 	}
+
+	return 0;
 #else
 	return -EOPNOTSUPP;
 #endif
 }
 
-/*
- *	Needed to setup inode data for the directory which is the
- *	junction to the new submount (ie to setup the fake directory
- *      which represents a DFS referral)
- */
-static void fill_fake_finddata(FILE_ALL_INFO *pfnd_dat,
-			       struct super_block *sb)
+/* Fill a cifs_fattr struct with info from FILE_ALL_INFO */
+void
+cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
+		       struct cifs_sb_info *cifs_sb, bool adjust_tz)
 {
-	memset(pfnd_dat, 0, sizeof(FILE_ALL_INFO));
-
-/*	__le64 pfnd_dat->AllocationSize = cpu_to_le64(0);
-	__le64 pfnd_dat->EndOfFile = cpu_to_le64(0);
-	__u8 pfnd_dat->DeletePending = 0;
-	__u8 pfnd_data->Directory = 0;
-	__le32 pfnd_dat->EASize = 0;
-	__u64 pfnd_dat->IndexNumber = 0;
-	__u64 pfnd_dat->IndexNumber1 = 0;  */
-	pfnd_dat->CreationTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->LastAccessTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->LastWriteTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->ChangeTime =
-		cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	pfnd_dat->Attributes = cpu_to_le32(ATTR_DIRECTORY);
-	pfnd_dat->NumberOfLinks = cpu_to_le32(2);
+	memset(fattr, 0, sizeof(*fattr));
+	fattr->cf_cifsattrs = le32_to_cpu(info->Attributes);
+	if (info->DeletePending)
+		fattr->cf_flags |= CIFS_FATTR_DELETE_PENDING;
+
+	if (info->LastAccessTime)
+		fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
+	else
+		fattr->cf_atime = CURRENT_TIME;
+
+	fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
+	fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
+
+	if (adjust_tz) {
+		fattr->cf_ctime.tv_sec += cifs_sb->tcon->ses->server->timeAdj;
+		fattr->cf_mtime.tv_sec += cifs_sb->tcon->ses->server->timeAdj;
+	}
+
+	fattr->cf_eof = le64_to_cpu(info->EndOfFile);
+	fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
+
+	if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
+		fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
+		fattr->cf_dtype = DT_DIR;
+	} else {
+		fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
+		fattr->cf_dtype = DT_REG;
+
+		/* clear write bits if ATTR_READONLY is set */
+		if (fattr->cf_cifsattrs & ATTR_READONLY)
+			fattr->cf_mode &= ~(S_IWUGO);
+	}
+
+	fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
+
+	fattr->cf_uid = cifs_sb->mnt_uid;
+	fattr->cf_gid = cifs_sb->mnt_gid;
 }
 
 int cifs_get_inode_info(struct inode **pinode,
 	const unsigned char *full_path, FILE_ALL_INFO *pfindData,
 	struct super_block *sb, int xid, const __u16 *pfid)
 {
-	int rc = 0;
-	__u32 attr;
-	struct cifsInodeInfo *cifsInfo;
+	int rc = 0, tmprc;
 	struct cifsTconInfo *pTcon;
-	struct inode *inode;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 	char *buf = NULL;
 	bool adjustTZ = false;
-	bool is_dfs_referral = false;
-	umode_t default_mode;
+	struct cifs_fattr fattr;
 
 	pTcon = cifs_sb->tcon;
 	cFYI(1, ("Getting info on %s", full_path));
@@ -487,163 +475,82 @@ int cifs_get_inode_info(struct inode **pinode,
 			adjustTZ = true;
 		}
 	}
-	/* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */
-	if (rc == -EREMOTE) {
-		is_dfs_referral = true;
-		fill_fake_finddata(pfindData, sb);
+
+	if (!rc) {
+		cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *) pfindData,
+				       cifs_sb, adjustTZ);
+	} else if (rc == -EREMOTE) {
+		cifs_create_dfs_fattr(&fattr, sb);
 		rc = 0;
-	} else if (rc)
+	} else {
 		goto cgii_exit;
+	}
 
-	attr = le32_to_cpu(pfindData->Attributes);
-
-	/* get new inode */
+	/*
+	 * If an inode wasn't passed in, then get the inode number
+	 *
+	 * Is an i_ino of zero legal? Can we use that to check if the server
+	 * supports returning inode numbers?  Are there other sanity checks we
+	 * can use to ensure that the server is really filling in that field?
+	 *
+	 * We can not use the IndexNumber field by default from Windows or
+	 * Samba (in ALL_INFO buf) but we can request it explicitly. The SNIA
+	 * CIFS spec claims that this value is unique within the scope of a
+	 * share, and the windows docs hint that it's actually unique
+	 * per-machine.
+	 *
+	 * There may be higher info levels that work but are there Windows
+	 * server or network appliances for which IndexNumber field is not
+	 * guaranteed unique?
+	 */
 	if (*pinode == NULL) {
-		__u64 inode_num;
-		__u64 *pinum = &inode_num;
-
-		/* Is an i_ino of zero legal? Can we use that to check
-		   if the server supports returning inode numbers?  Are
-		   there other sanity checks we can use to ensure that
-		   the server is really filling in that field? */
-
-		/* We can not use the IndexNumber field by default from
-		   Windows or Samba (in ALL_INFO buf) but we can request
-		   it explicitly.  It may not be unique presumably if
-		   the server has multiple devices mounted under one share */
-
-		/* There may be higher info levels that work but are
-		   there Windows server or network appliances for which
-		   IndexNumber field is not guaranteed unique? */
-
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
 			int rc1 = 0;
 
 			rc1 = CIFSGetSrvInodeNumber(xid, pTcon,
-					full_path, pinum,
+					full_path, &fattr.cf_uniqueid,
 					cifs_sb->local_nls,
 					cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 			if (rc1) {
-				cFYI(1, ("GetSrvInodeNum rc %d", rc1));
-				pinum = NULL;
 				/* BB EOPNOSUPP disable SERVER_INUM? */
+				cFYI(1, ("GetSrvInodeNum rc %d", rc1));
+				fattr.cf_uniqueid = iunique(sb, ROOT_I);
 			}
 		} else {
-			pinum = NULL;
+			fattr.cf_uniqueid = iunique(sb, ROOT_I);
 		}
-
-		*pinode = cifs_new_inode(sb, pinum);
-		if (*pinode == NULL) {
-			rc = -ENOMEM;
-			goto cgii_exit;
-		}
-	}
-	inode = *pinode;
-	cifsInfo = CIFS_I(inode);
-	cifsInfo->cifsAttrs = attr;
-	cifsInfo->delete_pending = pfindData->DeletePending ? true : false;
-	cFYI(1, ("Old time %ld", cifsInfo->time));
-	cifsInfo->time = jiffies;
-	cFYI(1, ("New time %ld", cifsInfo->time));
-
-	/* blksize needs to be multiple of two. So safer to default to
-	blksize and blkbits set in superblock so 2**blkbits and blksize
-	will match rather than setting to:
-	(pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
-
-	/* Linux can not store file creation time so ignore it */
-	if (pfindData->LastAccessTime)
-		inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime);
-	else /* do not need to use current_fs_time - time not stored */
-		inode->i_atime = CURRENT_TIME;
-	inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime);
-	inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime);
-	cFYI(DBG2, ("Attributes came in as 0x%x", attr));
-	if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) {
-		inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj;
-		inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj;
-	}
-
-	/* get default inode mode */
-	if (attr & ATTR_DIRECTORY)
-		default_mode = cifs_sb->mnt_dir_mode;
-	else
-		default_mode = cifs_sb->mnt_file_mode;
-
-	/* set permission bits */
-	if (atomic_read(&cifsInfo->inUse) == 0 ||
-	    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
-		inode->i_mode = default_mode;
-	else {
-		/* just reenable write bits if !ATTR_READONLY */
-		if ((inode->i_mode & S_IWUGO) == 0 &&
-		    (attr & ATTR_READONLY) == 0)
-			inode->i_mode |= (S_IWUGO & default_mode);
-
-		inode->i_mode &= ~S_IFMT;
-	}
-	/* clear write bits if ATTR_READONLY is set */
-	if (attr & ATTR_READONLY)
-		inode->i_mode &= ~S_IWUGO;
-
-	/* set inode type */
-	if ((attr & ATTR_SYSTEM) &&
-	    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
-		/* no need to fix endianness on 0 */
-		if (pfindData->EndOfFile == 0)
-			inode->i_mode |= S_IFIFO;
-		else if (decode_sfu_inode(inode,
-				le64_to_cpu(pfindData->EndOfFile),
-				full_path, cifs_sb, xid))
-			cFYI(1, ("unknown SFU file type\n"));
 	} else {
-		if (attr & ATTR_DIRECTORY)
-			inode->i_mode |= S_IFDIR;
-		else
-			inode->i_mode |= S_IFREG;
+		fattr.cf_uniqueid = CIFS_I(*pinode)->uniqueid;
 	}
 
-	cifsInfo->server_eof = le64_to_cpu(pfindData->EndOfFile);
-	spin_lock(&inode->i_lock);
-	if (is_size_safe_to_change(cifsInfo, cifsInfo->server_eof)) {
-		/* can not safely shrink the file size here if the
-		   client is writing to it due to potential races */
-		i_size_write(inode, cifsInfo->server_eof);
-
-		/* 512 bytes (2**9) is the fake blocksize that must be
-		   used for this calculation */
-		inode->i_blocks = (512 - 1 + le64_to_cpu(
-				   pfindData->AllocationSize)) >> 9;
+	/* query for SFU type info if supported and needed */
+	if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
+	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
+		tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid);
+		if (tmprc)
+			cFYI(1, ("cifs_sfu_type failed: %d", tmprc));
 	}
-	spin_unlock(&inode->i_lock);
-
-	inode->i_nlink = le32_to_cpu(pfindData->NumberOfLinks);
 
-	/* BB fill in uid and gid here? with help from winbind?
-	   or retrieve from NTFS stream extended attribute */
 #ifdef CONFIG_CIFS_EXPERIMENTAL
 	/* fill in 0777 bits from ACL */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
 		cFYI(1, ("Getting mode bits from ACL"));
-		acl_to_uid_mode(cifs_sb, inode, full_path, pfid);
+		cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid);
 	}
 #endif
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
-		/* fill in remaining high mode bits e.g. SUID, VTX */
-		get_sfu_mode(inode, full_path, cifs_sb, xid);
-	} else if (atomic_read(&cifsInfo->inUse) == 0) {
-		inode->i_uid = cifs_sb->mnt_uid;
-		inode->i_gid = cifs_sb->mnt_gid;
-		/* set so we do not keep refreshing these fields with
-		   bad data after user has changed them in memory */
-		atomic_set(&cifsInfo->inUse, 1);
-	}
-
-	cifs_set_ops(inode, is_dfs_referral);
-
 
+	/* fill in remaining high mode bits e.g. SUID, VTX */
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
+		cifs_sfu_mode(&fattr, full_path, cifs_sb, xid);
 
+	if (!*pinode) {
+		*pinode = cifs_iget(sb, &fattr);
+		if (!*pinode)
+			rc = -ENOMEM;
+	} else {
+		cifs_fattr_to_inode(*pinode, &fattr);
+	}
 
 cgii_exit:
 	kfree(buf);
@@ -695,33 +602,78 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb)
 	return full_path;
 }
 
+static int
+cifs_find_inode(struct inode *inode, void *opaque)
+{
+	struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
+
+	if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
+		return 0;
+
+	return 1;
+}
+
+static int
+cifs_init_inode(struct inode *inode, void *opaque)
+{
+	struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
+
+	CIFS_I(inode)->uniqueid = fattr->cf_uniqueid;
+	return 0;
+}
+
+/* Given fattrs, get a corresponding inode */
+struct inode *
+cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
+{
+	unsigned long hash;
+	struct inode *inode;
+
+	cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid));
+
+	/* hash down to 32-bits on 32-bit arch */
+	hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
+
+	inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
+
+	/* we have fattrs in hand, update the inode */
+	if (inode) {
+		cifs_fattr_to_inode(inode, fattr);
+		if (sb->s_flags & MS_NOATIME)
+			inode->i_flags |= S_NOATIME | S_NOCMTIME;
+		if (inode->i_state & I_NEW) {
+			inode->i_ino = hash;
+			unlock_new_inode(inode);
+		}
+	}
+
+	return inode;
+}
+
 /* gets root inode */
 struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 {
 	int xid;
 	struct cifs_sb_info *cifs_sb;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	long rc;
 	char *full_path;
 
-	inode = iget_locked(sb, ino);
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-	if (!(inode->i_state & I_NEW))
-		return inode;
-
-	cifs_sb = CIFS_SB(inode->i_sb);
+	cifs_sb = CIFS_SB(sb);
 	full_path = cifs_build_path_to_root(cifs_sb);
 	if (full_path == NULL)
 		return ERR_PTR(-ENOMEM);
 
 	xid = GetXid();
 	if (cifs_sb->tcon->unix_ext)
-		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
-						xid);
+		rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
 	else
-		rc = cifs_get_inode_info(&inode, full_path, NULL, inode->i_sb,
+		rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
 						xid, NULL);
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
 	if (rc && cifs_sb->tcon->ipc) {
 		cFYI(1, ("ipc connection - fake read inode"));
 		inode->i_mode |= S_IFDIR;
@@ -737,7 +689,6 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 		return ERR_PTR(rc);
 	}
 
-	unlock_new_inode(inode);
 
 	kfree(full_path);
 	/* can not call macro FreeXid here since in a void func
@@ -988,8 +939,9 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
 	 * sb->s_vfs_rename_mutex here */
 	full_path = build_path_from_dentry(dentry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -1062,44 +1014,6 @@ out_reval:
 	return rc;
 }
 
-void posix_fill_in_inode(struct inode *tmp_inode,
-	FILE_UNIX_BASIC_INFO *pData, int isNewInode)
-{
-	struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
-	loff_t local_size;
-	struct timespec local_mtime;
-
-	cifsInfo->time = jiffies;
-	atomic_inc(&cifsInfo->inUse);
-
-	/* save mtime and size */
-	local_mtime = tmp_inode->i_mtime;
-	local_size  = tmp_inode->i_size;
-
-	cifs_unix_info_to_inode(tmp_inode, pData, 1);
-	cifs_set_ops(tmp_inode, false);
-
-	if (!S_ISREG(tmp_inode->i_mode))
-		return;
-
-	/*
-	 * No sense invalidating pages for new inode
-	 * since we we have not started caching
-	 * readahead file data yet.
-	 */
-	if (isNewInode)
-		return;
-
-	if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
-		(local_size == tmp_inode->i_size)) {
-		cFYI(1, ("inode exists but unchanged"));
-	} else {
-		/* file may have changed on server */
-		cFYI(1, ("invalidate inode, readdir detected change"));
-		invalidate_remote_inode(tmp_inode);
-	}
-}
-
 int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 {
 	int rc = 0, tmprc;
@@ -1108,6 +1022,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 	struct cifsTconInfo *pTcon;
 	char *full_path = NULL;
 	struct inode *newinode = NULL;
+	struct cifs_fattr fattr;
 
 	cFYI(1, ("In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode));
 
@@ -1118,8 +1033,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	if ((pTcon->ses->capabilities & CAP_UNIX) &&
@@ -1146,7 +1062,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 			cFYI(1, ("posix mkdir returned 0x%x", rc));
 			d_drop(direntry);
 		} else {
-			__u64 unique_id;
 			if (pInfo->Type == cpu_to_le32(-1)) {
 				/* no return info, go query for it */
 				kfree(pInfo);
@@ -1160,20 +1075,15 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 			else
 				direntry->d_op = &cifs_dentry_ops;
 
-			unique_id = le64_to_cpu(pInfo->UniqueId);
-			newinode = cifs_new_inode(inode->i_sb, &unique_id);
-			if (newinode == NULL) {
+			cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
+			newinode = cifs_iget(inode->i_sb, &fattr);
+			if (!newinode) {
 				kfree(pInfo);
 				goto mkdir_get_info;
 			}
 
-			newinode->i_nlink = 2;
 			d_instantiate(direntry, newinode);
 
-			/* we already checked in POSIXCreate whether
-			   frame was long enough */
-			posix_fill_in_inode(direntry->d_inode,
-					pInfo, 1 /* NewInode */);
 #ifdef CONFIG_CIFS_DEBUG2
 			cFYI(1, ("instantiated dentry %p %s to inode %p",
 				direntry, direntry->d_name.name, newinode));
@@ -1236,10 +1146,10 @@ mkdir_get_info:
 				args.uid = NO_CHANGE_64;
 				args.gid = NO_CHANGE_64;
 			}
-			CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
-					    cifs_sb->local_nls,
-					    cifs_sb->mnt_cifs_flags &
-					    CIFS_MOUNT_MAP_SPECIAL_CHR);
+			CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
+					       cifs_sb->local_nls,
+					       cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
 		} else {
 			if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
 			    (mode & S_IWUGO) == 0) {
@@ -1303,8 +1213,9 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls,
@@ -1508,8 +1419,9 @@ int cifs_revalidate(struct dentry *direntry)
 	   since that would deadlock */
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 	cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
 		 "jiffies %ld", full_path, direntry->d_inode,
@@ -1618,6 +1530,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	if (!err) {
 		generic_fillattr(dentry->d_inode, stat);
 		stat->blksize = CIFS_MAX_MSGSIZE;
+		stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
 	}
 	return err;
 }
@@ -1782,6 +1695,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifsTconInfo *pTcon = cifs_sb->tcon;
 	struct cifs_unix_set_info_args *args = NULL;
+	struct cifsFileInfo *open_file;
 
 	cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
 		 direntry->d_name.name, attrs->ia_valid));
@@ -1868,10 +1782,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 		args->ctime = NO_CHANGE_64;
 
 	args->device = 0;
-	rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args,
-				cifs_sb->local_nls,
-				cifs_sb->mnt_cifs_flags &
-				CIFS_MOUNT_MAP_SPECIAL_CHR);
+	open_file = find_writable_file(cifsInode);
+	if (open_file) {
+		u16 nfid = open_file->netfid;
+		u32 npid = open_file->pid;
+		rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid);
+		atomic_dec(&open_file->wrtPending);
+	} else {
+		rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
+				    cifs_sb->local_nls,
+				    cifs_sb->mnt_cifs_flags &
+					CIFS_MOUNT_MAP_SPECIAL_CHR);
+	}
 
 	if (!rc)
 		rc = inode_setattr(inode, attrs);
@@ -1911,8 +1833,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	/*
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index cd83c53fcbb5..fc1e0487eaee 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -172,8 +172,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
 	full_path = build_path_from_dentry(direntry);
 
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 
 	cFYI(1, ("Full path: %s", full_path));
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 32d6baa0a54f..bd6d6895730d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -133,10 +133,12 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
 	{0, 0}
 };
 
-/* Convert string containing dotted ip address to binary form */
-/* returns 0 if invalid address */
-
-int
+/*
+ * Convert a string containing text IPv4 or IPv6 address to binary form.
+ *
+ * Returns 0 on failure.
+ */
+static int
 cifs_inet_pton(const int address_family, const char *cp, void *dst)
 {
 	int ret = 0;
@@ -153,6 +155,52 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
 	return ret;
 }
 
+/*
+ * Try to convert a string to an IPv4 address and then attempt to convert
+ * it to an IPv6 address if that fails. Set the family field if either
+ * succeeds. If it's an IPv6 address and it has a '%' sign in it, try to
+ * treat the part following it as a numeric sin6_scope_id.
+ *
+ * Returns 0 on failure.
+ */
+int
+cifs_convert_address(char *src, void *dst)
+{
+	int rc;
+	char *pct, *endp;
+	struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
+	struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
+
+	/* IPv4 address */
+	if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) {
+		s4->sin_family = AF_INET;
+		return 1;
+	}
+
+	/* temporarily terminate string */
+	pct = strchr(src, '%');
+	if (pct)
+		*pct = '\0';
+
+	rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr);
+
+	/* repair temp termination (if any) and make pct point to scopeid */
+	if (pct)
+		*pct++ = '%';
+
+	if (!rc)
+		return rc;
+
+	s6->sin6_family = AF_INET6;
+	if (pct) {
+		s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0);
+		if (!*pct || *endp)
+			return 0;
+	}
+
+	return rc;
+}
+
 /*****************************************************************************
 convert a NT status code to a dos class/code
  *****************************************************************************/
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 86d0055dc529..f823a4a208a7 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -63,374 +63,123 @@ static inline void dump_cifs_file_struct(struct file *file, char *label)
 }
 #endif /* DEBUG2 */
 
-/* Returns 1 if new inode created, 2 if both dentry and inode were */
-/* Might check in the future if inode number changed so we can rehash inode */
-static int
-construct_dentry(struct qstr *qstring, struct file *file,
-		 struct inode **ptmp_inode, struct dentry **pnew_dentry,
-		 __u64 *inum)
+/*
+ * Find the dentry that matches "name". If there isn't one, create one. If it's
+ * a negative dentry or the uniqueid changed, then drop it and recreate it.
+ */
+static struct dentry *
+cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
+		    struct cifs_fattr *fattr)
 {
-	struct dentry *tmp_dentry = NULL;
-	struct super_block *sb = file->f_path.dentry->d_sb;
-	int rc = 0;
+	struct dentry *dentry, *alias;
+	struct inode *inode;
+	struct super_block *sb = parent->d_inode->i_sb;
+
+	cFYI(1, ("For %s", name->name));
+
+	dentry = d_lookup(parent, name);
+	if (dentry) {
+		/* FIXME: check for inode number changes? */
+		if (dentry->d_inode != NULL)
+			return dentry;
+		d_drop(dentry);
+		dput(dentry);
+	}
 
-	cFYI(1, ("For %s", qstring->name));
-
-	qstring->hash = full_name_hash(qstring->name, qstring->len);
-	tmp_dentry = d_lookup(file->f_path.dentry, qstring);
-	if (tmp_dentry) {
-		/* BB: overwrite old name? i.e. tmp_dentry->d_name and
-		 * tmp_dentry->d_name.len??
-		 */
-		cFYI(0, ("existing dentry with inode 0x%p",
-			 tmp_dentry->d_inode));
-		*ptmp_inode = tmp_dentry->d_inode;
-		if (*ptmp_inode == NULL) {
-			*ptmp_inode = cifs_new_inode(sb, inum);
-			if (*ptmp_inode == NULL)
-				return rc;
-			rc = 1;
-		}
-	} else {
-		tmp_dentry = d_alloc(file->f_path.dentry, qstring);
-		if (tmp_dentry == NULL) {
-			cERROR(1, ("Failed allocating dentry"));
-			*ptmp_inode = NULL;
-			return rc;
-		}
+	dentry = d_alloc(parent, name);
+	if (dentry == NULL)
+		return NULL;
 
-		if (CIFS_SB(sb)->tcon->nocase)
-			tmp_dentry->d_op = &cifs_ci_dentry_ops;
-		else
-			tmp_dentry->d_op = &cifs_dentry_ops;
+	inode = cifs_iget(sb, fattr);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
 
-		*ptmp_inode = cifs_new_inode(sb, inum);
-		if (*ptmp_inode == NULL)
-			return rc;
-		rc = 2;
+	if (CIFS_SB(sb)->tcon->nocase)
+		dentry->d_op = &cifs_ci_dentry_ops;
+	else
+		dentry->d_op = &cifs_dentry_ops;
+
+	alias = d_materialise_unique(dentry, inode);
+	if (alias != NULL) {
+		dput(dentry);
+		if (IS_ERR(alias))
+			return NULL;
+		dentry = alias;
 	}
 
-	tmp_dentry->d_time = jiffies;
-	*pnew_dentry = tmp_dentry;
-	return rc;
+	return dentry;
 }
 
-static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
-			  char *buf, unsigned int *pobject_type, int isNewInode)
+static void
+cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
 {
-	loff_t local_size;
-	struct timespec local_mtime;
-
-	struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
-	struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
-	__u32 attr;
-	__u64 allocation_size;
-	__u64 end_of_file;
-	umode_t default_mode;
-
-	/* save mtime and size */
-	local_mtime = tmp_inode->i_mtime;
-	local_size  = tmp_inode->i_size;
-
-	if (new_buf_type) {
-		FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf;
-
-		attr = le32_to_cpu(pfindData->ExtFileAttributes);
-		allocation_size = le64_to_cpu(pfindData->AllocationSize);
-		end_of_file = le64_to_cpu(pfindData->EndOfFile);
-		tmp_inode->i_atime =
-			cifs_NTtimeToUnix(pfindData->LastAccessTime);
-		tmp_inode->i_mtime =
-			cifs_NTtimeToUnix(pfindData->LastWriteTime);
-		tmp_inode->i_ctime =
-			cifs_NTtimeToUnix(pfindData->ChangeTime);
-	} else { /* legacy, OS2 and DOS style */
-		int offset = cifs_sb->tcon->ses->server->timeAdj;
-		FIND_FILE_STANDARD_INFO *pfindData =
-			(FIND_FILE_STANDARD_INFO *)buf;
-
-		tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate,
-						    pfindData->LastWriteTime,
-						    offset);
-		tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate,
-						    pfindData->LastAccessTime,
-						    offset);
-		tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate,
-						    pfindData->LastWriteTime,
-						    offset);
-		attr = le16_to_cpu(pfindData->Attributes);
-		allocation_size = le32_to_cpu(pfindData->AllocationSize);
-		end_of_file = le32_to_cpu(pfindData->DataSize);
-	}
+	fattr->cf_uid = cifs_sb->mnt_uid;
+	fattr->cf_gid = cifs_sb->mnt_gid;
 
-	/* Linux can not store file creation time unfortunately so ignore it */
-
-	cifsInfo->cifsAttrs = attr;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
-		/* get more accurate mode via ACL - so force inode refresh */
-		cifsInfo->time = 0;
-	} else
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
-		cifsInfo->time = jiffies;
-
-	/* treat dos attribute of read-only as read-only mode bit e.g. 555? */
-	/* 2767 perms - indicate mandatory locking */
-		/* BB fill in uid and gid here? with help from winbind?
-		   or retrieve from NTFS stream extended attribute */
-	if (atomic_read(&cifsInfo->inUse) == 0) {
-		tmp_inode->i_uid = cifs_sb->mnt_uid;
-		tmp_inode->i_gid = cifs_sb->mnt_gid;
-	}
-
-	if (attr & ATTR_DIRECTORY)
-		default_mode = cifs_sb->mnt_dir_mode;
-	else
-		default_mode = cifs_sb->mnt_file_mode;
-
-	/* set initial permissions */
-	if ((atomic_read(&cifsInfo->inUse) == 0) ||
-	    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
-		tmp_inode->i_mode = default_mode;
-	else {
-		/* just reenable write bits if !ATTR_READONLY */
-		if ((tmp_inode->i_mode & S_IWUGO) == 0 &&
-		    (attr & ATTR_READONLY) == 0)
-			tmp_inode->i_mode |= (S_IWUGO & default_mode);
-
-		tmp_inode->i_mode &= ~S_IFMT;
+	if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
+		fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
+		fattr->cf_dtype = DT_DIR;
+	} else {
+		fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
+		fattr->cf_dtype = DT_REG;
 	}
 
-	/* clear write bits if ATTR_READONLY is set */
-	if (attr & ATTR_READONLY)
-		tmp_inode->i_mode &= ~S_IWUGO;
+	if (fattr->cf_cifsattrs & ATTR_READONLY)
+		fattr->cf_mode &= ~S_IWUGO;
 
-	/* set inode type */
-	if ((attr & ATTR_SYSTEM) &&
-	    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
-		if (end_of_file == 0)  {
-			tmp_inode->i_mode |= S_IFIFO;
-			*pobject_type = DT_FIFO;
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL &&
+	    fattr->cf_cifsattrs & ATTR_SYSTEM) {
+		if (fattr->cf_eof == 0)  {
+			fattr->cf_mode &= ~S_IFMT;
+			fattr->cf_mode |= S_IFIFO;
+			fattr->cf_dtype = DT_FIFO;
 		} else {
 			/*
-			 * trying to get the type can be slow, so just call
-			 * this a regular file for now, and mark for reval
+			 * trying to get the type and mode via SFU can be slow,
+			 * so just call those regular files for now, and mark
+			 * for reval
 			 */
-			tmp_inode->i_mode |= S_IFREG;
-			*pobject_type = DT_REG;
-			cifsInfo->time = 0;
-		}
-	} else {
-		if (attr & ATTR_DIRECTORY) {
-			tmp_inode->i_mode |= S_IFDIR;
-			*pobject_type = DT_DIR;
-		} else {
-			tmp_inode->i_mode |= S_IFREG;
-			*pobject_type = DT_REG;
+			fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
 		}
 	}
+}
 
-	/* can not fill in nlink here as in qpathinfo version and Unx search */
-	if (atomic_read(&cifsInfo->inUse) == 0)
-		atomic_set(&cifsInfo->inUse, 1);
-
-	cifsInfo->server_eof = end_of_file;
-	spin_lock(&tmp_inode->i_lock);
-	if (is_size_safe_to_change(cifsInfo, end_of_file)) {
-		/* can not safely change the file size here if the
-		client is writing to it due to potential races */
-		i_size_write(tmp_inode, end_of_file);
-
-	/* 512 bytes (2**9) is the fake blocksize that must be used */
-	/* for this calculation, even though the reported blocksize is larger */
-		tmp_inode->i_blocks = (512 - 1 + allocation_size) >> 9;
-	}
-	spin_unlock(&tmp_inode->i_lock);
-
-	if (allocation_size < end_of_file)
-		cFYI(1, ("May be sparse file, allocation less than file size"));
-	cFYI(1, ("File Size %ld and blocks %llu",
-		(unsigned long)tmp_inode->i_size,
-		(unsigned long long)tmp_inode->i_blocks));
-	if (S_ISREG(tmp_inode->i_mode)) {
-		cFYI(1, ("File inode"));
-		tmp_inode->i_op = &cifs_file_inode_ops;
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
-			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-				tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
-			else
-				tmp_inode->i_fop = &cifs_file_direct_ops;
-		} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-			tmp_inode->i_fop = &cifs_file_nobrl_ops;
-		else
-			tmp_inode->i_fop = &cifs_file_ops;
-
-		if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
-		   (cifs_sb->tcon->ses->server->maxBuf <
-			PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
-			tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
-		else
-			tmp_inode->i_data.a_ops = &cifs_addr_ops;
-
-		if (isNewInode)
-			return; /* No sense invalidating pages for new inode
-				   since have not started caching readahead file
-				   data yet */
-
-		if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
-			(local_size == tmp_inode->i_size)) {
-			cFYI(1, ("inode exists but unchanged"));
-		} else {
-			/* file may have changed on server */
-			cFYI(1, ("invalidate inode, readdir detected change"));
-			invalidate_remote_inode(tmp_inode);
-		}
-	} else if (S_ISDIR(tmp_inode->i_mode)) {
-		cFYI(1, ("Directory inode"));
-		tmp_inode->i_op = &cifs_dir_inode_ops;
-		tmp_inode->i_fop = &cifs_dir_ops;
-	} else if (S_ISLNK(tmp_inode->i_mode)) {
-		cFYI(1, ("Symbolic Link inode"));
-		tmp_inode->i_op = &cifs_symlink_inode_ops;
-	} else {
-		cFYI(1, ("Init special inode"));
-		init_special_inode(tmp_inode, tmp_inode->i_mode,
-				   tmp_inode->i_rdev);
-	}
+void
+cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
+		       struct cifs_sb_info *cifs_sb)
+{
+	memset(fattr, 0, sizeof(*fattr));
+	fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
+	fattr->cf_eof = le64_to_cpu(info->EndOfFile);
+	fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
+	fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
+	fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
+	fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
+
+	cifs_fill_common_info(fattr, cifs_sb);
 }
 
-static void unix_fill_in_inode(struct inode *tmp_inode,
-	FILE_UNIX_INFO *pfindData, unsigned int *pobject_type, int isNewInode)
+void
+cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info,
+		       struct cifs_sb_info *cifs_sb)
 {
-	loff_t local_size;
-	struct timespec local_mtime;
-
-	struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
-	struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
-
-	__u32 type = le32_to_cpu(pfindData->Type);
-	__u64 num_of_bytes = le64_to_cpu(pfindData->NumOfBytes);
-	__u64 end_of_file = le64_to_cpu(pfindData->EndOfFile);
-	cifsInfo->time = jiffies;
-	atomic_inc(&cifsInfo->inUse);
-
-	/* save mtime and size */
-	local_mtime = tmp_inode->i_mtime;
-	local_size  = tmp_inode->i_size;
-
-	tmp_inode->i_atime =
-	    cifs_NTtimeToUnix(pfindData->LastAccessTime);
-	tmp_inode->i_mtime =
-	    cifs_NTtimeToUnix(pfindData->LastModificationTime);
-	tmp_inode->i_ctime =
-	    cifs_NTtimeToUnix(pfindData->LastStatusChange);
-
-	tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions);
-	/* since we set the inode type below we need to mask off type
-	   to avoid strange results if bits above were corrupt */
-	tmp_inode->i_mode &= ~S_IFMT;
-	if (type == UNIX_FILE) {
-		*pobject_type = DT_REG;
-		tmp_inode->i_mode |= S_IFREG;
-	} else if (type == UNIX_SYMLINK) {
-		*pobject_type = DT_LNK;
-		tmp_inode->i_mode |= S_IFLNK;
-	} else if (type == UNIX_DIR) {
-		*pobject_type = DT_DIR;
-		tmp_inode->i_mode |= S_IFDIR;
-	} else if (type == UNIX_CHARDEV) {
-		*pobject_type = DT_CHR;
-		tmp_inode->i_mode |= S_IFCHR;
-		tmp_inode->i_rdev = MKDEV(le64_to_cpu(pfindData->DevMajor),
-				le64_to_cpu(pfindData->DevMinor) & MINORMASK);
-	} else if (type == UNIX_BLOCKDEV) {
-		*pobject_type = DT_BLK;
-		tmp_inode->i_mode |= S_IFBLK;
-		tmp_inode->i_rdev = MKDEV(le64_to_cpu(pfindData->DevMajor),
-				le64_to_cpu(pfindData->DevMinor) & MINORMASK);
-	} else if (type == UNIX_FIFO) {
-		*pobject_type = DT_FIFO;
-		tmp_inode->i_mode |= S_IFIFO;
-	} else if (type == UNIX_SOCKET) {
-		*pobject_type = DT_SOCK;
-		tmp_inode->i_mode |= S_IFSOCK;
-	} else {
-		/* safest to just call it a file */
-		*pobject_type = DT_REG;
-		tmp_inode->i_mode |= S_IFREG;
-		cFYI(1, ("unknown inode type %d", type));
-	}
+	int offset = cifs_sb->tcon->ses->server->timeAdj;
 
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
-		tmp_inode->i_uid = cifs_sb->mnt_uid;
-	else
-		tmp_inode->i_uid = le64_to_cpu(pfindData->Uid);
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
-		tmp_inode->i_gid = cifs_sb->mnt_gid;
-	else
-		tmp_inode->i_gid = le64_to_cpu(pfindData->Gid);
-	tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks);
-
-	cifsInfo->server_eof = end_of_file;
-	spin_lock(&tmp_inode->i_lock);
-	if (is_size_safe_to_change(cifsInfo, end_of_file)) {
-		/* can not safely change the file size here if the
-		client is writing to it due to potential races */
-		i_size_write(tmp_inode, end_of_file);
-
-	/* 512 bytes (2**9) is the fake blocksize that must be used */
-	/* for this calculation, not the real blocksize */
-		tmp_inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
-	}
-	spin_unlock(&tmp_inode->i_lock);
+	memset(fattr, 0, sizeof(*fattr));
+	fattr->cf_atime = cnvrtDosUnixTm(info->LastAccessDate,
+					    info->LastAccessTime, offset);
+	fattr->cf_ctime = cnvrtDosUnixTm(info->LastWriteDate,
+					    info->LastWriteTime, offset);
+	fattr->cf_mtime = cnvrtDosUnixTm(info->LastWriteDate,
+					    info->LastWriteTime, offset);
 
-	if (S_ISREG(tmp_inode->i_mode)) {
-		cFYI(1, ("File inode"));
-		tmp_inode->i_op = &cifs_file_inode_ops;
+	fattr->cf_cifsattrs = le16_to_cpu(info->Attributes);
+	fattr->cf_bytes = le32_to_cpu(info->AllocationSize);
+	fattr->cf_eof = le32_to_cpu(info->DataSize);
 
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
-			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-				tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
-			else
-				tmp_inode->i_fop = &cifs_file_direct_ops;
-		} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
-			tmp_inode->i_fop = &cifs_file_nobrl_ops;
-		else
-			tmp_inode->i_fop = &cifs_file_ops;
-
-		if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
-		   (cifs_sb->tcon->ses->server->maxBuf <
-			PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
-			tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
-		else
-			tmp_inode->i_data.a_ops = &cifs_addr_ops;
-
-		if (isNewInode)
-			return; /* No sense invalidating pages for new inode
-				   since we have not started caching readahead
-				   file data for it yet */
-
-		if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
-			(local_size == tmp_inode->i_size)) {
-			cFYI(1, ("inode exists but unchanged"));
-		} else {
-			/* file may have changed on server */
-			cFYI(1, ("invalidate inode, readdir detected change"));
-			invalidate_remote_inode(tmp_inode);
-		}
-	} else if (S_ISDIR(tmp_inode->i_mode)) {
-		cFYI(1, ("Directory inode"));
-		tmp_inode->i_op = &cifs_dir_inode_ops;
-		tmp_inode->i_fop = &cifs_dir_ops;
-	} else if (S_ISLNK(tmp_inode->i_mode)) {
-		cFYI(1, ("Symbolic Link inode"));
-		tmp_inode->i_op = &cifs_symlink_inode_ops;
-/* tmp_inode->i_fop = *//* do not need to set to anything */
-	} else {
-		cFYI(1, ("Special inode"));
-		init_special_inode(tmp_inode, tmp_inode->i_mode,
-				   tmp_inode->i_rdev);
-	}
+	cifs_fill_common_info(fattr, cifs_sb);
 }
 
 /* BB eventually need to add the following helper function to
@@ -872,7 +621,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
 			len = strnlen(filename, PATH_MAX);
 		}
 
-		*pinum = le64_to_cpu(pFindData->UniqueId);
+		*pinum = le64_to_cpu(pFindData->basic.UniqueId);
 	} else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
 		FILE_DIRECTORY_INFO *pFindData =
 			(FILE_DIRECTORY_INFO *)current_entry;
@@ -932,11 +681,12 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
 	int rc = 0;
 	struct qstr qstring;
 	struct cifsFileInfo *pCifsF;
-	unsigned int obj_type;
-	__u64  inum;
+	u64    inum;
+	ino_t  ino;
+	struct super_block *sb;
 	struct cifs_sb_info *cifs_sb;
-	struct inode *tmp_inode;
 	struct dentry *tmp_dentry;
+	struct cifs_fattr fattr;
 
 	/* get filename and len into qstring */
 	/* get dentry */
@@ -954,60 +704,53 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
 	if (rc != 0)
 		return 0;
 
-	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	sb = file->f_path.dentry->d_sb;
+	cifs_sb = CIFS_SB(sb);
 
 	qstring.name = scratch_buf;
 	rc = cifs_get_name_from_search_buf(&qstring, pfindEntry,
 			pCifsF->srch_inf.info_level,
 			pCifsF->srch_inf.unicode, cifs_sb,
-			max_len,
-			&inum /* returned */);
+			max_len, &inum /* returned */);
 
 	if (rc)
 		return rc;
 
-	/* only these two infolevels return valid inode numbers */
-	if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX ||
-	    pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO)
-		rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry,
-					&inum);
-	else
-		rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry,
-					NULL);
-
-	if ((tmp_inode == NULL) || (tmp_dentry == NULL))
-		return -ENOMEM;
-
-	/* we pass in rc below, indicating whether it is a new inode,
-	   so we can figure out whether to invalidate the inode cached
-	   data if the file has changed */
 	if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX)
-		unix_fill_in_inode(tmp_inode,
-				   (FILE_UNIX_INFO *)pfindEntry,
-				   &obj_type, rc);
+		cifs_unix_basic_to_fattr(&fattr,
+				 &((FILE_UNIX_INFO *) pfindEntry)->basic,
+				 cifs_sb);
 	else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
-		fill_in_inode(tmp_inode, 0 /* old level 1 buffer type */,
-				pfindEntry, &obj_type, rc);
+		cifs_std_info_to_fattr(&fattr, (FIND_FILE_STANDARD_INFO *)
+					pfindEntry, cifs_sb);
 	else
-		fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc);
+		cifs_dir_info_to_fattr(&fattr, (FILE_DIRECTORY_INFO *)
+					pfindEntry, cifs_sb);
 
-	if (rc) /* new inode - needs to be tied to dentry */ {
-		d_instantiate(tmp_dentry, tmp_inode);
-		if (rc == 2)
-			d_rehash(tmp_dentry);
-	}
+	/* FIXME: make _to_fattr functions fill this out */
+	if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO)
+		fattr.cf_uniqueid = inum;
+	else
+		fattr.cf_uniqueid = iunique(sb, ROOT_I);
 
+	ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid);
+	tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr);
 
 	rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
-		     tmp_inode->i_ino, obj_type);
+		     ino, fattr.cf_dtype);
+
+	/*
+	 * we can not return filldir errors to the caller since they are
+	 * "normal" when the stat blocksize is too small - we return remapped
+	 * error instead
+	 *
+	 * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above
+	 * case already. Why should we be clobbering other errors from it?
+	 */
 	if (rc) {
 		cFYI(1, ("filldir rc = %d", rc));
-		/* we can not return filldir errors to the caller
-		since they are "normal" when the stat blocksize
-		is too small - we return remapped error instead */
 		rc = -EOVERFLOW;
 	}
-
 	dput(tmp_dentry);
 	return rc;
 }
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 897a052270f9..7085a6275c4c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -802,7 +802,7 @@ ssetup_ntlmssp_authenticate:
 #endif /* CONFIG_CIFS_UPCALL */
 	} else {
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-		if ((experimEnabled > 1) && (type == RawNTLMSSP)) {
+		if (type == RawNTLMSSP) {
 			if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
 				cERROR(1, ("NTLMSSP requires Unicode support"));
 				rc = -ENOSYS;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index e9527eedc639..a75afa3dd9e1 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -64,8 +64,9 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 	if (ea_name == NULL) {
 		cFYI(1, ("Null xattr names not supported"));
@@ -118,8 +119,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 	/* return dos attributes as pseudo xattr */
 	/* return alt name if available as pseudo attr */
@@ -225,8 +227,9 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 	/* return dos attributes as pseudo xattr */
 	/* return alt name if available as pseudo attr */
@@ -351,8 +354,9 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
+		rc = -ENOMEM;
 		FreeXid(xid);
-		return -ENOMEM;
+		return rc;
 	}
 	/* return dos attributes as pseudo xattr */
 	/* return alt name if available as pseudo attr */
diff --git a/fs/compat.c b/fs/compat.c
index cdd51a3a7c53..fbadb947727b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1486,8 +1486,8 @@ int compat_do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
-	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
-	if (retval < 0)
+	retval = -ERESTARTNOINTR;
+	if (mutex_lock_interruptible(&current->cred_guard_mutex))
 		goto out_free;
 	current->in_execve = 1;
 
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c5ded5ff72b5..626c7483b4de 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -31,6 +31,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/vt.h>
+#include <linux/falloc.h>
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/ppp_defs.h>
@@ -94,7 +95,6 @@
 #include <linux/atm_tcp.h>
 #include <linux/sonet.h>
 #include <linux/atm_suni.h>
-#include <linux/mtd/mtd.h>
 
 #include <linux/usb.h>
 #include <linux/usbdevice_fs.h>
@@ -1405,46 +1405,6 @@ static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
 #define HIDPGETCONNLIST	_IOR('H', 210, int)
 #define HIDPGETCONNINFO	_IOR('H', 211, int)
 
-struct mtd_oob_buf32 {
-	u_int32_t start;
-	u_int32_t length;
-	compat_caddr_t ptr;	/* unsigned char* */
-};
-
-#define MEMWRITEOOB32 	_IOWR('M',3,struct mtd_oob_buf32)
-#define MEMREADOOB32 	_IOWR('M',4,struct mtd_oob_buf32)
-
-static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-	struct mtd_oob_buf __user *buf = compat_alloc_user_space(sizeof(*buf));
-	struct mtd_oob_buf32 __user *buf32 = compat_ptr(arg);
-	u32 data;
-	char __user *datap;
-	unsigned int real_cmd;
-	int err;
-
-	real_cmd = (cmd == MEMREADOOB32) ?
-		MEMREADOOB : MEMWRITEOOB;
-
-	if (copy_in_user(&buf->start, &buf32->start,
-			 2 * sizeof(u32)) ||
-	    get_user(data, &buf32->ptr))
-		return -EFAULT;
-	datap = compat_ptr(data);
-	if (put_user(datap, &buf->ptr))
-		return -EFAULT;
-
-	err = sys_ioctl(fd, real_cmd, (unsigned long) buf);
-
-	if (!err) {
-		if (copy_in_user(&buf32->start, &buf->start,
-				 2 * sizeof(u32)))
-			err = -EFAULT;
-	}
-
-	return err;
-}	
-
 #ifdef CONFIG_BLOCK
 struct raw32_config_request
 {
@@ -1820,6 +1780,41 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return sys_ioctl(fd, cmd, (unsigned long)tn);
 }
 
+/* on ia32 l_start is on a 32-bit boundary */
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+struct space_resv_32 {
+	__s16		l_type;
+	__s16		l_whence;
+	__s64		l_start	__attribute__((packed));
+			/* len == 0 means until end of file */
+	__s64		l_len __attribute__((packed));
+	__s32		l_sysid;
+	__u32		l_pid;
+	__s32		l_pad[4];	/* reserve area */
+};
+
+#define FS_IOC_RESVSP_32		_IOW ('X', 40, struct space_resv_32)
+#define FS_IOC_RESVSP64_32	_IOW ('X', 42, struct space_resv_32)
+
+/* just account for different alignment */
+static int compat_ioctl_preallocate(struct file *file, unsigned long arg)
+{
+	struct space_resv_32	__user *p32 = (void __user *)arg;
+	struct space_resv	__user *p = compat_alloc_user_space(sizeof(*p));
+
+	if (copy_in_user(&p->l_type,	&p32->l_type,	sizeof(s16)) ||
+	    copy_in_user(&p->l_whence,	&p32->l_whence, sizeof(s16)) ||
+	    copy_in_user(&p->l_start,	&p32->l_start,	sizeof(s64)) ||
+	    copy_in_user(&p->l_len,	&p32->l_len,	sizeof(s64)) ||
+	    copy_in_user(&p->l_sysid,	&p32->l_sysid,	sizeof(s32)) ||
+	    copy_in_user(&p->l_pid,	&p32->l_pid,	sizeof(u32)) ||
+	    copy_in_user(&p->l_pad,	&p32->l_pad,	4*sizeof(u32)))
+		return -EFAULT;
+
+	return ioctl_preallocate(file, p);
+}
+#endif
+
 
 typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int,
 					unsigned long, struct file *);
@@ -2426,15 +2421,6 @@ COMPATIBLE_IOCTL(USBDEVFS_SUBMITURB32)
 COMPATIBLE_IOCTL(USBDEVFS_REAPURB32)
 COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32)
 COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT)
-/* MTD */
-COMPATIBLE_IOCTL(MEMGETINFO)
-COMPATIBLE_IOCTL(MEMERASE)
-COMPATIBLE_IOCTL(MEMLOCK)
-COMPATIBLE_IOCTL(MEMUNLOCK)
-COMPATIBLE_IOCTL(MEMGETREGIONCOUNT)
-COMPATIBLE_IOCTL(MEMGETREGIONINFO)
-COMPATIBLE_IOCTL(MEMGETBADBLOCK)
-COMPATIBLE_IOCTL(MEMSETBADBLOCK)
 /* NBD */
 ULONG_IOCTL(NBD_SET_SOCK)
 ULONG_IOCTL(NBD_SET_BLKSIZE)
@@ -2544,8 +2530,6 @@ COMPATIBLE_IOCTL(JSIOCGBUTTONS)
 COMPATIBLE_IOCTL(JSIOCGNAME(0))
 
 /* now things that need handlers */
-HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob)
-HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob)
 #ifdef CONFIG_NET
 HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32)
 HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf)
@@ -2808,6 +2792,18 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 	case FIOQSIZE:
 		break;
 
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+	case FS_IOC_RESVSP_32:
+	case FS_IOC_RESVSP64_32:
+		error = compat_ioctl_preallocate(filp, arg);
+		goto out_fput;
+#else
+	case FS_IOC_RESVSP:
+	case FS_IOC_RESVSP64:
+		error = ioctl_preallocate(filp, (void __user *)arg);
+		goto out_fput;
+#endif
+
 	case FIBMAP:
 	case FIGETBSZ:
 	case FIONREAD:
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 9b1d285f9fe6..75efb028974b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -423,7 +423,6 @@ static void devpts_kill_sb(struct super_block *sb)
 }
 
 static struct file_system_type devpts_fs_type = {
-	.owner		= THIS_MODULE,
 	.name		= "devpts",
 	.get_sb		= devpts_get_sb,
 	.kill_sb	= devpts_kill_sb,
@@ -564,13 +563,4 @@ static int __init init_devpts_fs(void)
 	}
 	return err;
 }
-
-static void __exit exit_devpts_fs(void)
-{
-	unregister_filesystem(&devpts_fs_type);
-	mntput(devpts_mnt);
-}
-
 module_init(init_devpts_fs)
-module_exit(exit_devpts_fs)
-MODULE_LICENSE("GPL");
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 3f0e1974abdc..31d12de83a2a 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -14,35 +14,44 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/anon_inodes.h>
-#include <linux/eventfd.h>
 #include <linux/syscalls.h>
 #include <linux/module.h>
+#include <linux/kref.h>
+#include <linux/eventfd.h>
 
 struct eventfd_ctx {
+	struct kref kref;
 	wait_queue_head_t wqh;
 	/*
 	 * Every time that a write(2) is performed on an eventfd, the
 	 * value of the __u64 being written is added to "count" and a
 	 * wakeup is performed on "wqh". A read(2) will return the "count"
 	 * value to userspace, and will reset "count" to zero. The kernel
-	 * size eventfd_signal() also, adds to the "count" counter and
+	 * side eventfd_signal() also, adds to the "count" counter and
 	 * issue a wakeup.
 	 */
 	__u64 count;
 	unsigned int flags;
 };
 
-/*
- * Adds "n" to the eventfd counter "count". Returns "n" in case of
- * success, or a value lower then "n" in case of coutner overflow.
- * This function is supposed to be called by the kernel in paths
- * that do not allow sleeping. In this function we allow the counter
- * to reach the ULLONG_MAX value, and we signal this as overflow
- * condition by returining a POLLERR to poll(2).
+/**
+ * eventfd_signal - Adds @n to the eventfd counter.
+ * @ctx: [in] Pointer to the eventfd context.
+ * @n: [in] Value of the counter to be added to the eventfd internal counter.
+ *          The value cannot be negative.
+ *
+ * This function is supposed to be called by the kernel in paths that do not
+ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
+ * value, and we signal this as overflow condition by returining a POLLERR
+ * to poll(2).
+ *
+ * Returns @n in case of success, a non-negative number lower than @n in case
+ * of overflow, or the following error codes:
+ *
+ * -EINVAL    : The value of @n is negative.
  */
-int eventfd_signal(struct file *file, int n)
+int eventfd_signal(struct eventfd_ctx *ctx, int n)
 {
-	struct eventfd_ctx *ctx = file->private_data;
 	unsigned long flags;
 
 	if (n < 0)
@@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n)
 }
 EXPORT_SYMBOL_GPL(eventfd_signal);
 
+static void eventfd_free(struct kref *kref)
+{
+	struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
+
+	kfree(ctx);
+}
+
+/**
+ * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
+ * @ctx: [in] Pointer to the eventfd context.
+ *
+ * Returns: In case of success, returns a pointer to the eventfd context.
+ */
+struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
+{
+	kref_get(&ctx->kref);
+	return ctx;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_get);
+
+/**
+ * eventfd_ctx_put - Releases a reference to the internal eventfd context.
+ * @ctx: [in] Pointer to eventfd context.
+ *
+ * The eventfd context reference must have been previously acquired either
+ * with eventfd_ctx_get() or eventfd_ctx_fdget()).
+ */
+void eventfd_ctx_put(struct eventfd_ctx *ctx)
+{
+	kref_put(&ctx->kref, eventfd_free);
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_put);
+
 static int eventfd_release(struct inode *inode, struct file *file)
 {
-	kfree(file->private_data);
+	struct eventfd_ctx *ctx = file->private_data;
+
+	wake_up_poll(&ctx->wqh, POLLHUP);
+	eventfd_ctx_put(ctx);
 	return 0;
 }
 
@@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = {
 	.write		= eventfd_write,
 };
 
+/**
+ * eventfd_fget - Acquire a reference of an eventfd file descriptor.
+ * @fd: [in] Eventfd file descriptor.
+ *
+ * Returns a pointer to the eventfd file structure in case of success, or the
+ * following error pointer:
+ *
+ * -EBADF    : Invalid @fd file descriptor.
+ * -EINVAL   : The @fd file descriptor is not an eventfd file.
+ */
 struct file *eventfd_fget(int fd)
 {
 	struct file *file;
@@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd)
 }
 EXPORT_SYMBOL_GPL(eventfd_fget);
 
+/**
+ * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
+ * @fd: [in] Eventfd file descriptor.
+ *
+ * Returns a pointer to the internal eventfd context, otherwise the error
+ * pointers returned by the following functions:
+ *
+ * eventfd_fget
+ */
+struct eventfd_ctx *eventfd_ctx_fdget(int fd)
+{
+	struct file *file;
+	struct eventfd_ctx *ctx;
+
+	file = eventfd_fget(fd);
+	if (IS_ERR(file))
+		return (struct eventfd_ctx *) file;
+	ctx = eventfd_ctx_get(file->private_data);
+	fput(file);
+
+	return ctx;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
+
+/**
+ * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
+ * @file: [in] Eventfd file pointer.
+ *
+ * Returns a pointer to the internal eventfd context, otherwise the error
+ * pointer:
+ *
+ * -EINVAL   : The @fd file descriptor is not an eventfd file.
+ */
+struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
+{
+	if (file->f_op != &eventfd_fops)
+		return ERR_PTR(-EINVAL);
+
+	return eventfd_ctx_get(file->private_data);
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
+
 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 {
 	int fd;
@@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 	if (!ctx)
 		return -ENOMEM;
 
+	kref_init(&ctx->kref);
 	init_waitqueue_head(&ctx->wqh);
 	ctx->count = count;
 	ctx->flags = flags;
diff --git a/fs/exec.c b/fs/exec.c
index e639957d7a57..4a8849e45b21 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1277,8 +1277,8 @@ int do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
-	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
-	if (retval < 0)
+	retval = -ERESTARTNOINTR;
+	if (mutex_lock_interruptible(&current->cred_guard_mutex))
 		goto out_free;
 	current->in_execve = 1;
 
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d46e38cb85c5..d636e1297cad 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -125,37 +125,12 @@ fail:
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = EXT2_ACL_NOT_CACHED;
-
-	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT2_ACL_NOT_CACHED)
-		acl = posix_acl_dup(*i_acl);
-	spin_unlock(&inode->i_lock);
-
-	return acl;
-}
-
-static inline void
-ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-		   struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT2_ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 /*
  * inode->i_mutex: don't care
  */
 static struct posix_acl *
 ext2_get_acl(struct inode *inode, int type)
 {
-	struct ext2_inode_info *ei = EXT2_I(inode);
 	int name_index;
 	char *value = NULL;
 	struct posix_acl *acl;
@@ -164,23 +139,19 @@ ext2_get_acl(struct inode *inode, int type)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return NULL;
 
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			acl = ext2_iget_acl(inode, &ei->i_acl);
-			if (acl != EXT2_ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			acl = ext2_iget_acl(inode, &ei->i_default_acl);
-			if (acl != EXT2_ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
-			break;
-
-		default:
-			return ERR_PTR(-EINVAL);
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+	default:
+		BUG();
 	}
 	retval = ext2_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
@@ -197,17 +168,9 @@ ext2_get_acl(struct inode *inode, int type)
 		acl = ERR_PTR(retval);
 	kfree(value);
 
-	if (!IS_ERR(acl)) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext2_iset_acl(inode, &ei->i_acl, acl);
-				break;
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
-			case ACL_TYPE_DEFAULT:
-				ext2_iset_acl(inode, &ei->i_default_acl, acl);
-				break;
-		}
-	}
 	return acl;
 }
 
@@ -217,7 +180,6 @@ ext2_get_acl(struct inode *inode, int type)
 static int
 ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
-	struct ext2_inode_info *ei = EXT2_I(inode);
 	int name_index;
 	void *value = NULL;
 	size_t size = 0;
@@ -263,17 +225,8 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	error = ext2_xattr_set(inode, name_index, "", value, size, 0);
 
 	kfree(value);
-	if (!error) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext2_iset_acl(inode, &ei->i_acl, acl);
-				break;
-
-			case ACL_TYPE_DEFAULT:
-				ext2_iset_acl(inode, &ei->i_default_acl, acl);
-				break;
-		}
-	}
+	if (!error)
+		set_cached_acl(inode, type, acl);
 	return error;
 }
 
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index b42cf578554b..ecefe478898f 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -53,10 +53,6 @@ static inline int ext2_acl_count(size_t size)
 
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
 
-/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl
-   if the ACL has not been cached */
-#define EXT2_ACL_NOT_CACHED ((void *)-1)
-
 /* acl.c */
 extern int ext2_permission (struct inode *, int);
 extern int ext2_acl_chmod (struct inode *);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index d988a718aedb..9a8a8e27a063 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -47,10 +47,6 @@ struct ext2_inode_info {
 	 */
 	struct rw_semaphore xattr_sem;
 #endif
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
 	rwlock_t i_meta_lock;
 
 	/*
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 29ed682061f6..e27130341d4f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1224,10 +1224,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 		return inode;
 
 	ei = EXT2_I(inode);
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-	ei->i_acl = EXT2_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT2_ACL_NOT_CACHED;
-#endif
 	ei->i_block_alloc_info = NULL;
 
 	raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 6524ecaebb7a..e1dedb0f7873 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -66,8 +66,16 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
 	inode = NULL;
 	if (ino) {
 		inode = ext2_iget(dir->i_sb, ino);
-		if (IS_ERR(inode))
-			return ERR_CAST(inode);
+		if (unlikely(IS_ERR(inode))) {
+			if (PTR_ERR(inode) == -ESTALE) {
+				ext2_error(dir->i_sb, __func__,
+						"deleted inode referenced: %lu",
+						ino);
+				return ERR_PTR(-EIO);
+			} else {
+				return ERR_CAST(inode);
+			}
+		}
 	}
 	return d_splice_alias(inode, dentry);
 }
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 458999638c3d..1a9ffee47d56 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -152,10 +152,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
 	ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-	ei->i_acl = EXT2_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT2_ACL_NOT_CACHED;
-#endif
 	ei->i_block_alloc_info = NULL;
 	ei->vfs_inode.i_version = 1;
 	return &ei->vfs_inode;
@@ -198,18 +194,6 @@ static void destroy_inodecache(void)
 static void ext2_clear_inode(struct inode *inode)
 {
 	struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT2_FS_POSIX_ACL
-	struct ext2_inode_info *ei = EXT2_I(inode);
-
-	if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) {
-		posix_acl_release(ei->i_acl);
-		ei->i_acl = EXT2_ACL_NOT_CACHED;
-	}
-	if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) {
-		posix_acl_release(ei->i_default_acl);
-		ei->i_default_acl = EXT2_ACL_NOT_CACHED;
-	}
-#endif
 	ext2_discard_reservation(inode);
 	EXT2_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e0c745451715..e167bae37ef0 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -126,33 +126,6 @@ fail:
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACCESS_ONCE(*i_acl);
-
-	if (acl) {
-		spin_lock(&inode->i_lock);
-		acl = *i_acl;
-		if (acl != EXT3_ACL_NOT_CACHED)
-			acl = posix_acl_dup(acl);
-		spin_unlock(&inode->i_lock);
-	}
-
-	return acl;
-}
-
-static inline void
-ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-                  struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT3_ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -161,7 +134,6 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 static struct posix_acl *
 ext3_get_acl(struct inode *inode, int type)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
 	int name_index;
 	char *value = NULL;
 	struct posix_acl *acl;
@@ -170,24 +142,21 @@ ext3_get_acl(struct inode *inode, int type)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return NULL;
 
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			acl = ext3_iget_acl(inode, &ei->i_acl);
-			if (acl != EXT3_ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			acl = ext3_iget_acl(inode, &ei->i_default_acl);
-			if (acl != EXT3_ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
-			break;
-
-		default:
-			return ERR_PTR(-EINVAL);
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+	default:
+		BUG();
 	}
+
 	retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
 		value = kmalloc(retval, GFP_NOFS);
@@ -203,17 +172,9 @@ ext3_get_acl(struct inode *inode, int type)
 		acl = ERR_PTR(retval);
 	kfree(value);
 
-	if (!IS_ERR(acl)) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &ei->i_acl, acl);
-				break;
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
-			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &ei->i_default_acl, acl);
-				break;
-		}
-	}
 	return acl;
 }
 
@@ -226,7 +187,6 @@ static int
 ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 	     struct posix_acl *acl)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
 	int name_index;
 	void *value = NULL;
 	size_t size = 0;
@@ -271,17 +231,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 				      value, size, 0);
 
 	kfree(value);
-	if (!error) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &ei->i_acl, acl);
-				break;
 
-			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &ei->i_default_acl, acl);
-				break;
-		}
-	}
+	if (!error)
+		set_cached_acl(inode, type, acl);
+
 	return error;
 }
 
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 42da16b8cac0..07d15a3a5969 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -53,10 +53,6 @@ static inline int ext3_acl_count(size_t size)
 
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
 
-/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl
-   if the ACL has not been cached */
-#define EXT3_ACL_NOT_CACHED ((void *)-1)
-
 /* acl.c */
 extern int ext3_permission (struct inode *, int);
 extern int ext3_acl_chmod (struct inode *);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 05dea8132fc0..5f51fed5c750 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2752,10 +2752,6 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 
 	ei = EXT3_I(inode);
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	ei->i_acl = EXT3_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
-#endif
 	ei->i_block_alloc_info = NULL;
 
 	ret = __ext3_get_inode_loc(inode, &iloc, 0);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 601e881e6105..524b349c6299 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -464,10 +464,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
 	ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	ei->i_acl = EXT3_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
-#endif
 	ei->i_block_alloc_info = NULL;
 	ei->vfs_inode.i_version = 1;
 	return &ei->vfs_inode;
@@ -518,18 +514,6 @@ static void destroy_inodecache(void)
 static void ext3_clear_inode(struct inode *inode)
 {
 	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	if (EXT3_I(inode)->i_acl &&
-			EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
-		posix_acl_release(EXT3_I(inode)->i_acl);
-		EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
-	}
-	if (EXT3_I(inode)->i_default_acl &&
-			EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
-		posix_acl_release(EXT3_I(inode)->i_default_acl);
-		EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
-	}
-#endif
 	ext3_discard_reservation(inode);
 	EXT3_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 605aeed96d68..f6d8967149ca 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -126,33 +126,6 @@ fail:
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACCESS_ONCE(*i_acl);
-
-	if (acl) {
-		spin_lock(&inode->i_lock);
-		acl = *i_acl;
-		if (acl != EXT4_ACL_NOT_CACHED)
-			acl = posix_acl_dup(acl);
-		spin_unlock(&inode->i_lock);
-	}
-
-	return acl;
-}
-
-static inline void
-ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-		struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT4_ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -161,7 +134,6 @@ ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 static struct posix_acl *
 ext4_get_acl(struct inode *inode, int type)
 {
-	struct ext4_inode_info *ei = EXT4_I(inode);
 	int name_index;
 	char *value = NULL;
 	struct posix_acl *acl;
@@ -170,23 +142,19 @@ ext4_get_acl(struct inode *inode, int type)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return NULL;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		acl = ext4_iget_acl(inode, &ei->i_acl);
-		if (acl != EXT4_ACL_NOT_CACHED)
-			return acl;
 		name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
 		break;
-
 	case ACL_TYPE_DEFAULT:
-		acl = ext4_iget_acl(inode, &ei->i_default_acl);
-		if (acl != EXT4_ACL_NOT_CACHED)
-			return acl;
 		name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
 		break;
-
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 	retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
@@ -203,17 +171,9 @@ ext4_get_acl(struct inode *inode, int type)
 		acl = ERR_PTR(retval);
 	kfree(value);
 
-	if (!IS_ERR(acl)) {
-		switch (type) {
-		case ACL_TYPE_ACCESS:
-			ext4_iset_acl(inode, &ei->i_acl, acl);
-			break;
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
-		case ACL_TYPE_DEFAULT:
-			ext4_iset_acl(inode, &ei->i_default_acl, acl);
-			break;
-		}
-	}
 	return acl;
 }
 
@@ -226,7 +186,6 @@ static int
 ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 	     struct posix_acl *acl)
 {
-	struct ext4_inode_info *ei = EXT4_I(inode);
 	int name_index;
 	void *value = NULL;
 	size_t size = 0;
@@ -271,17 +230,9 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 				      value, size, 0);
 
 	kfree(value);
-	if (!error) {
-		switch (type) {
-		case ACL_TYPE_ACCESS:
-			ext4_iset_acl(inode, &ei->i_acl, acl);
-			break;
+	if (!error)
+		set_cached_acl(inode, type, acl);
 
-		case ACL_TYPE_DEFAULT:
-			ext4_iset_acl(inode, &ei->i_default_acl, acl);
-			break;
-		}
-	}
 	return error;
 }
 
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index cb45257a246e..949789d2bba6 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -53,10 +53,6 @@ static inline int ext4_acl_count(size_t size)
 
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 
-/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
-   if the ACL has not been cached */
-#define EXT4_ACL_NOT_CACHED ((void *)-1)
-
 /* acl.c */
 extern int ext4_permission(struct inode *, int);
 extern int ext4_acl_chmod(struct inode *);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 17b9998680e3..0ddf7e55abe1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -595,10 +595,6 @@ struct ext4_inode_info {
 	 */
 	struct rw_semaphore xattr_sem;
 #endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
 
 	struct list_head i_orphan;	/* unlinked but open inodes */
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7c17ae275af4..60a26f3a6f8b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4453,10 +4453,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 
 	ei = EXT4_I(inode);
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	ei->i_acl = EXT4_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
-#endif
 
 	ret = __ext4_get_inode_loc(inode, &iloc, 0);
 	if (ret < 0)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8bb9e2d3e4b8..8f4f079e6b9a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -666,10 +666,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	if (!ei)
 		return NULL;
 
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	ei->i_acl = EXT4_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
-#endif
 	ei->vfs_inode.i_version = 1;
 	ei->vfs_inode.i_data.writeback_index = 0;
 	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -735,18 +731,6 @@ static void destroy_inodecache(void)
 
 static void ext4_clear_inode(struct inode *inode)
 {
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	if (EXT4_I(inode)->i_acl &&
-			EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
-		posix_acl_release(EXT4_I(inode)->i_acl);
-		EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
-	}
-	if (EXT4_I(inode)->i_default_acl &&
-			EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
-		posix_acl_release(EXT4_I(inode)->i_default_acl);
-		EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
-	}
-#endif
 	ext4_discard_preallocations(inode);
 	if (EXT4_JOURNAL(inode))
 		jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
diff --git a/fs/fat/file.c b/fs/fat/file.c
index b28ea646ff60..f042b965c95c 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp)
 	if ((filp->f_mode & FMODE_WRITE) &&
 	     MSDOS_SB(inode->i_sb)->options.flush) {
 		fat_flush_inodes(inode->i_sb, inode, NULL);
-		congestion_wait(WRITE, HZ/10);
+		congestion_wait(BLK_RW_ASYNC, HZ/10);
 	}
 	return 0;
 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index caf049146ca2..c54226be5294 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -278,7 +278,26 @@ int sb_has_dirty_inodes(struct super_block *sb)
 EXPORT_SYMBOL(sb_has_dirty_inodes);
 
 /*
- * Write a single inode's dirty pages and inode data out to disk.
+ * Wait for writeback on an inode to complete.
+ */
+static void inode_wait_for_writeback(struct inode *inode)
+{
+	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
+	wait_queue_head_t *wqh;
+
+	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
+	do {
+		spin_unlock(&inode_lock);
+		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
+		spin_lock(&inode_lock);
+	} while (inode->i_state & I_SYNC);
+}
+
+/*
+ * Write out an inode's dirty pages.  Called under inode_lock.  Either the
+ * caller has ref on the inode (either via __iget or via syscall against an fd)
+ * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ *
  * If `wait' is set, wait on the writeout.
  *
  * The whole writeout design is quite complex and fragile.  We want to avoid
@@ -288,13 +307,38 @@ EXPORT_SYMBOL(sb_has_dirty_inodes);
  * Called under inode_lock.
  */
 static int
-__sync_single_inode(struct inode *inode, struct writeback_control *wbc)
+writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	unsigned dirty;
 	struct address_space *mapping = inode->i_mapping;
 	int wait = wbc->sync_mode == WB_SYNC_ALL;
+	unsigned dirty;
 	int ret;
 
+	if (!atomic_read(&inode->i_count))
+		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
+	else
+		WARN_ON(inode->i_state & I_WILL_FREE);
+
+	if (inode->i_state & I_SYNC) {
+		/*
+		 * If this inode is locked for writeback and we are not doing
+		 * writeback-for-data-integrity, move it to s_more_io so that
+		 * writeback can proceed with the other inodes on s_io.
+		 *
+		 * We'll have another go at writing back this inode when we
+		 * completed a full scan of s_io.
+		 */
+		if (!wait) {
+			requeue_io(inode);
+			return 0;
+		}
+
+		/*
+		 * It's a data-integrity sync.  We must wait.
+		 */
+		inode_wait_for_writeback(inode);
+	}
+
 	BUG_ON(inode->i_state & I_SYNC);
 
 	/* Set I_SYNC, reset I_DIRTY */
@@ -390,50 +434,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 }
 
 /*
- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
- * caller has ref on the inode (either via __iget or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
- */
-static int
-__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	wait_queue_head_t *wqh;
-
-	if (!atomic_read(&inode->i_count))
-		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
-	else
-		WARN_ON(inode->i_state & I_WILL_FREE);
-
-	if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
-		/*
-		 * We're skipping this inode because it's locked, and we're not
-		 * doing writeback-for-data-integrity.  Move it to s_more_io so
-		 * that writeback can proceed with the other inodes on s_io.
-		 * We'll have another go at writing back this inode when we
-		 * completed a full scan of s_io.
-		 */
-		requeue_io(inode);
-		return 0;
-	}
-
-	/*
-	 * It's a data-integrity sync.  We must wait.
-	 */
-	if (inode->i_state & I_SYNC) {
-		DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
-
-		wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-		do {
-			spin_unlock(&inode_lock);
-			__wait_on_bit(wqh, &wq, inode_wait,
-							TASK_UNINTERRUPTIBLE);
-			spin_lock(&inode_lock);
-		} while (inode->i_state & I_SYNC);
-	}
-	return __sync_single_inode(inode, wbc);
-}
-
-/*
  * Write out a superblock's list of dirty inodes.  A wait will be performed
  * upon no inodes, all inodes or the final one, depending upon sync_mode.
  *
@@ -526,7 +526,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
 		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
-		__writeback_single_inode(inode, wbc);
+		writeback_single_inode(inode, wbc);
 		if (current_is_pdflush())
 			writeback_release(bdi);
 		if (wbc->pages_skipped != pages_skipped) {
@@ -708,7 +708,7 @@ int write_inode_now(struct inode *inode, int sync)
 
 	might_sleep();
 	spin_lock(&inode_lock);
-	ret = __writeback_single_inode(inode, &wbc);
+	ret = writeback_single_inode(inode, &wbc);
 	spin_unlock(&inode_lock);
 	if (sync)
 		inode_sync_wait(inode);
@@ -732,7 +732,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 	int ret;
 
 	spin_lock(&inode_lock);
-	ret = __writeback_single_inode(inode, wbc);
+	ret = writeback_single_inode(inode, wbc);
 	spin_unlock(&inode_lock);
 	return ret;
 }
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8fed2ed12f38..cbceacbc0bf9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 
@@ -286,8 +287,8 @@ __releases(&fc->lock)
 		}
 		if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
 		    fc->connected && fc->bdi_initialized) {
-			clear_bdi_congested(&fc->bdi, READ);
-			clear_bdi_congested(&fc->bdi, WRITE);
+			clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
+			clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 		}
 		fc->num_background--;
 		fc->active_background--;
@@ -414,8 +415,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 		fc->blocked = 1;
 	if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
 	    fc->bdi_initialized) {
-		set_bdi_congested(&fc->bdi, READ);
-		set_bdi_congested(&fc->bdi, WRITE);
+		set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
+		set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
 	}
 	list_add_tail(&req->list, &fc->bg_queue);
 	flush_bg_queue(fc);
@@ -849,6 +850,81 @@ err:
 	return err;
 }
 
+static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
+				   struct fuse_copy_state *cs)
+{
+	struct fuse_notify_inval_inode_out outarg;
+	int err = -EINVAL;
+
+	if (size != sizeof(outarg))
+		goto err;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		goto err;
+	fuse_copy_finish(cs);
+
+	down_read(&fc->killsb);
+	err = -ENOENT;
+	if (!fc->sb)
+		goto err_unlock;
+
+	err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
+				       outarg.off, outarg.len);
+
+err_unlock:
+	up_read(&fc->killsb);
+	return err;
+
+err:
+	fuse_copy_finish(cs);
+	return err;
+}
+
+static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
+				   struct fuse_copy_state *cs)
+{
+	struct fuse_notify_inval_entry_out outarg;
+	int err = -EINVAL;
+	char buf[FUSE_NAME_MAX+1];
+	struct qstr name;
+
+	if (size < sizeof(outarg))
+		goto err;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		goto err;
+
+	err = -ENAMETOOLONG;
+	if (outarg.namelen > FUSE_NAME_MAX)
+		goto err;
+
+	name.name = buf;
+	name.len = outarg.namelen;
+	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
+	if (err)
+		goto err;
+	fuse_copy_finish(cs);
+	buf[outarg.namelen] = 0;
+	name.hash = full_name_hash(name.name, name.len);
+
+	down_read(&fc->killsb);
+	err = -ENOENT;
+	if (!fc->sb)
+		goto err_unlock;
+
+	err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
+
+err_unlock:
+	up_read(&fc->killsb);
+	return err;
+
+err:
+	fuse_copy_finish(cs);
+	return err;
+}
+
 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 		       unsigned int size, struct fuse_copy_state *cs)
 {
@@ -856,6 +932,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 	case FUSE_NOTIFY_POLL:
 		return fuse_notify_poll(fc, size, cs);
 
+	case FUSE_NOTIFY_INVAL_INODE:
+		return fuse_notify_inval_inode(fc, size, cs);
+
+	case FUSE_NOTIFY_INVAL_ENTRY:
+		return fuse_notify_inval_entry(fc, size, cs);
+
 	default:
 		fuse_copy_finish(cs);
 		return -EINVAL;
@@ -910,7 +992,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
 			       unsigned long nr_segs, loff_t pos)
 {
 	int err;
-	unsigned nbytes = iov_length(iov, nr_segs);
+	size_t nbytes = iov_length(iov, nr_segs);
 	struct fuse_req *req;
 	struct fuse_out_header oh;
 	struct fuse_copy_state cs;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b3089a083d30..e703654e7f40 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	struct fuse_req *req;
 	struct fuse_req *forget_req;
-	struct fuse_open_in inarg;
+	struct fuse_create_in inarg;
 	struct fuse_open_out outopen;
 	struct fuse_entry_out outentry;
 	struct fuse_file *ff;
@@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	if (!ff)
 		goto out_put_request;
 
+	if (!fc->dont_mask)
+		mode &= ~current_umask();
+
 	flags &= ~O_NOCTTY;
 	memset(&inarg, 0, sizeof(inarg));
 	memset(&outentry, 0, sizeof(outentry));
 	inarg.flags = flags;
 	inarg.mode = mode;
+	inarg.umask = current_umask();
 	req->in.h.opcode = FUSE_CREATE;
 	req->in.h.nodeid = get_node_id(dir);
 	req->in.numargs = 2;
-	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
+						sizeof(inarg);
 	req->in.args[0].value = &inarg;
 	req->in.args[1].size = entry->d_name.len + 1;
 	req->in.args[1].value = entry->d_name.name;
@@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	if (!fc->dont_mask)
+		mode &= ~current_umask();
+
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.mode = mode;
 	inarg.rdev = new_encode_dev(rdev);
+	inarg.umask = current_umask();
 	req->in.h.opcode = FUSE_MKNOD;
 	req->in.numargs = 2;
-	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
+						sizeof(inarg);
 	req->in.args[0].value = &inarg;
 	req->in.args[1].size = entry->d_name.len + 1;
 	req->in.args[1].value = entry->d_name.name;
@@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	if (!fc->dont_mask)
+		mode &= ~current_umask();
+
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.mode = mode;
+	inarg.umask = current_umask();
 	req->in.h.opcode = FUSE_MKDIR;
 	req->in.numargs = 2;
 	req->in.args[0].size = sizeof(inarg);
@@ -845,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
 	return err;
 }
 
+int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+			     struct qstr *name)
+{
+	int err = -ENOTDIR;
+	struct inode *parent;
+	struct dentry *dir;
+	struct dentry *entry;
+
+	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
+	if (!parent)
+		return -ENOENT;
+
+	mutex_lock(&parent->i_mutex);
+	if (!S_ISDIR(parent->i_mode))
+		goto unlock;
+
+	err = -ENOENT;
+	dir = d_find_alias(parent);
+	if (!dir)
+		goto unlock;
+
+	entry = d_lookup(dir, name);
+	dput(dir);
+	if (!entry)
+		goto unlock;
+
+	fuse_invalidate_attr(parent);
+	fuse_invalidate_entry(entry);
+	dput(entry);
+	err = 0;
+
+ unlock:
+	mutex_unlock(&parent->i_mutex);
+	iput(parent);
+	return err;
+}
+
 /*
  * Calling into a user-controlled filesystem gives the filesystem
  * daemon ptrace-like capabilities over the requester process.  This
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index fce6ce694fde..cbc464043b6f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1922,7 +1922,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
 
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
-		return PTR_ERR(req);
+		return POLLERR;
 
 	req->in.h.opcode = FUSE_POLL;
 	req->in.h.nodeid = ff->nodeid;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index aaf2f9ff970e..52b641fc0faf 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -446,6 +446,9 @@ struct fuse_conn {
 	/** Do multi-page cached writes */
 	unsigned big_writes:1;
 
+	/** Don't apply umask to creation modes */
+	unsigned dont_mask:1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
@@ -481,6 +484,12 @@ struct fuse_conn {
 
 	/** Called on final put */
 	void (*release)(struct fuse_conn *);
+
+	/** Super block for this connection. */
+	struct super_block *sb;
+
+	/** Read/write semaphore to hold when accessing sb. */
+	struct rw_semaphore killsb;
 };
 
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -509,6 +518,11 @@ extern const struct file_operations fuse_dev_operations;
 extern const struct dentry_operations fuse_dentry_operations;
 
 /**
+ * Inode to nodeid comparison.
+ */
+int fuse_inode_eq(struct inode *inode, void *_nodeidp);
+
+/**
  * Get a filled in inode
  */
 struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
@@ -708,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode);
 
 u64 fuse_get_attr_version(struct fuse_conn *fc);
 
+/**
+ * File-system tells the kernel to invalidate cache for the given node id.
+ */
+int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+			     loff_t offset, loff_t len);
+
+/**
+ * File-system tells the kernel to invalidate parent attributes and
+ * the dentry matching parent/name.
+ */
+int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+			     struct qstr *name);
+
 int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 		 bool isdir);
 ssize_t fuse_direct_io(struct file *file, const char __user *buf,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d8673ccf90b7..f91ccc4a189d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
 		BUG();
 }
 
-static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
+int fuse_inode_eq(struct inode *inode, void *_nodeidp)
 {
 	u64 nodeid = *(u64 *) _nodeidp;
 	if (get_node_id(inode) == nodeid)
@@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
 	return inode;
 }
 
+int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+			     loff_t offset, loff_t len)
+{
+	struct inode *inode;
+	pgoff_t pg_start;
+	pgoff_t pg_end;
+
+	inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);
+	if (!inode)
+		return -ENOENT;
+
+	fuse_invalidate_attr(inode);
+	if (offset >= 0) {
+		pg_start = offset >> PAGE_CACHE_SHIFT;
+		if (len <= 0)
+			pg_end = -1;
+		else
+			pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+		invalidate_inode_pages2_range(inode->i_mapping,
+					      pg_start, pg_end);
+	}
+	iput(inode);
+	return 0;
+}
+
 static void fuse_umount_begin(struct super_block *sb)
 {
 	fuse_abort_conn(get_fuse_conn_super(sb));
@@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc)
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
 	mutex_init(&fc->inst_mutex);
+	init_rwsem(&fc->killsb);
 	atomic_set(&fc->count, 1);
 	init_waitqueue_head(&fc->waitq);
 	init_waitqueue_head(&fc->blocked_waitq);
@@ -725,6 +751,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 			}
 			if (arg->flags & FUSE_BIG_WRITES)
 				fc->big_writes = 1;
+			if (arg->flags & FUSE_DONT_MASK)
+				fc->dont_mask = 1;
 		} else {
 			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
 			fc->no_lock = 1;
@@ -748,7 +776,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
 	arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
 	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
-		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
+		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
@@ -860,10 +888,16 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	fuse_conn_init(fc);
 
 	fc->dev = sb->s_dev;
+	fc->sb = sb;
 	err = fuse_bdi_init(fc, sb);
 	if (err)
 		goto err_put_conn;
 
+	/* Handle umasking inside the fuse code */
+	if (sb->s_flags & MS_POSIXACL)
+		fc->dont_mask = 1;
+	sb->s_flags |= MS_POSIXACL;
+
 	fc->release = fuse_free_conn;
 	fc->flags = d.flags;
 	fc->user_id = d.user_id;
@@ -941,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type,
 	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
 }
 
+static void fuse_kill_sb_anon(struct super_block *sb)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+	if (fc) {
+		down_write(&fc->killsb);
+		fc->sb = NULL;
+		up_write(&fc->killsb);
+	}
+
+	kill_anon_super(sb);
+}
+
 static struct file_system_type fuse_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuse",
 	.fs_flags	= FS_HAS_SUBTYPE,
 	.get_sb		= fuse_get_sb,
-	.kill_sb	= kill_anon_super,
+	.kill_sb	= fuse_kill_sb_anon,
 };
 
 #ifdef CONFIG_BLOCK
@@ -958,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
 			   mnt);
 }
 
+static void fuse_kill_sb_blk(struct super_block *sb)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+	if (fc) {
+		down_write(&fc->killsb);
+		fc->sb = NULL;
+		up_write(&fc->killsb);
+	}
+
+	kill_block_super(sb);
+}
+
 static struct file_system_type fuseblk_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuseblk",
 	.get_sb		= fuse_get_sb_blk,
-	.kill_sb	= kill_block_super,
+	.kill_sb	= fuse_kill_sb_blk,
 	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
 };
 
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fe02ad4740e7..032604e5ef2c 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -972,6 +972,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	sb->s_blocksize_bits = 10;
 	sb->s_magic = HOSTFS_SUPER_MAGIC;
 	sb->s_op = &hostfs_sbops;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
 
 	/* NULL is printed as <NULL> by sprintf: avoid that. */
 	if (req_root == NULL)
diff --git a/fs/inode.c b/fs/inode.c
index f643be565df8..901bad1e5f12 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,7 @@
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/async.h>
+#include <linux/posix_acl.h>
 
 /*
  * This is needed for the following functions:
@@ -189,6 +190,9 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
 	}
 	inode->i_private = NULL;
 	inode->i_mapping = mapping;
+#ifdef CONFIG_FS_POSIX_ACL
+	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
+#endif
 
 #ifdef CONFIG_FSNOTIFY
 	inode->i_fsnotify_mask = 0;
@@ -227,6 +231,12 @@ void destroy_inode(struct inode *inode)
 	ima_inode_free(inode);
 	security_inode_free(inode);
 	fsnotify_inode_delete(inode);
+#ifdef CONFIG_FS_POSIX_ACL
+	if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
+		posix_acl_release(inode->i_acl);
+	if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
+		posix_acl_release(inode->i_default_acl);
+#endif
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
@@ -665,12 +675,17 @@ void unlock_new_inode(struct inode *inode)
 	if (inode->i_mode & S_IFDIR) {
 		struct file_system_type *type = inode->i_sb->s_type;
 
-		/*
-		 * ensure nobody is actually holding i_mutex
-		 */
-		mutex_destroy(&inode->i_mutex);
-		mutex_init(&inode->i_mutex);
-		lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
+		/* Set new key only if filesystem hasn't already changed it */
+		if (!lockdep_match_class(&inode->i_mutex,
+		    &type->i_mutex_key)) {
+			/*
+			 * ensure nobody is actually holding i_mutex
+			 */
+			mutex_destroy(&inode->i_mutex);
+			mutex_init(&inode->i_mutex);
+			lockdep_set_class(&inode->i_mutex,
+					  &type->i_mutex_dir_key);
+		}
 	}
 #endif
 	/*
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 001f8d3118f2..5612880fcbe7 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
 #include <linux/uaccess.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>
+#include <linux/falloc.h>
 
 #include <asm/ioctls.h>
 
@@ -403,6 +404,37 @@ EXPORT_SYMBOL(generic_block_fiemap);
 
 #endif  /*  CONFIG_BLOCK  */
 
+/*
+ * This provides compatibility with legacy XFS pre-allocation ioctls
+ * which predate the fallocate syscall.
+ *
+ * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
+ * are used here, rest are ignored.
+ */
+int ioctl_preallocate(struct file *filp, void __user *argp)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct space_resv sr;
+
+	if (copy_from_user(&sr, argp, sizeof(sr)))
+		return -EFAULT;
+
+	switch (sr.l_whence) {
+	case SEEK_SET:
+		break;
+	case SEEK_CUR:
+		sr.l_start += filp->f_pos;
+		break;
+	case SEEK_END:
+		sr.l_start += i_size_read(inode);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
+}
+
 static int file_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -414,6 +446,9 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
 		return ioctl_fibmap(filp, p);
 	case FIONREAD:
 		return put_user(i_size_read(inode) - filp->f_pos, p);
+	case FS_IOC_RESVSP:
+	case FS_IOC_RESVSP64:
+		return ioctl_preallocate(filp, p);
 	}
 
 	return vfs_ioctl(filp, cmd, arg);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 043740dde20c..8fcb6239218e 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -156,48 +156,25 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
 	return ERR_PTR(-EINVAL);
 }
 
-static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = JFFS2_ACL_NOT_CACHED;
-
-	spin_lock(&inode->i_lock);
-	if (*i_acl != JFFS2_ACL_NOT_CACHED)
-		acl = posix_acl_dup(*i_acl);
-	spin_unlock(&inode->i_lock);
-	return acl;
-}
-
-static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != JFFS2_ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
 {
-	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 	struct posix_acl *acl;
 	char *value = NULL;
 	int rc, xprefix;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		acl = jffs2_iget_acl(inode, &f->i_acl_access);
-		if (acl != JFFS2_ACL_NOT_CACHED)
-			return acl;
 		xprefix = JFFS2_XPREFIX_ACL_ACCESS;
 		break;
 	case ACL_TYPE_DEFAULT:
-		acl = jffs2_iget_acl(inode, &f->i_acl_default);
-		if (acl != JFFS2_ACL_NOT_CACHED)
-			return acl;
 		xprefix = JFFS2_XPREFIX_ACL_DEFAULT;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 	rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0);
 	if (rc > 0) {
@@ -215,16 +192,8 @@ static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
 	}
 	if (value)
 		kfree(value);
-	if (!IS_ERR(acl)) {
-		switch (type) {
-		case ACL_TYPE_ACCESS:
-			jffs2_iset_acl(inode, &f->i_acl_access, acl);
-			break;
-		case ACL_TYPE_DEFAULT:
-			jffs2_iset_acl(inode, &f->i_acl_default, acl);
-			break;
-		}
-	}
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 	return acl;
 }
 
@@ -249,7 +218,6 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a
 
 static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
-	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 	int rc, xprefix;
 
 	if (S_ISLNK(inode->i_mode))
@@ -285,16 +253,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 		return -EINVAL;
 	}
 	rc = __jffs2_set_acl(inode, xprefix, acl);
-	if (!rc) {
-		switch(type) {
-		case ACL_TYPE_ACCESS:
-			jffs2_iset_acl(inode, &f->i_acl_access, acl);
-			break;
-		case ACL_TYPE_DEFAULT:
-			jffs2_iset_acl(inode, &f->i_acl_default, acl);
-			break;
-		}
-	}
+	if (!rc)
+		set_cached_acl(inode, type, acl);
 	return rc;
 }
 
@@ -321,12 +281,10 @@ int jffs2_permission(struct inode *inode, int mask)
 
 int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 {
-	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 	struct posix_acl *acl, *clone;
 	int rc;
 
-	f->i_acl_default = NULL;
-	f->i_acl_access = NULL;
+	cache_no_acl(inode);
 
 	if (S_ISLNK(*i_mode))
 		return 0;	/* Symlink always has no-ACL */
@@ -339,7 +297,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 		*i_mode &= ~current_umask();
 	} else {
 		if (S_ISDIR(*i_mode))
-			jffs2_iset_acl(inode, &f->i_acl_default, acl);
+			set_cached_acl(inode, ACL_TYPE_DEFAULT, acl);
 
 		clone = posix_acl_clone(acl, GFP_KERNEL);
 		if (!clone)
@@ -350,7 +308,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 			return rc;
 		}
 		if (rc > 0)
-			jffs2_iset_acl(inode, &f->i_acl_access, clone);
+			set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
 
 		posix_acl_release(clone);
 	}
@@ -359,17 +317,16 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 
 int jffs2_init_acl_post(struct inode *inode)
 {
-	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 	int rc;
 
-	if (f->i_acl_default) {
-		rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, f->i_acl_default);
+	if (inode->i_default_acl) {
+		rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, inode->i_default_acl);
 		if (rc)
 			return rc;
 	}
 
-	if (f->i_acl_access) {
-		rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, f->i_acl_access);
+	if (inode->i_acl) {
+		rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, inode->i_acl);
 		if (rc)
 			return rc;
 	}
@@ -377,18 +334,6 @@ int jffs2_init_acl_post(struct inode *inode)
 	return 0;
 }
 
-void jffs2_clear_acl(struct jffs2_inode_info *f)
-{
-	if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {
-		posix_acl_release(f->i_acl_access);
-		f->i_acl_access = JFFS2_ACL_NOT_CACHED;
-	}
-	if (f->i_acl_default && f->i_acl_default != JFFS2_ACL_NOT_CACHED) {
-		posix_acl_release(f->i_acl_default);
-		f->i_acl_default = JFFS2_ACL_NOT_CACHED;
-	}
-}
-
 int jffs2_acl_chmod(struct inode *inode)
 {
 	struct posix_acl *acl, *clone;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 8ca058aed384..fc929f2a14f6 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,13 +26,10 @@ struct jffs2_acl_header {
 
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 
-#define JFFS2_ACL_NOT_CACHED ((void *)-1)
-
 extern int jffs2_permission(struct inode *, int);
 extern int jffs2_acl_chmod(struct inode *);
 extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
 extern int jffs2_init_acl_post(struct inode *);
-extern void jffs2_clear_acl(struct jffs2_inode_info *);
 
 extern struct xattr_handler jffs2_acl_access_xattr_handler;
 extern struct xattr_handler jffs2_acl_default_xattr_handler;
@@ -43,6 +40,5 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
 #define jffs2_acl_chmod(inode)			(0)
 #define jffs2_init_acl_pre(dir_i,inode,mode)	(0)
 #define jffs2_init_acl_post(inode)		(0)
-#define jffs2_clear_acl(f)
 
 #endif	/* CONFIG_JFFS2_FS_POSIX_ACL */
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index a0244740b75a..b47679be118a 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -270,19 +270,21 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
 	D2({
 		int i=0;
 		struct jffs2_raw_node_ref *this;
-		printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n" KERN_DEBUG);
+		printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n");
 
 		this = ic->nodes;
 
+		printk(KERN_DEBUG);
 		while(this) {
-			printk( "0x%08x(%d)->", ref_offset(this), ref_flags(this));
+			printk(KERN_CONT "0x%08x(%d)->",
+			       ref_offset(this), ref_flags(this));
 			if (++i == 5) {
-				printk("\n" KERN_DEBUG);
+				printk(KERN_DEBUG);
 				i=0;
 			}
 			this = this->next_in_ino;
 		}
-		printk("\n");
+		printk(KERN_CONT "\n");
 	});
 
 	switch (ic->class) {
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 4c41db91eaa4..c6923da98263 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -50,10 +50,6 @@ struct jffs2_inode_info {
 	uint16_t flags;
 	uint8_t usercompr;
 	struct inode vfs_inode;
-#ifdef CONFIG_JFFS2_FS_POSIX_ACL
-	struct posix_acl *i_acl_access;
-	struct posix_acl *i_acl_default;
-#endif
 };
 
 #endif /* _JFFS2_FS_I */
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 2228380c47b9..a7f03b7ebcb3 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -56,10 +56,6 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
 	f->target = NULL;
 	f->flags = 0;
 	f->usercompr = 0;
-#ifdef CONFIG_JFFS2_FS_POSIX_ACL
-	f->i_acl_access = JFFS2_ACL_NOT_CACHED;
-	f->i_acl_default = JFFS2_ACL_NOT_CACHED;
-#endif
 }
 
 
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1fc1e92356ee..1a80301004b8 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1424,7 +1424,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
 	struct jffs2_full_dirent *fd, *fds;
 	int deleted;
 
-	jffs2_clear_acl(f);
 	jffs2_xattr_delete_inode(c, f->inocache);
 	mutex_lock(&f->sem);
 	deleted = f->inocache && !f->inocache->pino_nlink;
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 1d437de1e9a8..696686cc206e 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -130,9 +130,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 	if (jffs2_sum_active()) {
 		s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
 		if (!s) {
-			kfree(flashbuf);
 			JFFS2_WARNING("Can't allocate memory for summary\n");
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto out;
 		}
 	}
 
@@ -196,7 +196,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 				if (c->nextblock) {
 					ret = file_dirty(c, c->nextblock);
 					if (ret)
-						return ret;
+						goto out;
 					/* deleting summary information of the old nextblock */
 					jffs2_sum_reset_collected(c->summary);
 				}
@@ -207,7 +207,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 			} else {
 				ret = file_dirty(c, jeb);
 				if (ret)
-					return ret;
+					goto out;
 			}
 			break;
 
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 06ca1b8d2054..91fa3ad6e8c2 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -31,27 +31,24 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 {
 	struct posix_acl *acl;
 	char *ea_name;
-	struct jfs_inode_info *ji = JFS_IP(inode);
-	struct posix_acl **p_acl;
 	int size;
 	char *value = NULL;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch(type) {
 		case ACL_TYPE_ACCESS:
 			ea_name = POSIX_ACL_XATTR_ACCESS;
-			p_acl = &ji->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
 			ea_name = POSIX_ACL_XATTR_DEFAULT;
-			p_acl = &ji->i_default_acl;
 			break;
 		default:
 			return ERR_PTR(-EINVAL);
 	}
 
-	if (*p_acl != JFS_ACL_NOT_CACHED)
-		return posix_acl_dup(*p_acl);
-
 	size = __jfs_getxattr(inode, ea_name, NULL, 0);
 
 	if (size > 0) {
@@ -62,17 +59,18 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 	}
 
 	if (size < 0) {
-		if (size == -ENODATA) {
-			*p_acl = NULL;
+		if (size == -ENODATA)
 			acl = NULL;
-		} else
+		else
 			acl = ERR_PTR(size);
 	} else {
 		acl = posix_acl_from_xattr(value, size);
-		if (!IS_ERR(acl))
-			*p_acl = posix_acl_dup(acl);
 	}
 	kfree(value);
+	if (!IS_ERR(acl)) {
+		set_cached_acl(inode, type, acl);
+		posix_acl_release(acl);
+	}
 	return acl;
 }
 
@@ -80,8 +78,6 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 		       struct posix_acl *acl)
 {
 	char *ea_name;
-	struct jfs_inode_info *ji = JFS_IP(inode);
-	struct posix_acl **p_acl;
 	int rc;
 	int size = 0;
 	char *value = NULL;
@@ -92,11 +88,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 	switch(type) {
 		case ACL_TYPE_ACCESS:
 			ea_name = POSIX_ACL_XATTR_ACCESS;
-			p_acl = &ji->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
 			ea_name = POSIX_ACL_XATTR_DEFAULT;
-			p_acl = &ji->i_default_acl;
 			if (!S_ISDIR(inode->i_mode))
 				return acl ? -EACCES : 0;
 			break;
@@ -116,27 +110,24 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 out:
 	kfree(value);
 
-	if (!rc) {
-		if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED))
-			posix_acl_release(*p_acl);
-		*p_acl = posix_acl_dup(acl);
-	}
+	if (!rc)
+		set_cached_acl(inode, type, acl);
+
 	return rc;
 }
 
 static int jfs_check_acl(struct inode *inode, int mask)
 {
-	struct jfs_inode_info *ji = JFS_IP(inode);
+	struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
 
-	if (ji->i_acl == JFS_ACL_NOT_CACHED) {
-		struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
-		if (IS_ERR(acl))
-			return PTR_ERR(acl);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		int error = posix_acl_permission(inode, acl, mask);
 		posix_acl_release(acl);
+		return error;
 	}
 
-	if (ji->i_acl)
-		return posix_acl_permission(inode, ji->i_acl, mask);
 	return -EAGAIN;
 }
 
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 439901d205fe..1439f119ec83 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -74,10 +74,6 @@ struct jfs_inode_info {
 	/* xattr_sem allows us to access the xattrs without taking i_mutex */
 	struct rw_semaphore xattr_sem;
 	lid_t	xtlid;		/* lid of xtree lock on directory */
-#ifdef CONFIG_JFS_POSIX_ACL
-	struct posix_acl *i_acl;
-	struct posix_acl *i_default_acl;
-#endif
 	union {
 		struct {
 			xtpage_t _xtroot;	/* 288: xtree root */
@@ -107,8 +103,6 @@ struct jfs_inode_info {
 #define i_inline u.link._inline
 #define i_inline_ea u.link._inline_ea
 
-#define JFS_ACL_NOT_CACHED ((void *)-1)
-
 #define IREAD_LOCK(ip, subclass) \
 	down_read_nested(&JFS_IP(ip)->rdwrlock, subclass)
 #define IREAD_UNLOCK(ip)	up_read(&JFS_IP(ip)->rdwrlock)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 09b1b6ee2186..37e6dcda8fc8 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -128,18 +128,6 @@ static void jfs_destroy_inode(struct inode *inode)
 		ji->active_ag = -1;
 	}
 	spin_unlock_irq(&ji->ag_lock);
-
-#ifdef CONFIG_JFS_POSIX_ACL
-	if (ji->i_acl != JFS_ACL_NOT_CACHED) {
-		posix_acl_release(ji->i_acl);
-		ji->i_acl = JFS_ACL_NOT_CACHED;
-	}
-	if (ji->i_default_acl != JFS_ACL_NOT_CACHED) {
-		posix_acl_release(ji->i_default_acl);
-		ji->i_default_acl = JFS_ACL_NOT_CACHED;
-	}
-#endif
-
 	kmem_cache_free(jfs_inode_cachep, ji);
 }
 
@@ -798,10 +786,6 @@ static void init_once(void *foo)
 	init_rwsem(&jfs_ip->xattr_sem);
 	spin_lock_init(&jfs_ip->ag_lock);
 	jfs_ip->active_ag = -1;
-#ifdef CONFIG_JFS_POSIX_ACL
-	jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
-	jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED;
-#endif
 	inode_init_once(&jfs_ip->vfs_inode);
 }
 
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 61dfa8173ebc..fad364548bc9 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -727,10 +727,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		/*
 		 * We're changing the ACL.  Get rid of the cached one
 		 */
-		acl =JFS_IP(inode)->i_acl;
-		if (acl != JFS_ACL_NOT_CACHED)
-			posix_acl_release(acl);
-		JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
+		forget_cached_acl(inode, ACL_TYPE_ACCESS);
 
 		return 0;
 	} else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
@@ -746,10 +743,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		/*
 		 * We're changing the default ACL.  Get rid of the cached one
 		 */
-		acl =JFS_IP(inode)->i_default_acl;
-		if (acl && (acl != JFS_ACL_NOT_CACHED))
-			posix_acl_release(acl);
-		JFS_IP(inode)->i_default_acl = JFS_ACL_NOT_CACHED;
+		forget_cached_acl(inode, ACL_TYPE_DEFAULT);
 
 		return 0;
 	}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index dd7957064a8c..f2fdcbce143e 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -126,7 +126,6 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 	struct nlm_lock	*lock = &argp->lock;
 
 	nlmclnt_next_cookie(&argp->cookie);
-	argp->state   = nsm_local_state;
 	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh));
 	lock->caller  = utsname()->nodename;
 	lock->oh.data = req->a_owner;
@@ -165,6 +164,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
 	/* Set up the argument struct */
 	nlmclnt_setlockargs(call, fl);
 
+	lock_kernel();
 	if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
 		if (fl->fl_type != F_UNLCK) {
 			call->a_args.block = IS_SETLKW(cmd) ? 1 : 0;
@@ -178,6 +178,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
 
 	fl->fl_ops->fl_release_private(fl);
 	fl->fl_ops = NULL;
+	unlock_kernel();
 
 	dprintk("lockd: clnt proc returns %d\n", status);
 	return status;
@@ -519,6 +520,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 
 	if (nsm_monitor(host) < 0)
 		goto out;
+	req->a_args.state = nsm_local_state;
 
 	fl->fl_flags |= FL_ACCESS;
 	status = do_vfs_lock(fl);
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 6d5d4a4169e5..7fce1b525849 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -53,7 +53,7 @@ static				DEFINE_SPINLOCK(nsm_lock);
 /*
  * Local NSM state
  */
-int	__read_mostly		nsm_local_state;
+u32	__read_mostly		nsm_local_state;
 int	__read_mostly		nsm_use_hostnames;
 
 static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
@@ -112,6 +112,7 @@ static struct rpc_clnt *nsm_create(void)
 		.program		= &nsm_program,
 		.version		= NSM_VERSION,
 		.authflavor		= RPC_AUTH_NULL,
+		.flags			= RPC_CLNT_CREATE_NOPING,
 	};
 
 	return rpc_create(&args);
@@ -184,13 +185,19 @@ int nsm_monitor(const struct nlm_host *host)
 	nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
 
 	status = nsm_mon_unmon(nsm, NSMPROC_MON, &res);
-	if (res.status != 0)
+	if (unlikely(res.status != 0))
 		status = -EIO;
-	if (status < 0)
+	if (unlikely(status < 0)) {
 		printk(KERN_NOTICE "lockd: cannot monitor %s\n", nsm->sm_name);
-	else
-		nsm->sm_monitored = 1;
-	return status;
+		return status;
+	}
+
+	nsm->sm_monitored = 1;
+	if (unlikely(nsm_local_state != res.state)) {
+		nsm_local_state = res.state;
+		dprintk("lockd: NSM state changed to %d\n", nsm_local_state);
+	}
+	return 0;
 }
 
 /**
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 83ee34203bd7..e577a78d7bac 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -326,6 +326,8 @@ static void nlmsvc_freegrantargs(struct nlm_rqst *call)
 {
 	if (call->a_args.lock.oh.data != call->a_owner)
 		kfree(call->a_args.lock.oh.data);
+
+	locks_release_private(&call->a_args.lock.fl);
 }
 
 /*
diff --git a/fs/locks.c b/fs/locks.c
index ec3deea29e37..b6440f52178f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -151,7 +151,7 @@ static struct file_lock *locks_alloc_lock(void)
 	return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
 }
 
-static void locks_release_private(struct file_lock *fl)
+void locks_release_private(struct file_lock *fl)
 {
 	if (fl->fl_ops) {
 		if (fl->fl_ops->fl_release_private)
@@ -165,6 +165,7 @@ static void locks_release_private(struct file_lock *fl)
 	}
 
 }
+EXPORT_SYMBOL_GPL(locks_release_private);
 
 /* Free a lock which is not in use. */
 static void locks_free_lock(struct file_lock *fl)
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index cb7fdd11f9a5..9dcf95b42116 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -1,3 +1,6 @@
+#ifndef FS_MINIX_H
+#define FS_MINIX_H
+
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/minix_fs.h>
@@ -86,3 +89,5 @@ static inline struct minix_inode_info *minix_i(struct inode *inode)
 {
 	return list_entry(inode, struct minix_inode_info, vfs_inode);
 }
+
+#endif /* FS_MINIX_H */
diff --git a/fs/namei.c b/fs/namei.c
index 527119afb6a5..f3c5b278895a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1698,8 +1698,11 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	if (error)
 		return ERR_PTR(error);
 	error = path_walk(pathname, &nd);
-	if (error)
+	if (error) {
+		if (nd.root.mnt)
+			path_put(&nd.root);
 		return ERR_PTR(error);
+	}
 	if (unlikely(!audit_dummy_context()))
 		audit_inode(pathname, nd.path.dentry);
 
@@ -1758,7 +1761,13 @@ do_last:
 			goto exit;
 		}
 		filp = nameidata_to_filp(&nd, open_flag);
+		if (IS_ERR(filp))
+			ima_counts_put(&nd.path,
+				       acc_mode & (MAY_READ | MAY_WRITE |
+						   MAY_EXEC));
 		mnt_drop_write(nd.path.mnt);
+		if (nd.root.mnt)
+			path_put(&nd.root);
 		return filp;
 	}
 
@@ -1812,6 +1821,9 @@ ok:
 		goto exit;
 	}
 	filp = nameidata_to_filp(&nd, open_flag);
+	if (IS_ERR(filp))
+		ima_counts_put(&nd.path,
+			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
 	/*
 	 * It is now safe to drop the mnt write
 	 * because the filp has had a write taken
@@ -1819,6 +1831,8 @@ ok:
 	 */
 	if (will_write)
 		mnt_drop_write(nd.path.mnt);
+	if (nd.root.mnt)
+		path_put(&nd.root);
 	return filp;
 
 exit_mutex_unlock:
@@ -1859,6 +1873,8 @@ do_link:
 		 * with "intent.open".
 		 */
 		release_open_intent(&nd);
+		if (nd.root.mnt)
+			path_put(&nd.root);
 		return ERR_PTR(error);
 	}
 	nd.flags &= ~LOOKUP_PARENT;
diff --git a/fs/namespace.c b/fs/namespace.c
index 2dd333b0fe7f..277c28a63ead 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -22,6 +22,7 @@
 #include <linux/seq_file.h>
 #include <linux/mnt_namespace.h>
 #include <linux/namei.h>
+#include <linux/nsproxy.h>
 #include <linux/security.h>
 #include <linux/mount.h>
 #include <linux/ramfs.h>
@@ -42,6 +43,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 static int event;
 static DEFINE_IDA(mnt_id_ida);
 static DEFINE_IDA(mnt_group_ida);
+static int mnt_id_start = 0;
+static int mnt_group_start = 1;
 
 static struct list_head *mount_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
@@ -69,7 +72,9 @@ static int mnt_alloc_id(struct vfsmount *mnt)
 retry:
 	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
 	spin_lock(&vfsmount_lock);
-	res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
+	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
+	if (!res)
+		mnt_id_start = mnt->mnt_id + 1;
 	spin_unlock(&vfsmount_lock);
 	if (res == -EAGAIN)
 		goto retry;
@@ -79,8 +84,11 @@ retry:
 
 static void mnt_free_id(struct vfsmount *mnt)
 {
+	int id = mnt->mnt_id;
 	spin_lock(&vfsmount_lock);
-	ida_remove(&mnt_id_ida, mnt->mnt_id);
+	ida_remove(&mnt_id_ida, id);
+	if (mnt_id_start > id)
+		mnt_id_start = id;
 	spin_unlock(&vfsmount_lock);
 }
 
@@ -91,10 +99,18 @@ static void mnt_free_id(struct vfsmount *mnt)
  */
 static int mnt_alloc_group_id(struct vfsmount *mnt)
 {
+	int res;
+
 	if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
 		return -ENOMEM;
 
-	return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id);
+	res = ida_get_new_above(&mnt_group_ida,
+				mnt_group_start,
+				&mnt->mnt_group_id);
+	if (!res)
+		mnt_group_start = mnt->mnt_group_id + 1;
+
+	return res;
 }
 
 /*
@@ -102,7 +118,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
  */
 void mnt_release_group_id(struct vfsmount *mnt)
 {
-	ida_remove(&mnt_group_ida, mnt->mnt_group_id);
+	int id = mnt->mnt_group_id;
+	ida_remove(&mnt_group_ida, id);
+	if (mnt_group_start > id)
+		mnt_group_start = id;
 	mnt->mnt_group_id = 0;
 }
 
@@ -1937,6 +1956,21 @@ dput_out:
 	return retval;
 }
 
+static struct mnt_namespace *alloc_mnt_ns(void)
+{
+	struct mnt_namespace *new_ns;
+
+	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+	if (!new_ns)
+		return ERR_PTR(-ENOMEM);
+	atomic_set(&new_ns->count, 1);
+	new_ns->root = NULL;
+	INIT_LIST_HEAD(&new_ns->list);
+	init_waitqueue_head(&new_ns->poll);
+	new_ns->event = 0;
+	return new_ns;
+}
+
 /*
  * Allocate a new namespace structure and populate it with contents
  * copied from the namespace of the passed in task structure.
@@ -1948,14 +1982,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
 	struct vfsmount *p, *q;
 
-	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-	if (!new_ns)
-		return ERR_PTR(-ENOMEM);
-
-	atomic_set(&new_ns->count, 1);
-	INIT_LIST_HEAD(&new_ns->list);
-	init_waitqueue_head(&new_ns->poll);
-	new_ns->event = 0;
+	new_ns = alloc_mnt_ns();
+	if (IS_ERR(new_ns))
+		return new_ns;
 
 	down_write(&namespace_sem);
 	/* First pass: copy the tree topology */
@@ -2019,6 +2048,24 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	return new_ns;
 }
 
+/**
+ * create_mnt_ns - creates a private namespace and adds a root filesystem
+ * @mnt: pointer to the new root filesystem mountpoint
+ */
+struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+{
+	struct mnt_namespace *new_ns;
+
+	new_ns = alloc_mnt_ns();
+	if (!IS_ERR(new_ns)) {
+		mnt->mnt_ns = new_ns;
+		new_ns->root = mnt;
+		list_add(&new_ns->list, &new_ns->root->mnt_list);
+	}
+	return new_ns;
+}
+EXPORT_SYMBOL(create_mnt_ns);
+
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 		char __user *, type, unsigned long, flags, void __user *, data)
 {
@@ -2194,16 +2241,9 @@ static void __init init_mount_tree(void)
 	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
-	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
-	if (!ns)
+	ns = create_mnt_ns(mnt);
+	if (IS_ERR(ns))
 		panic("Can't allocate initial namespace");
-	atomic_set(&ns->count, 1);
-	INIT_LIST_HEAD(&ns->list);
-	init_waitqueue_head(&ns->poll);
-	ns->event = 0;
-	list_add(&mnt->mnt_list, &ns->list);
-	ns->root = mnt;
-	mnt->mnt_ns = ns;
 
 	init_task.nsproxy->mnt_ns = ns;
 	get_mnt_ns(ns);
@@ -2246,10 +2286,14 @@ void __init mnt_init(void)
 	init_mount_tree();
 }
 
-void __put_mnt_ns(struct mnt_namespace *ns)
+void put_mnt_ns(struct mnt_namespace *ns)
 {
-	struct vfsmount *root = ns->root;
+	struct vfsmount *root;
 	LIST_HEAD(umount_list);
+
+	if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
+		return;
+	root = ns->root;
 	ns->root = NULL;
 	spin_unlock(&vfsmount_lock);
 	down_write(&namespace_sem);
@@ -2260,3 +2304,4 @@ void __put_mnt_ns(struct mnt_namespace *ns)
 	release_mounts(&umount_list);
 	kfree(ns);
 }
+EXPORT_SYMBOL(put_mnt_ns);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index e67f3ec07736..2a77bc25d5af 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -1,6 +1,6 @@
 config NFS_FS
 	tristate "NFS client support"
-	depends on INET
+	depends on INET && FILE_LOCKING
 	select LOCKD
 	select SUNRPC
 	select NFS_ACL_SUPPORT if NFS_V3_ACL
@@ -74,6 +74,15 @@ config NFS_V4
 
 	  If unsure, say N.
 
+config NFS_V4_1
+	bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)"
+	depends on NFS_V4 && EXPERIMENTAL
+	help
+	  This option enables support for minor version 1 of the NFSv4 protocol
+	  (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
+
+	  Unless you're an NFS developer, say N.
+
 config ROOT_NFS
 	bool "Root file system on NFS"
 	depends on NFS_FS=y && IP_PNP
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a886e692ddd0..7f604c7941fb 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -17,6 +17,9 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/sunrpc/svcauth_gss.h>
+#if defined(CONFIG_NFS_V4_1)
+#include <linux/sunrpc/bc_xprt.h>
+#endif
 
 #include <net/inet_sock.h>
 
@@ -28,11 +31,12 @@
 
 struct nfs_callback_data {
 	unsigned int users;
+	struct svc_serv *serv;
 	struct svc_rqst *rqst;
 	struct task_struct *task;
 };
 
-static struct nfs_callback_data nfs_callback_info;
+static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1];
 static DEFINE_MUTEX(nfs_callback_mutex);
 static struct svc_program nfs4_callback_program;
 
@@ -56,10 +60,10 @@ module_param_call(callback_tcpport, param_set_port, param_get_int,
 		 &nfs_callback_set_tcpport, 0644);
 
 /*
- * This is the callback kernel thread.
+ * This is the NFSv4 callback kernel thread.
  */
 static int
-nfs_callback_svc(void *vrqstp)
+nfs4_callback_svc(void *vrqstp)
 {
 	int err, preverr = 0;
 	struct svc_rqst *rqstp = vrqstp;
@@ -97,20 +101,12 @@ nfs_callback_svc(void *vrqstp)
 }
 
 /*
- * Bring up the callback thread if it is not already up.
+ * Prepare to bring up the NFSv4 callback service
  */
-int nfs_callback_up(void)
+struct svc_rqst *
+nfs4_callback_up(struct svc_serv *serv)
 {
-	struct svc_serv *serv = NULL;
-	int ret = 0;
-
-	mutex_lock(&nfs_callback_mutex);
-	if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
-		goto out;
-	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
-	ret = -ENOMEM;
-	if (!serv)
-		goto out_err;
+	int ret;
 
 	ret = svc_create_xprt(serv, "tcp", PF_INET,
 				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
@@ -127,27 +123,174 @@ int nfs_callback_up(void)
 		nfs_callback_tcpport6 = ret;
 		dprintk("NFS: Callback listener port = %u (af %u)\n",
 				nfs_callback_tcpport6, PF_INET6);
-	} else if (ret != -EAFNOSUPPORT)
+	} else if (ret == -EAFNOSUPPORT)
+		ret = 0;
+	else
 		goto out_err;
 #endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 
-	nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
-	if (IS_ERR(nfs_callback_info.rqst)) {
-		ret = PTR_ERR(nfs_callback_info.rqst);
-		nfs_callback_info.rqst = NULL;
+	return svc_prepare_thread(serv, &serv->sv_pools[0]);
+
+out_err:
+	if (ret == 0)
+		ret = -ENOMEM;
+	return ERR_PTR(ret);
+}
+
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * The callback service for NFSv4.1 callbacks
+ */
+static int
+nfs41_callback_svc(void *vrqstp)
+{
+	struct svc_rqst *rqstp = vrqstp;
+	struct svc_serv *serv = rqstp->rq_server;
+	struct rpc_rqst *req;
+	int error;
+	DEFINE_WAIT(wq);
+
+	set_freezable();
+
+	/*
+	 * FIXME: do we really need to run this under the BKL? If so, please
+	 * add a comment about what it's intended to protect.
+	 */
+	lock_kernel();
+	while (!kthread_should_stop()) {
+		prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
+		spin_lock_bh(&serv->sv_cb_lock);
+		if (!list_empty(&serv->sv_cb_list)) {
+			req = list_first_entry(&serv->sv_cb_list,
+					struct rpc_rqst, rq_bc_list);
+			list_del(&req->rq_bc_list);
+			spin_unlock_bh(&serv->sv_cb_lock);
+			dprintk("Invoking bc_svc_process()\n");
+			error = bc_svc_process(serv, req, rqstp);
+			dprintk("bc_svc_process() returned w/ error code= %d\n",
+				error);
+		} else {
+			spin_unlock_bh(&serv->sv_cb_lock);
+			schedule();
+		}
+		finish_wait(&serv->sv_cb_waitq, &wq);
+	}
+	unlock_kernel();
+	return 0;
+}
+
+/*
+ * Bring up the NFSv4.1 callback service
+ */
+struct svc_rqst *
+nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
+{
+	struct svc_xprt *bc_xprt;
+	struct svc_rqst *rqstp = ERR_PTR(-ENOMEM);
+
+	dprintk("--> %s\n", __func__);
+	/* Create a svc_sock for the service */
+	bc_xprt = svc_sock_create(serv, xprt->prot);
+	if (!bc_xprt)
+		goto out;
+
+	/*
+	 * Save the svc_serv in the transport so that it can
+	 * be referenced when the session backchannel is initialized
+	 */
+	serv->bc_xprt = bc_xprt;
+	xprt->bc_serv = serv;
+
+	INIT_LIST_HEAD(&serv->sv_cb_list);
+	spin_lock_init(&serv->sv_cb_lock);
+	init_waitqueue_head(&serv->sv_cb_waitq);
+	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
+	if (IS_ERR(rqstp))
+		svc_sock_destroy(bc_xprt);
+out:
+	dprintk("--> %s return %p\n", __func__, rqstp);
+	return rqstp;
+}
+
+static inline int nfs_minorversion_callback_svc_setup(u32 minorversion,
+		struct svc_serv *serv, struct rpc_xprt *xprt,
+		struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
+{
+	if (minorversion) {
+		*rqstpp = nfs41_callback_up(serv, xprt);
+		*callback_svc = nfs41_callback_svc;
+	}
+	return minorversion;
+}
+
+static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
+		struct nfs_callback_data *cb_info)
+{
+	if (minorversion)
+		xprt->bc_serv = cb_info->serv;
+}
+#else
+static inline int nfs_minorversion_callback_svc_setup(u32 minorversion,
+		struct svc_serv *serv, struct rpc_xprt *xprt,
+		struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
+{
+	return 0;
+}
+
+static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
+		struct nfs_callback_data *cb_info)
+{
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+/*
+ * Bring up the callback thread if it is not already up.
+ */
+int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
+{
+	struct svc_serv *serv = NULL;
+	struct svc_rqst *rqstp;
+	int (*callback_svc)(void *vrqstp);
+	struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
+	char svc_name[12];
+	int ret = 0;
+	int minorversion_setup;
+
+	mutex_lock(&nfs_callback_mutex);
+	if (cb_info->users++ || cb_info->task != NULL) {
+		nfs_callback_bc_serv(minorversion, xprt, cb_info);
+		goto out;
+	}
+	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
+	if (!serv) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	minorversion_setup =  nfs_minorversion_callback_svc_setup(minorversion,
+					serv, xprt, &rqstp, &callback_svc);
+	if (!minorversion_setup) {
+		/* v4.0 callback setup */
+		rqstp = nfs4_callback_up(serv);
+		callback_svc = nfs4_callback_svc;
+	}
+
+	if (IS_ERR(rqstp)) {
+		ret = PTR_ERR(rqstp);
 		goto out_err;
 	}
 
 	svc_sock_update_bufs(serv);
 
-	nfs_callback_info.task = kthread_run(nfs_callback_svc,
-					     nfs_callback_info.rqst,
-					     "nfsv4-svc");
-	if (IS_ERR(nfs_callback_info.task)) {
-		ret = PTR_ERR(nfs_callback_info.task);
-		svc_exit_thread(nfs_callback_info.rqst);
-		nfs_callback_info.rqst = NULL;
-		nfs_callback_info.task = NULL;
+	sprintf(svc_name, "nfsv4.%u-svc", minorversion);
+	cb_info->serv = serv;
+	cb_info->rqst = rqstp;
+	cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name);
+	if (IS_ERR(cb_info->task)) {
+		ret = PTR_ERR(cb_info->task);
+		svc_exit_thread(cb_info->rqst);
+		cb_info->rqst = NULL;
+		cb_info->task = NULL;
 		goto out_err;
 	}
 out:
@@ -164,22 +307,25 @@ out:
 out_err:
 	dprintk("NFS: Couldn't create callback socket or server thread; "
 		"err = %d\n", ret);
-	nfs_callback_info.users--;
+	cb_info->users--;
 	goto out;
 }
 
 /*
  * Kill the callback thread if it's no longer being used.
  */
-void nfs_callback_down(void)
+void nfs_callback_down(int minorversion)
 {
+	struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
+
 	mutex_lock(&nfs_callback_mutex);
-	nfs_callback_info.users--;
-	if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) {
-		kthread_stop(nfs_callback_info.task);
-		svc_exit_thread(nfs_callback_info.rqst);
-		nfs_callback_info.rqst = NULL;
-		nfs_callback_info.task = NULL;
+	cb_info->users--;
+	if (cb_info->users == 0 && cb_info->task != NULL) {
+		kthread_stop(cb_info->task);
+		svc_exit_thread(cb_info->rqst);
+		cb_info->serv = NULL;
+		cb_info->rqst = NULL;
+		cb_info->task = NULL;
 	}
 	mutex_unlock(&nfs_callback_mutex);
 }
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index e110e286a262..07baa8254ca1 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -20,13 +20,24 @@ enum nfs4_callback_procnum {
 enum nfs4_callback_opnum {
 	OP_CB_GETATTR = 3,
 	OP_CB_RECALL  = 4,
+/* Callback operations new to NFSv4.1 */
+	OP_CB_LAYOUTRECALL  = 5,
+	OP_CB_NOTIFY        = 6,
+	OP_CB_PUSH_DELEG    = 7,
+	OP_CB_RECALL_ANY    = 8,
+	OP_CB_RECALLABLE_OBJ_AVAIL = 9,
+	OP_CB_RECALL_SLOT   = 10,
+	OP_CB_SEQUENCE      = 11,
+	OP_CB_WANTS_CANCELLED = 12,
+	OP_CB_NOTIFY_LOCK   = 13,
+	OP_CB_NOTIFY_DEVICEID = 14,
 	OP_CB_ILLEGAL = 10044,
 };
 
 struct cb_compound_hdr_arg {
 	unsigned int taglen;
 	const char *tag;
-	unsigned int callback_ident;
+	unsigned int minorversion;
 	unsigned nops;
 };
 
@@ -59,16 +70,59 @@ struct cb_recallargs {
 	uint32_t truncate;
 };
 
+#if defined(CONFIG_NFS_V4_1)
+
+struct referring_call {
+	uint32_t			rc_sequenceid;
+	uint32_t			rc_slotid;
+};
+
+struct referring_call_list {
+	struct nfs4_sessionid		rcl_sessionid;
+	uint32_t			rcl_nrefcalls;
+	struct referring_call 		*rcl_refcalls;
+};
+
+struct cb_sequenceargs {
+	struct sockaddr			*csa_addr;
+	struct nfs4_sessionid		csa_sessionid;
+	uint32_t			csa_sequenceid;
+	uint32_t			csa_slotid;
+	uint32_t			csa_highestslotid;
+	uint32_t			csa_cachethis;
+	uint32_t			csa_nrclists;
+	struct referring_call_list	*csa_rclists;
+};
+
+struct cb_sequenceres {
+	__be32				csr_status;
+	struct nfs4_sessionid		csr_sessionid;
+	uint32_t			csr_sequenceid;
+	uint32_t			csr_slotid;
+	uint32_t			csr_highestslotid;
+	uint32_t			csr_target_highestslotid;
+};
+
+extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
+				       struct cb_sequenceres *res);
+
+#endif /* CONFIG_NFS_V4_1 */
+
 extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
 
 #ifdef CONFIG_NFS_V4
-extern int nfs_callback_up(void);
-extern void nfs_callback_down(void);
-#else
-#define nfs_callback_up()	(0)
-#define nfs_callback_down()	do {} while(0)
-#endif
+extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
+extern void nfs_callback_down(int minorversion);
+#endif /* CONFIG_NFS_V4 */
+
+/*
+ * nfs41: Callbacks are expected to not cause substantial latency,
+ * so we limit their concurrency to 1 by setting up the maximum number
+ * of slots for the backchannel.
+ */
+#define NFS41_BC_MIN_CALLBACKS 1
+#define NFS41_BC_MAX_CALLBACKS 1
 
 extern unsigned int nfs_callback_set_tcpport;
 extern unsigned short nfs_callback_tcpport;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f7e83e23cf9f..b7da1f54da68 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -101,3 +101,130 @@ out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
 	return res;
 }
+
+#if defined(CONFIG_NFS_V4_1)
+
+/*
+ * Validate the sequenceID sent by the server.
+ * Return success if the sequenceID is one more than what we last saw on
+ * this slot, accounting for wraparound.  Increments the slot's sequence.
+ *
+ * We don't yet implement a duplicate request cache, so at this time
+ * we will log replays, and process them as if we had not seen them before,
+ * but we don't bump the sequence in the slot.  Not too worried about it,
+ * since we only currently implement idempotent callbacks anyway.
+ *
+ * We have a single slot backchannel at this time, so we don't bother
+ * checking the used_slots bit array on the table.  The lower layer guarantees
+ * a single outstanding callback request at a time.
+ */
+static int
+validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid)
+{
+	struct nfs4_slot *slot;
+
+	dprintk("%s enter. slotid %d seqid %d\n",
+		__func__, slotid, seqid);
+
+	if (slotid > NFS41_BC_MAX_CALLBACKS)
+		return htonl(NFS4ERR_BADSLOT);
+
+	slot = tbl->slots + slotid;
+	dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr);
+
+	/* Normal */
+	if (likely(seqid == slot->seq_nr + 1)) {
+		slot->seq_nr++;
+		return htonl(NFS4_OK);
+	}
+
+	/* Replay */
+	if (seqid == slot->seq_nr) {
+		dprintk("%s seqid %d is a replay - no DRC available\n",
+			__func__, seqid);
+		return htonl(NFS4_OK);
+	}
+
+	/* Wraparound */
+	if (seqid == 1 && (slot->seq_nr + 1) == 0) {
+		slot->seq_nr = 1;
+		return htonl(NFS4_OK);
+	}
+
+	/* Misordered request */
+	return htonl(NFS4ERR_SEQ_MISORDERED);
+}
+
+/*
+ * Returns a pointer to a held 'struct nfs_client' that matches the server's
+ * address, major version number, and session ID.  It is the caller's
+ * responsibility to release the returned reference.
+ *
+ * Returns NULL if there are no connections with sessions, or if no session
+ * matches the one of interest.
+ */
+ static struct nfs_client *find_client_with_session(
+	const struct sockaddr *addr, u32 nfsversion,
+	struct nfs4_sessionid *sessionid)
+{
+	struct nfs_client *clp;
+
+	clp = nfs_find_client(addr, 4);
+	if (clp == NULL)
+		return NULL;
+
+	do {
+		struct nfs_client *prev = clp;
+
+		if (clp->cl_session != NULL) {
+			if (memcmp(clp->cl_session->sess_id.data,
+					sessionid->data,
+					NFS4_MAX_SESSIONID_LEN) == 0) {
+				/* Returns a held reference to clp */
+				return clp;
+			}
+		}
+		clp = nfs_find_client_next(prev);
+		nfs_put_client(prev);
+	} while (clp != NULL);
+
+	return NULL;
+}
+
+/* FIXME: referring calls should be processed */
+unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
+				struct cb_sequenceres *res)
+{
+	struct nfs_client *clp;
+	int i, status;
+
+	for (i = 0; i < args->csa_nrclists; i++)
+		kfree(args->csa_rclists[i].rcl_refcalls);
+	kfree(args->csa_rclists);
+
+	status = htonl(NFS4ERR_BADSESSION);
+	clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
+	if (clp == NULL)
+		goto out;
+
+	status = validate_seqid(&clp->cl_session->bc_slot_table,
+				args->csa_slotid, args->csa_sequenceid);
+	if (status)
+		goto out_putclient;
+
+	memcpy(&res->csr_sessionid, &args->csa_sessionid,
+	       sizeof(res->csr_sessionid));
+	res->csr_sequenceid = args->csa_sequenceid;
+	res->csr_slotid = args->csa_slotid;
+	res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+	res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+
+out_putclient:
+	nfs_put_client(clp);
+out:
+	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+	res->csr_status = status;
+	return res->csr_status;
+}
+
+#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index dd0ef34b5845..e5a2dac5f715 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -20,6 +20,11 @@
 				2 + 2 + 3 + 3)
 #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
 
+#if defined(CONFIG_NFS_V4_1)
+#define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
+					4 + 1 + 3)
+#endif /* CONFIG_NFS_V4_1 */
+
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 
 typedef __be32 (*callback_process_op_t)(void *, void *);
@@ -132,7 +137,6 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
 {
 	__be32 *p;
-	unsigned int minor_version;
 	__be32 status;
 
 	status = decode_string(xdr, &hdr->taglen, &hdr->tag);
@@ -147,15 +151,19 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
 	p = read_buf(xdr, 12);
 	if (unlikely(p == NULL))
 		return htonl(NFS4ERR_RESOURCE);
-	minor_version = ntohl(*p++);
-	/* Check minor version is zero. */
-	if (minor_version != 0) {
-		printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n",
-				__func__, minor_version);
+	hdr->minorversion = ntohl(*p++);
+	/* Check minor version is zero or one. */
+	if (hdr->minorversion <= 1) {
+		p++;	/* skip callback_ident */
+	} else {
+		printk(KERN_WARNING "%s: NFSv4 server callback with "
+			"illegal minor version %u!\n",
+			__func__, hdr->minorversion);
 		return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
 	}
-	hdr->callback_ident = ntohl(*p++);
 	hdr->nops = ntohl(*p);
+	dprintk("%s: minorversion %d nops %d\n", __func__,
+		hdr->minorversion, hdr->nops);
 	return 0;
 }
 
@@ -204,6 +212,122 @@ out:
 	return status;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+
+static unsigned decode_sessionid(struct xdr_stream *xdr,
+				 struct nfs4_sessionid *sid)
+{
+	uint32_t *p;
+	int len = NFS4_MAX_SESSIONID_LEN;
+
+	p = read_buf(xdr, len);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);;
+
+	memcpy(sid->data, p, len);
+	return 0;
+}
+
+static unsigned decode_rc_list(struct xdr_stream *xdr,
+			       struct referring_call_list *rc_list)
+{
+	uint32_t *p;
+	int i;
+	unsigned status;
+
+	status = decode_sessionid(xdr, &rc_list->rcl_sessionid);
+	if (status)
+		goto out;
+
+	status = htonl(NFS4ERR_RESOURCE);
+	p = read_buf(xdr, sizeof(uint32_t));
+	if (unlikely(p == NULL))
+		goto out;
+
+	rc_list->rcl_nrefcalls = ntohl(*p++);
+	if (rc_list->rcl_nrefcalls) {
+		p = read_buf(xdr,
+			     rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
+		if (unlikely(p == NULL))
+			goto out;
+		rc_list->rcl_refcalls = kmalloc(rc_list->rcl_nrefcalls *
+						sizeof(*rc_list->rcl_refcalls),
+						GFP_KERNEL);
+		if (unlikely(rc_list->rcl_refcalls == NULL))
+			goto out;
+		for (i = 0; i < rc_list->rcl_nrefcalls; i++) {
+			rc_list->rcl_refcalls[i].rc_sequenceid = ntohl(*p++);
+			rc_list->rcl_refcalls[i].rc_slotid = ntohl(*p++);
+		}
+	}
+	status = 0;
+
+out:
+	return status;
+}
+
+static unsigned decode_cb_sequence_args(struct svc_rqst *rqstp,
+					struct xdr_stream *xdr,
+					struct cb_sequenceargs *args)
+{
+	uint32_t *p;
+	int i;
+	unsigned status;
+
+	status = decode_sessionid(xdr, &args->csa_sessionid);
+	if (status)
+		goto out;
+
+	status = htonl(NFS4ERR_RESOURCE);
+	p = read_buf(xdr, 5 * sizeof(uint32_t));
+	if (unlikely(p == NULL))
+		goto out;
+
+	args->csa_addr = svc_addr(rqstp);
+	args->csa_sequenceid = ntohl(*p++);
+	args->csa_slotid = ntohl(*p++);
+	args->csa_highestslotid = ntohl(*p++);
+	args->csa_cachethis = ntohl(*p++);
+	args->csa_nrclists = ntohl(*p++);
+	args->csa_rclists = NULL;
+	if (args->csa_nrclists) {
+		args->csa_rclists = kmalloc(args->csa_nrclists *
+					    sizeof(*args->csa_rclists),
+					    GFP_KERNEL);
+		if (unlikely(args->csa_rclists == NULL))
+			goto out;
+
+		for (i = 0; i < args->csa_nrclists; i++) {
+			status = decode_rc_list(xdr, &args->csa_rclists[i]);
+			if (status)
+				goto out_free;
+		}
+	}
+	status = 0;
+
+	dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u slotid %u "
+		"highestslotid %u cachethis %d nrclists %u\n",
+		__func__,
+		((u32 *)&args->csa_sessionid)[0],
+		((u32 *)&args->csa_sessionid)[1],
+		((u32 *)&args->csa_sessionid)[2],
+		((u32 *)&args->csa_sessionid)[3],
+		args->csa_sequenceid, args->csa_slotid,
+		args->csa_highestslotid, args->csa_cachethis,
+		args->csa_nrclists);
+out:
+	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+	return status;
+
+out_free:
+	for (i = 0; i < args->csa_nrclists; i++)
+		kfree(args->csa_rclists[i].rcl_refcalls);
+	kfree(args->csa_rclists);
+	goto out;
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
 static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
 {
 	__be32 *p;
@@ -353,31 +477,134 @@ out:
 	return status;
 }
 
-static __be32 process_op(struct svc_rqst *rqstp,
+#if defined(CONFIG_NFS_V4_1)
+
+static unsigned encode_sessionid(struct xdr_stream *xdr,
+				 const struct nfs4_sessionid *sid)
+{
+	uint32_t *p;
+	int len = NFS4_MAX_SESSIONID_LEN;
+
+	p = xdr_reserve_space(xdr, len);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+
+	memcpy(p, sid, len);
+	return 0;
+}
+
+static unsigned encode_cb_sequence_res(struct svc_rqst *rqstp,
+				       struct xdr_stream *xdr,
+				       const struct cb_sequenceres *res)
+{
+	uint32_t *p;
+	unsigned status = res->csr_status;
+
+	if (unlikely(status != 0))
+		goto out;
+
+	encode_sessionid(xdr, &res->csr_sessionid);
+
+	p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t));
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+
+	*p++ = htonl(res->csr_sequenceid);
+	*p++ = htonl(res->csr_slotid);
+	*p++ = htonl(res->csr_highestslotid);
+	*p++ = htonl(res->csr_target_highestslotid);
+out:
+	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+	return status;
+}
+
+static __be32
+preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
+{
+	if (op_nr == OP_CB_SEQUENCE) {
+		if (nop != 0)
+			return htonl(NFS4ERR_SEQUENCE_POS);
+	} else {
+		if (nop == 0)
+			return htonl(NFS4ERR_OP_NOT_IN_SESSION);
+	}
+
+	switch (op_nr) {
+	case OP_CB_GETATTR:
+	case OP_CB_RECALL:
+	case OP_CB_SEQUENCE:
+		*op = &callback_ops[op_nr];
+		break;
+
+	case OP_CB_LAYOUTRECALL:
+	case OP_CB_NOTIFY_DEVICEID:
+	case OP_CB_NOTIFY:
+	case OP_CB_PUSH_DELEG:
+	case OP_CB_RECALL_ANY:
+	case OP_CB_RECALLABLE_OBJ_AVAIL:
+	case OP_CB_RECALL_SLOT:
+	case OP_CB_WANTS_CANCELLED:
+	case OP_CB_NOTIFY_LOCK:
+		return htonl(NFS4ERR_NOTSUPP);
+
+	default:
+		return htonl(NFS4ERR_OP_ILLEGAL);
+	}
+
+	return htonl(NFS_OK);
+}
+
+#else /* CONFIG_NFS_V4_1 */
+
+static __be32
+preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
+{
+	return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
+static __be32
+preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
+{
+	switch (op_nr) {
+	case OP_CB_GETATTR:
+	case OP_CB_RECALL:
+		*op = &callback_ops[op_nr];
+		break;
+	default:
+		return htonl(NFS4ERR_OP_ILLEGAL);
+	}
+
+	return htonl(NFS_OK);
+}
+
+static __be32 process_op(uint32_t minorversion, int nop,
+		struct svc_rqst *rqstp,
 		struct xdr_stream *xdr_in, void *argp,
 		struct xdr_stream *xdr_out, void *resp)
 {
 	struct callback_op *op = &callback_ops[0];
 	unsigned int op_nr = OP_CB_ILLEGAL;
-	__be32 status = 0;
+	__be32 status;
 	long maxlen;
 	__be32 res;
 
 	dprintk("%s: start\n", __func__);
 	status = decode_op_hdr(xdr_in, &op_nr);
-	if (likely(status == 0)) {
-		switch (op_nr) {
-			case OP_CB_GETATTR:
-			case OP_CB_RECALL:
-				op = &callback_ops[op_nr];
-				break;
-			default:
-				op_nr = OP_CB_ILLEGAL;
-				op = &callback_ops[0];
-				status = htonl(NFS4ERR_OP_ILLEGAL);
-		}
+	if (unlikely(status)) {
+		status = htonl(NFS4ERR_OP_ILLEGAL);
+		goto out;
 	}
 
+	dprintk("%s: minorversion=%d nop=%d op_nr=%u\n",
+		__func__, minorversion, nop, op_nr);
+
+	status = minorversion ? preprocess_nfs41_op(nop, op_nr, &op) :
+				preprocess_nfs4_op(op_nr, &op);
+	if (status == htonl(NFS4ERR_OP_ILLEGAL))
+		op_nr = OP_CB_ILLEGAL;
+out:
 	maxlen = xdr_out->end - xdr_out->p;
 	if (maxlen > 0 && maxlen < PAGE_SIZE) {
 		if (likely(status == 0 && op->decode_args != NULL))
@@ -425,7 +652,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 		return rpc_system_err;
 
 	while (status == 0 && nops != hdr_arg.nops) {
-		status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp);
+		status = process_op(hdr_arg.minorversion, nops,
+				    rqstp, &xdr_in, argp, &xdr_out, resp);
 		nops++;
 	}
 
@@ -452,7 +680,15 @@ static struct callback_op callback_ops[] = {
 		.process_op = (callback_process_op_t)nfs4_callback_recall,
 		.decode_args = (callback_decode_arg_t)decode_recall_args,
 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
-	}
+	},
+#if defined(CONFIG_NFS_V4_1)
+	[OP_CB_SEQUENCE] = {
+		.process_op = (callback_process_op_t)nfs4_callback_sequence,
+		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
+		.encode_res = (callback_encode_res_t)encode_cb_sequence_res,
+		.res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ,
+	},
+#endif /* CONFIG_NFS_V4_1 */
 };
 
 /*
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 75c9cd2aa119..c2d061675d80 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -37,6 +37,7 @@
 #include <linux/in6.h>
 #include <net/ipv6.h>
 #include <linux/nfs_xdr.h>
+#include <linux/sunrpc/bc_xprt.h>
 
 #include <asm/system.h>
 
@@ -102,6 +103,7 @@ struct nfs_client_initdata {
 	size_t addrlen;
 	const struct nfs_rpc_ops *rpc_ops;
 	int proto;
+	u32 minorversion;
 };
 
 /*
@@ -114,18 +116,13 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 {
 	struct nfs_client *clp;
 	struct rpc_cred *cred;
+	int err = -ENOMEM;
 
 	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
 		goto error_0;
 
 	clp->rpc_ops = cl_init->rpc_ops;
 
-	if (cl_init->rpc_ops->version == 4) {
-		if (nfs_callback_up() < 0)
-			goto error_2;
-		__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
-	}
-
 	atomic_set(&clp->cl_count, 1);
 	clp->cl_cons_state = NFS_CS_INITING;
 
@@ -133,9 +130,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_addrlen = cl_init->addrlen;
 
 	if (cl_init->hostname) {
+		err = -ENOMEM;
 		clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL);
 		if (!clp->cl_hostname)
-			goto error_3;
+			goto error_cleanup;
 	}
 
 	INIT_LIST_HEAD(&clp->cl_superblocks);
@@ -150,6 +148,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+	clp->cl_minorversion = cl_init->minorversion;
 #endif
 	cred = rpc_lookup_machine_cred();
 	if (!IS_ERR(cred))
@@ -159,13 +158,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 
 	return clp;
 
-error_3:
-	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
-		nfs_callback_down();
-error_2:
+error_cleanup:
 	kfree(clp);
 error_0:
-	return NULL;
+	return ERR_PTR(err);
 }
 
 static void nfs4_shutdown_client(struct nfs_client *clp)
@@ -182,12 +178,42 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 }
 
 /*
+ * Destroy the NFS4 callback service
+ */
+static void nfs4_destroy_callback(struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down(clp->cl_minorversion);
+#endif /* CONFIG_NFS_V4 */
+}
+
+/*
+ * Clears/puts all minor version specific parts from an nfs_client struct
+ * reverting it to minorversion 0.
+ */
+static void nfs4_clear_client_minor_version(struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4_1
+	if (nfs4_has_session(clp)) {
+		nfs4_destroy_session(clp->cl_session);
+		clp->cl_session = NULL;
+	}
+
+	clp->cl_call_sync = _nfs4_call_sync;
+#endif /* CONFIG_NFS_V4_1 */
+
+	nfs4_destroy_callback(clp);
+}
+
+/*
  * Destroy a shared client record
  */
 static void nfs_free_client(struct nfs_client *clp)
 {
 	dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
 
+	nfs4_clear_client_minor_version(clp);
 	nfs4_shutdown_client(clp);
 
 	nfs_fscache_release_client_cookie(clp);
@@ -196,9 +222,6 @@ static void nfs_free_client(struct nfs_client *clp)
 	if (!IS_ERR(clp->cl_rpcclient))
 		rpc_shutdown_client(clp->cl_rpcclient);
 
-	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
-		nfs_callback_down();
-
 	if (clp->cl_machine_cred != NULL)
 		put_rpccred(clp->cl_machine_cred);
 
@@ -347,7 +370,8 @@ struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
 		struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 
 		/* Don't match clients that failed to initialise properly */
-		if (clp->cl_cons_state != NFS_CS_READY)
+		if (!(clp->cl_cons_state == NFS_CS_READY ||
+		      clp->cl_cons_state == NFS_CS_SESSION_INITING))
 			continue;
 
 		/* Different NFS versions cannot share the same nfs_client */
@@ -420,7 +444,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 
 		if (clp->cl_proto != data->proto)
 			continue;
-
+		/* Match nfsv4 minorversion */
+		if (clp->cl_minorversion != data->minorversion)
+			continue;
 		/* Match the full socket address */
 		if (!nfs_sockaddr_cmp(sap, clap))
 			continue;
@@ -456,9 +482,10 @@ static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_in
 		spin_unlock(&nfs_client_lock);
 
 		new = nfs_alloc_client(cl_init);
-	} while (new);
+	} while (!IS_ERR(new));
 
-	return ERR_PTR(-ENOMEM);
+	dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new));
+	return new;
 
 	/* install a new client and return with it unready */
 install_client:
@@ -478,7 +505,7 @@ found_client:
 		nfs_free_client(new);
 
 	error = wait_event_killable(nfs_client_active_wq,
-				clp->cl_cons_state != NFS_CS_INITING);
+				clp->cl_cons_state < NFS_CS_INITING);
 	if (error < 0) {
 		nfs_put_client(clp);
 		return ERR_PTR(-ERESTARTSYS);
@@ -499,13 +526,29 @@ found_client:
 /*
  * Mark a server as ready or failed
  */
-static void nfs_mark_client_ready(struct nfs_client *clp, int state)
+void nfs_mark_client_ready(struct nfs_client *clp, int state)
 {
 	clp->cl_cons_state = state;
 	wake_up_all(&nfs_client_active_wq);
 }
 
 /*
+ * With sessions, the client is not marked ready until after a
+ * successful EXCHANGE_ID and CREATE_SESSION.
+ *
+ * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
+ * other versions of NFS can be tried.
+ */
+int nfs4_check_client_ready(struct nfs_client *clp)
+{
+	if (!nfs4_has_session(clp))
+		return 0;
+	if (clp->cl_cons_state < NFS_CS_READY)
+		return -EPROTONOSUPPORT;
+	return 0;
+}
+
+/*
  * Initialise the timeout values for a connection
  */
 static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
@@ -1050,6 +1093,61 @@ error:
 
 #ifdef CONFIG_NFS_V4
 /*
+ * Initialize the NFS4 callback service
+ */
+static int nfs4_init_callback(struct nfs_client *clp)
+{
+	int error;
+
+	if (clp->rpc_ops->version == 4) {
+		if (nfs4_has_session(clp)) {
+			error = xprt_setup_backchannel(
+						clp->cl_rpcclient->cl_xprt,
+						NFS41_BC_MIN_CALLBACKS);
+			if (error < 0)
+				return error;
+		}
+
+		error = nfs_callback_up(clp->cl_minorversion,
+					clp->cl_rpcclient->cl_xprt);
+		if (error < 0) {
+			dprintk("%s: failed to start callback. Error = %d\n",
+				__func__, error);
+			return error;
+		}
+		__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+	}
+	return 0;
+}
+
+/*
+ * Initialize the minor version specific parts of an NFS4 client record
+ */
+static int nfs4_init_client_minor_version(struct nfs_client *clp)
+{
+	clp->cl_call_sync = _nfs4_call_sync;
+
+#if defined(CONFIG_NFS_V4_1)
+	if (clp->cl_minorversion) {
+		struct nfs4_session *session = NULL;
+		/*
+		 * Create the session and mark it expired.
+		 * When a SEQUENCE operation encounters the expired session
+		 * it will do session recovery to initialize it.
+		 */
+		session = nfs4_alloc_session(clp);
+		if (!session)
+			return -ENOMEM;
+
+		clp->cl_session = session;
+		clp->cl_call_sync = _nfs4_call_sync_session;
+	}
+#endif /* CONFIG_NFS_V4_1 */
+
+	return nfs4_init_callback(clp);
+}
+
+/*
  * Initialise an NFS4 client record
  */
 static int nfs4_init_client(struct nfs_client *clp,
@@ -1083,7 +1181,12 @@ static int nfs4_init_client(struct nfs_client *clp,
 	}
 	__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
 
-	nfs_mark_client_ready(clp, NFS_CS_READY);
+	error = nfs4_init_client_minor_version(clp);
+	if (error < 0)
+		goto error;
+
+	if (!nfs4_has_session(clp))
+		nfs_mark_client_ready(clp, NFS_CS_READY);
 	return 0;
 
 error:
@@ -1101,7 +1204,8 @@ static int nfs4_set_client(struct nfs_server *server,
 		const size_t addrlen,
 		const char *ip_addr,
 		rpc_authflavor_t authflavour,
-		int proto, const struct rpc_timeout *timeparms)
+		int proto, const struct rpc_timeout *timeparms,
+		u32 minorversion)
 {
 	struct nfs_client_initdata cl_init = {
 		.hostname = hostname,
@@ -1109,6 +1213,7 @@ static int nfs4_set_client(struct nfs_server *server,
 		.addrlen = addrlen,
 		.rpc_ops = &nfs_v4_clientops,
 		.proto = proto,
+		.minorversion = minorversion,
 	};
 	struct nfs_client *clp;
 	int error;
@@ -1138,6 +1243,36 @@ error:
 }
 
 /*
+ * Initialize a session.
+ * Note: save the mount rsize and wsize for create_server negotiation.
+ */
+static void nfs4_init_session(struct nfs_client *clp,
+			      unsigned int wsize, unsigned int rsize)
+{
+#if defined(CONFIG_NFS_V4_1)
+	if (nfs4_has_session(clp)) {
+		clp->cl_session->fc_attrs.max_rqst_sz = wsize;
+		clp->cl_session->fc_attrs.max_resp_sz = rsize;
+	}
+#endif /* CONFIG_NFS_V4_1 */
+}
+
+/*
+ * Session has been established, and the client marked ready.
+ * Set the mount rsize and wsize with negotiated fore channel
+ * attributes which will be bound checked in nfs_server_set_fsinfo.
+ */
+static void nfs4_session_set_rwsize(struct nfs_server *server)
+{
+#ifdef CONFIG_NFS_V4_1
+	if (!nfs4_has_session(server->nfs_client))
+		return;
+	server->rsize = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
+	server->wsize = server->nfs_client->cl_session->fc_attrs.max_rqst_sz;
+#endif /* CONFIG_NFS_V4_1 */
+}
+
+/*
  * Create a version 4 volume record
  */
 static int nfs4_init_server(struct nfs_server *server,
@@ -1164,7 +1299,8 @@ static int nfs4_init_server(struct nfs_server *server,
 			data->client_address,
 			data->auth_flavors[0],
 			data->nfs_server.protocol,
-			&timeparms);
+			&timeparms,
+			data->minorversion);
 	if (error < 0)
 		goto error;
 
@@ -1214,6 +1350,8 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
 	BUG_ON(!server->nfs_client->rpc_ops);
 	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
 
+	nfs4_init_session(server->nfs_client, server->wsize, server->rsize);
+
 	/* Probe the root fh to retrieve its FSID */
 	error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
 	if (error < 0)
@@ -1224,6 +1362,8 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
 		(unsigned long long) server->fsid.minor);
 	dprintk("Mount FH: %d\n", mntfh->size);
 
+	nfs4_session_set_rwsize(server);
+
 	error = nfs_probe_fsinfo(server, mntfh, &fattr);
 	if (error < 0)
 		goto error;
@@ -1282,7 +1422,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 				parent_client->cl_ipaddr,
 				data->authflavor,
 				parent_server->client->cl_xprt->prot,
-				parent_server->client->cl_timeout);
+				parent_server->client->cl_timeout,
+				parent_client->cl_minorversion);
 	if (error < 0)
 		goto error;
 
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 968225a88015..af05b918cb5b 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -68,29 +68,26 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
 {
 	struct inode *inode = state->inode;
 	struct file_lock *fl;
-	int status;
+	int status = 0;
+
+	if (inode->i_flock == NULL)
+		goto out;
 
+	/* Protect inode->i_flock using the BKL */
+	lock_kernel();
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
 			continue;
 		if (nfs_file_open_context(fl->fl_file) != ctx)
 			continue;
+		unlock_kernel();
 		status = nfs4_lock_delegation_recall(state, fl);
-		if (status >= 0)
-			continue;
-		switch (status) {
-			default:
-				printk(KERN_ERR "%s: unhandled error %d.\n",
-						__func__, status);
-			case -NFS4ERR_EXPIRED:
-				/* kill_proc(fl->fl_pid, SIGLOST, 1); */
-			case -NFS4ERR_STALE_CLIENTID:
-				nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client);
-				goto out_err;
-		}
+		if (status < 0)
+			goto out;
+		lock_kernel();
 	}
-	return 0;
-out_err:
+	unlock_kernel();
+out:
 	return status;
 }
 
@@ -268,7 +265,10 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	nfs_msync_inode(inode);
-	/* Guard against new delegated open calls */
+	/*
+	 * Guard against new delegated open/lock/unlock calls and against
+	 * state recovery
+	 */
 	down_write(&nfsi->rwsem);
 	nfs_delegation_claim_opens(inode, &delegation->stateid);
 	up_write(&nfsi->rwsem);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 08f6b040d289..489fc01a3204 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -259,6 +259,9 @@ static void nfs_direct_read_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
+#if defined(CONFIG_NFS_V4_1)
+	.rpc_call_prepare = nfs_read_prepare,
+#endif /* CONFIG_NFS_V4_1 */
 	.rpc_call_done = nfs_direct_read_result,
 	.rpc_release = nfs_direct_read_release,
 };
@@ -535,6 +538,9 @@ static void nfs_direct_commit_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_commit_direct_ops = {
+#if defined(CONFIG_NFS_V4_1)
+	.rpc_call_prepare = nfs_write_prepare,
+#endif /* CONFIG_NFS_V4_1 */
 	.rpc_call_done = nfs_direct_commit_result,
 	.rpc_release = nfs_direct_commit_release,
 };
@@ -673,6 +679,9 @@ out_unlock:
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
+#if defined(CONFIG_NFS_V4_1)
+	.rpc_call_prepare = nfs_write_prepare,
+#endif /* CONFIG_NFS_V4_1 */
 	.rpc_call_done = nfs_direct_write_result,
 	.rpc_release = nfs_direct_write_release,
 };
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ec7e27d00bc6..0055b813ec2c 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -48,6 +48,9 @@ static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
 					size_t count, unsigned int flags);
 static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos);
+static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
+					struct file *filp, loff_t *ppos,
+					size_t count, unsigned int flags);
 static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos);
 static int  nfs_file_flush(struct file *, fl_owner_t id);
@@ -73,6 +76,7 @@ const struct file_operations nfs_file_operations = {
 	.lock		= nfs_lock,
 	.flock		= nfs_flock,
 	.splice_read	= nfs_file_splice_read,
+	.splice_write	= nfs_file_splice_write,
 	.check_flags	= nfs_check_flags,
 	.setlease	= nfs_setlease,
 };
@@ -587,12 +591,38 @@ out_swapfile:
 	goto out;
 }
 
+static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
+				     struct file *filp, loff_t *ppos,
+				     size_t count, unsigned int flags)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	ssize_t ret;
+
+	dprintk("NFS splice_write(%s/%s, %lu@%llu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long long) *ppos);
+
+	/*
+	 * The combination of splice and an O_APPEND destination is disallowed.
+	 */
+
+	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
+
+	ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
+	if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
+		int err = nfs_do_fsync(nfs_file_open_context(filp), inode);
+		if (err < 0)
+			ret = err;
+	}
+	return ret;
+}
+
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
 
-	lock_kernel();
 	/* Try local locking first */
 	posix_test_lock(filp, fl);
 	if (fl->fl_type != F_UNLCK) {
@@ -608,7 +638,6 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 
 	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
 out:
-	unlock_kernel();
 	return status;
 out_noconflict:
 	fl->fl_type = F_UNLCK;
@@ -650,13 +679,11 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
 	 * 	If we're signalled while cleaning up locks on process exit, we
 	 * 	still need to complete the unlock.
 	 */
-	lock_kernel();
 	/* Use local locking if mounted with "-onolock" */
 	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
 		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
 	else
 		status = do_vfs_lock(filp, fl);
-	unlock_kernel();
 	return status;
 }
 
@@ -673,13 +700,11 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
 	if (status != 0)
 		goto out;
 
-	lock_kernel();
 	/* Use local locking if mounted with "-onolock" */
 	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
 		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
 	else
 		status = do_vfs_lock(filp, fl);
-	unlock_kernel();
 	if (status < 0)
 		goto out;
 	/*
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 46177cb87064..b35d2a616066 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -30,7 +30,6 @@
 #include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
 #include <linux/namei.h>
-#include <linux/mnt_namespace.h>
 #include <linux/security.h>
 
 #include <asm/system.h>
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e4d6a8348adf..7dd90a6769d0 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -2,6 +2,7 @@
  * NFS internal definitions
  */
 
+#include "nfs4_fs.h"
 #include <linux/mount.h>
 #include <linux/security.h>
 
@@ -17,6 +18,18 @@ struct nfs_string;
  */
 #define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
 
+/*
+ * Determine if sessions are in use.
+ */
+static inline int nfs4_has_session(const struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4_1
+	if (clp->cl_session)
+		return 1;
+#endif /* CONFIG_NFS_V4_1 */
+	return 0;
+}
+
 struct nfs_clone_mount {
 	const struct super_block *sb;
 	const struct dentry *dentry;
@@ -30,6 +43,12 @@ struct nfs_clone_mount {
 };
 
 /*
+ * Note: RFC 1813 doesn't limit the number of auth flavors that
+ * a server can return, so make something up.
+ */
+#define NFS_MAX_SECFLAVORS	(12)
+
+/*
  * In-kernel mount arguments
  */
 struct nfs_parsed_mount_data {
@@ -44,6 +63,7 @@ struct nfs_parsed_mount_data {
 	unsigned int		auth_flavor_len;
 	rpc_authflavor_t	auth_flavors[1];
 	char			*client_address;
+	unsigned int		minorversion;
 	char			*fscache_uniq;
 
 	struct {
@@ -77,6 +97,8 @@ struct nfs_mount_request {
 	unsigned short		protocol;
 	struct nfs_fh		*fh;
 	int			noresvport;
+	unsigned int		*auth_flav_len;
+	rpc_authflavor_t	*auth_flavs;
 };
 
 extern int nfs_mount(struct nfs_mount_request *info);
@@ -99,6 +121,8 @@ extern void nfs_free_server(struct nfs_server *server);
 extern struct nfs_server *nfs_clone_server(struct nfs_server *,
 					   struct nfs_fh *,
 					   struct nfs_fattr *);
+extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
+extern int nfs4_check_client_ready(struct nfs_client *clp);
 #ifdef CONFIG_PROC_FS
 extern int __init nfs_fs_proc_init(void);
 extern void nfs_fs_proc_exit(void);
@@ -146,6 +170,20 @@ extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int);
 extern struct rpc_procinfo nfs3_procedures[];
 extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int);
 
+/* nfs4proc.c */
+static inline void nfs4_restart_rpc(struct rpc_task *task,
+				    const struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4_1
+	if (nfs4_has_session(clp) &&
+	    test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) {
+		rpc_restart_call_prepare(task);
+		return;
+	}
+#endif /* CONFIG_NFS_V4_1 */
+	rpc_restart_call(task);
+}
+
 /* nfs4xdr.c */
 #ifdef CONFIG_NFS_V4
 extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
@@ -205,6 +243,38 @@ extern int nfs4_path_walk(struct nfs_server *server,
 			  const char *path);
 #endif
 
+/* read.c */
+extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+
+/* write.c */
+extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
+
+/* nfs4proc.c */
+extern int _nfs4_call_sync(struct nfs_server *server,
+			   struct rpc_message *msg,
+			   struct nfs4_sequence_args *args,
+			   struct nfs4_sequence_res *res,
+			   int cache_reply);
+extern int _nfs4_call_sync_session(struct nfs_server *server,
+				   struct rpc_message *msg,
+				   struct nfs4_sequence_args *args,
+				   struct nfs4_sequence_res *res,
+				   int cache_reply);
+
+#ifdef CONFIG_NFS_V4_1
+extern void nfs41_sequence_free_slot(const struct nfs_client *,
+				     struct nfs4_sequence_res *res);
+#endif /* CONFIG_NFS_V4_1 */
+
+static inline void nfs4_sequence_free_slot(const struct nfs_client *clp,
+					   struct nfs4_sequence_res *res)
+{
+#ifdef CONFIG_NFS_V4_1
+	if (nfs4_has_session(clp))
+		nfs41_sequence_free_slot(clp, res);
+#endif /* CONFIG_NFS_V4_1 */
+}
+
 /*
  * Determine the device name as a string
  */
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca905a5bb1ba..38ef9eaec407 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -20,8 +20,116 @@
 # define NFSDBG_FACILITY	NFSDBG_MOUNT
 #endif
 
+/*
+ * Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4
+ */
+#define MNTPATHLEN		(1024)
+
+/*
+ * XDR data type sizes
+ */
+#define encode_dirpath_sz	(1 + XDR_QUADLEN(MNTPATHLEN))
+#define MNT_status_sz		(1)
+#define MNT_fhs_status_sz	(1)
+#define MNT_fhandle_sz		XDR_QUADLEN(NFS2_FHSIZE)
+#define MNT_fhandle3_sz		(1 + XDR_QUADLEN(NFS3_FHSIZE))
+#define MNT_authflav3_sz	(1 + NFS_MAX_SECFLAVORS)
+
+/*
+ * XDR argument and result sizes
+ */
+#define MNT_enc_dirpath_sz	encode_dirpath_sz
+#define MNT_dec_mountres_sz	(MNT_status_sz + MNT_fhandle_sz)
+#define MNT_dec_mountres3_sz	(MNT_status_sz + MNT_fhandle_sz + \
+				 MNT_authflav3_sz)
+
+/*
+ * Defined by RFC 1094, section A.5
+ */
+enum {
+	MOUNTPROC_NULL		= 0,
+	MOUNTPROC_MNT		= 1,
+	MOUNTPROC_DUMP		= 2,
+	MOUNTPROC_UMNT		= 3,
+	MOUNTPROC_UMNTALL	= 4,
+	MOUNTPROC_EXPORT	= 5,
+};
+
+/*
+ * Defined by RFC 1813, section 5.2
+ */
+enum {
+	MOUNTPROC3_NULL		= 0,
+	MOUNTPROC3_MNT		= 1,
+	MOUNTPROC3_DUMP		= 2,
+	MOUNTPROC3_UMNT		= 3,
+	MOUNTPROC3_UMNTALL	= 4,
+	MOUNTPROC3_EXPORT	= 5,
+};
+
 static struct rpc_program	mnt_program;
 
+/*
+ * Defined by OpenGroup XNFS Version 3W, chapter 8
+ */
+enum mountstat {
+	MNT_OK			= 0,
+	MNT_EPERM		= 1,
+	MNT_ENOENT		= 2,
+	MNT_EACCES		= 13,
+	MNT_EINVAL		= 22,
+};
+
+static struct {
+	u32 status;
+	int errno;
+} mnt_errtbl[] = {
+	{ .status = MNT_OK,			.errno = 0,		},
+	{ .status = MNT_EPERM,			.errno = -EPERM,	},
+	{ .status = MNT_ENOENT,			.errno = -ENOENT,	},
+	{ .status = MNT_EACCES,			.errno = -EACCES,	},
+	{ .status = MNT_EINVAL,			.errno = -EINVAL,	},
+};
+
+/*
+ * Defined by RFC 1813, section 5.1.5
+ */
+enum mountstat3 {
+	MNT3_OK			= 0,		/* no error */
+	MNT3ERR_PERM		= 1,		/* Not owner */
+	MNT3ERR_NOENT		= 2,		/* No such file or directory */
+	MNT3ERR_IO		= 5,		/* I/O error */
+	MNT3ERR_ACCES		= 13,		/* Permission denied */
+	MNT3ERR_NOTDIR		= 20,		/* Not a directory */
+	MNT3ERR_INVAL		= 22,		/* Invalid argument */
+	MNT3ERR_NAMETOOLONG	= 63,		/* Filename too long */
+	MNT3ERR_NOTSUPP		= 10004,	/* Operation not supported */
+	MNT3ERR_SERVERFAULT	= 10006,	/* A failure on the server */
+};
+
+static struct {
+	u32 status;
+	int errno;
+} mnt3_errtbl[] = {
+	{ .status = MNT3_OK,			.errno = 0,		},
+	{ .status = MNT3ERR_PERM,		.errno = -EPERM,	},
+	{ .status = MNT3ERR_NOENT,		.errno = -ENOENT,	},
+	{ .status = MNT3ERR_IO,			.errno = -EIO,		},
+	{ .status = MNT3ERR_ACCES,		.errno = -EACCES,	},
+	{ .status = MNT3ERR_NOTDIR,		.errno = -ENOTDIR,	},
+	{ .status = MNT3ERR_INVAL,		.errno = -EINVAL,	},
+	{ .status = MNT3ERR_NAMETOOLONG,	.errno = -ENAMETOOLONG,	},
+	{ .status = MNT3ERR_NOTSUPP,		.errno = -ENOTSUPP,	},
+	{ .status = MNT3ERR_SERVERFAULT,	.errno = -ESERVERFAULT,	},
+};
+
+struct mountres {
+	int errno;
+	struct nfs_fh *fh;
+	unsigned int *auth_count;
+	rpc_authflavor_t *auth_flavors;
+};
+
 struct mnt_fhstatus {
 	u32 status;
 	struct nfs_fh *fh;
@@ -35,8 +143,10 @@ struct mnt_fhstatus {
  */
 int nfs_mount(struct nfs_mount_request *info)
 {
-	struct mnt_fhstatus	result = {
-		.fh		= info->fh
+	struct mountres	result = {
+		.fh		= info->fh,
+		.auth_count	= info->auth_flav_len,
+		.auth_flavors	= info->auth_flavs,
 	};
 	struct rpc_message msg	= {
 		.rpc_argp	= info->dirpath,
@@ -68,14 +178,14 @@ int nfs_mount(struct nfs_mount_request *info)
 	if (info->version == NFS_MNT3_VERSION)
 		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
 	else
-		msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT];
 
 	status = rpc_call_sync(mnt_clnt, &msg, 0);
 	rpc_shutdown_client(mnt_clnt);
 
 	if (status < 0)
 		goto out_call_err;
-	if (result.status != 0)
+	if (result.errno != 0)
 		goto out_mnt_err;
 
 	dprintk("NFS: MNT request succeeded\n");
@@ -86,72 +196,215 @@ out:
 
 out_clnt_err:
 	status = PTR_ERR(mnt_clnt);
-	dprintk("NFS: failed to create RPC client, status=%d\n", status);
+	dprintk("NFS: failed to create MNT RPC client, status=%d\n", status);
 	goto out;
 
 out_call_err:
-	dprintk("NFS: failed to start MNT request, status=%d\n", status);
+	dprintk("NFS: MNT request failed, status=%d\n", status);
 	goto out;
 
 out_mnt_err:
-	dprintk("NFS: MNT server returned result %d\n", result.status);
-	status = nfs_stat_to_errno(result.status);
+	dprintk("NFS: MNT server returned result %d\n", result.errno);
+	status = result.errno;
 	goto out;
 }
 
 /*
  * XDR encode/decode functions for MOUNT
  */
-static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p,
-			      const char *path)
+
+static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
+{
+	const u32 pathname_len = strlen(pathname);
+	__be32 *p;
+
+	if (unlikely(pathname_len > MNTPATHLEN))
+		return -EIO;
+
+	p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len);
+	if (unlikely(p == NULL))
+		return -EIO;
+	xdr_encode_opaque(p, pathname, pathname_len);
+
+	return 0;
+}
+
+static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p,
+			   const char *dirpath)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	return encode_mntdirpath(&xdr, dirpath);
+}
+
+/*
+ * RFC 1094: "A non-zero status indicates some sort of error.  In this
+ * case, the status is a UNIX error number."  This can be problematic
+ * if the server and client use different errno values for the same
+ * error.
+ *
+ * However, the OpenGroup XNFS spec provides a simple mapping that is
+ * independent of local errno values on the server and the client.
+ */
+static int decode_status(struct xdr_stream *xdr, struct mountres *res)
 {
-	p = xdr_encode_string(p, path);
+	unsigned int i;
+	u32 status;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, sizeof(status));
+	if (unlikely(p == NULL))
+		return -EIO;
+	status = ntohl(*p);
 
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	for (i = 0; i <= ARRAY_SIZE(mnt_errtbl); i++) {
+		if (mnt_errtbl[i].status == status) {
+			res->errno = mnt_errtbl[i].errno;
+			return 0;
+		}
+	}
+
+	dprintk("NFS: unrecognized MNT status code: %u\n", status);
+	res->errno = -EACCES;
 	return 0;
 }
 
-static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p,
-			       struct mnt_fhstatus *res)
+static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
 {
 	struct nfs_fh *fh = res->fh;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, NFS2_FHSIZE);
+	if (unlikely(p == NULL))
+		return -EIO;
+
+	fh->size = NFS2_FHSIZE;
+	memcpy(fh->data, p, NFS2_FHSIZE);
+	return 0;
+}
+
+static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p,
+			    struct mountres *res)
+{
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+
+	status = decode_status(&xdr, res);
+	if (unlikely(status != 0 || res->errno != 0))
+		return status;
+	return decode_fhandle(&xdr, res);
+}
+
+static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
+{
+	unsigned int i;
+	u32 status;
+	__be32 *p;
 
-	if ((res->status = ntohl(*p++)) == 0) {
-		fh->size = NFS2_FHSIZE;
-		memcpy(fh->data, p, NFS2_FHSIZE);
+	p = xdr_inline_decode(xdr, sizeof(status));
+	if (unlikely(p == NULL))
+		return -EIO;
+	status = ntohl(*p);
+
+	for (i = 0; i <= ARRAY_SIZE(mnt3_errtbl); i++) {
+		if (mnt3_errtbl[i].status == status) {
+			res->errno = mnt3_errtbl[i].errno;
+			return 0;
+		}
 	}
+
+	dprintk("NFS: unrecognized MNT3 status code: %u\n", status);
+	res->errno = -EACCES;
 	return 0;
 }
 
-static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
-				struct mnt_fhstatus *res)
+static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res)
 {
 	struct nfs_fh *fh = res->fh;
-	unsigned size;
-
-	if ((res->status = ntohl(*p++)) == 0) {
-		size = ntohl(*p++);
-		if (size <= NFS3_FHSIZE && size != 0) {
-			fh->size = size;
-			memcpy(fh->data, p, size);
-		} else
-			res->status = -EBADHANDLE;
+	u32 size;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, sizeof(size));
+	if (unlikely(p == NULL))
+		return -EIO;
+
+	size = ntohl(*p++);
+	if (size > NFS3_FHSIZE || size == 0)
+		return -EIO;
+
+	p = xdr_inline_decode(xdr, size);
+	if (unlikely(p == NULL))
+		return -EIO;
+
+	fh->size = size;
+	memcpy(fh->data, p, size);
+	return 0;
+}
+
+static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
+{
+	rpc_authflavor_t *flavors = res->auth_flavors;
+	unsigned int *count = res->auth_count;
+	u32 entries, i;
+	__be32 *p;
+
+	if (*count == 0)
+		return 0;
+
+	p = xdr_inline_decode(xdr, sizeof(entries));
+	if (unlikely(p == NULL))
+		return -EIO;
+	entries = ntohl(*p);
+	dprintk("NFS: received %u auth flavors\n", entries);
+	if (entries > NFS_MAX_SECFLAVORS)
+		entries = NFS_MAX_SECFLAVORS;
+
+	p = xdr_inline_decode(xdr, sizeof(u32) * entries);
+	if (unlikely(p == NULL))
+		return -EIO;
+
+	if (entries > *count)
+		entries = *count;
+
+	for (i = 0; i < entries; i++) {
+		flavors[i] = ntohl(*p++);
+		dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]);
 	}
+	*count = i;
+
 	return 0;
 }
 
-#define MNT_dirpath_sz		(1 + 256)
-#define MNT_fhstatus_sz		(1 + 8)
-#define MNT_fhstatus3_sz	(1 + 16)
+static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p,
+			     struct mountres *res)
+{
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+
+	status = decode_fhs_status(&xdr, res);
+	if (unlikely(status != 0 || res->errno != 0))
+		return status;
+	status = decode_fhandle3(&xdr, res);
+	if (unlikely(status != 0)) {
+		res->errno = -EBADHANDLE;
+		return 0;
+	}
+	return decode_auth_flavors(&xdr, res);
+}
 
 static struct rpc_procinfo mnt_procedures[] = {
-	[MNTPROC_MNT] = {
-		.p_proc		= MNTPROC_MNT,
-		.p_encode	= (kxdrproc_t) xdr_encode_dirpath,
-		.p_decode	= (kxdrproc_t) xdr_decode_fhstatus,
-		.p_arglen	= MNT_dirpath_sz,
-		.p_replen	= MNT_fhstatus_sz,
-		.p_statidx	= MNTPROC_MNT,
+	[MOUNTPROC_MNT] = {
+		.p_proc		= MOUNTPROC_MNT,
+		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_decode	= (kxdrproc_t)mnt_dec_mountres,
+		.p_arglen	= MNT_enc_dirpath_sz,
+		.p_replen	= MNT_dec_mountres_sz,
+		.p_statidx	= MOUNTPROC_MNT,
 		.p_name		= "MOUNT",
 	},
 };
@@ -159,10 +412,10 @@ static struct rpc_procinfo mnt_procedures[] = {
 static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
-		.p_encode	= (kxdrproc_t) xdr_encode_dirpath,
-		.p_decode	= (kxdrproc_t) xdr_decode_fhstatus3,
-		.p_arglen	= MNT_dirpath_sz,
-		.p_replen	= MNT_fhstatus3_sz,
+		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_decode	= (kxdrproc_t)mnt_dec_mountres3,
+		.p_arglen	= MNT_enc_dirpath_sz,
+		.p_replen	= MNT_dec_mountres3_sz,
 		.p_statidx	= MOUNTPROC3_MNT,
 		.p_name		= "MOUNT",
 	},
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index f01caec84463..40c766782891 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -65,6 +65,11 @@ char *nfs_path(const char *base,
 		dentry = dentry->d_parent;
 	}
 	spin_unlock(&dcache_lock);
+	if (*end != '/') {
+		if (--buflen < 0)
+			goto Elong;
+		*--end = '/';
+	}
 	namelen = strlen(base);
 	/* Strip off excess slashes in base string */
 	while (namelen > 0 && base[namelen - 1] == '/')
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 6bbf0e6daad2..bac60515a4b3 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -207,8 +207,6 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
 	status = nfs_revalidate_inode(server, inode);
 	if (status < 0)
 		return ERR_PTR(status);
-	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
-		nfs_zap_acl_cache(inode);
 	acl = nfs3_get_cached_acl(inode, type);
 	if (acl != ERR_PTR(-EAGAIN))
 		return acl;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 84345deab26f..61bc3a32e1e2 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@ enum nfs4_client_state {
 	NFS4CLNT_RECLAIM_REBOOT,
 	NFS4CLNT_RECLAIM_NOGRACE,
 	NFS4CLNT_DELEGRETURN,
+	NFS4CLNT_SESSION_SETUP,
 };
 
 /*
@@ -177,6 +178,14 @@ struct nfs4_state_recovery_ops {
 	int state_flag_bit;
 	int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
 	int (*recover_lock)(struct nfs4_state *, struct file_lock *);
+	int (*establish_clid)(struct nfs_client *, struct rpc_cred *);
+	struct rpc_cred * (*get_clid_cred)(struct nfs_client *);
+};
+
+struct nfs4_state_maintenance_ops {
+	int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *);
+	struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *);
+	int (*renew_lease)(struct nfs_client *, struct rpc_cred *);
 };
 
 extern const struct dentry_operations nfs4_dentry_operations;
@@ -193,6 +202,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -200,8 +210,26 @@ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fh
 extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 		struct nfs4_fs_locations *fs_locations, struct page *page);
 
-extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
-extern struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops;
+extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[];
+extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[];
+#if defined(CONFIG_NFS_V4_1)
+extern int nfs4_setup_sequence(struct nfs_client *clp,
+		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+		int cache_reply, struct rpc_task *task);
+extern void nfs4_destroy_session(struct nfs4_session *session);
+extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
+extern int nfs4_proc_create_session(struct nfs_client *, int reset);
+extern int nfs4_proc_destroy_session(struct nfs4_session *);
+#else /* CONFIG_NFS_v4_1 */
+static inline int nfs4_setup_sequence(struct nfs_client *clp,
+		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+		int cache_reply, struct rpc_task *task)
+{
+	return 0;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[];
 
 extern const u32 nfs4_fattr_bitmap[2];
 extern const u32 nfs4_statfs_bitmap[2];
@@ -216,7 +244,12 @@ extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(struct work_struct *);
 
 /* nfs4state.c */
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp);
 struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
+#if defined(CONFIG_NFS_V4_1)
+struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
+struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
+#endif /* CONFIG_NFS_V4_1 */
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4674f8092da8..92ce43517814 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -48,11 +48,14 @@
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/module.h>
+#include <linux/sunrpc/bc_xprt.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
 #include "internal.h"
 #include "iostat.h"
+#include "callback.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
@@ -247,7 +250,25 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
 			ret = nfs4_wait_clnt_recover(clp);
 			if (ret == 0)
 				exception->retry = 1;
+#if !defined(CONFIG_NFS_V4_1)
 			break;
+#else /* !defined(CONFIG_NFS_V4_1) */
+			if (!nfs4_has_session(server->nfs_client))
+				break;
+			/* FALLTHROUGH */
+		case -NFS4ERR_BADSESSION:
+		case -NFS4ERR_BADSLOT:
+		case -NFS4ERR_BAD_HIGH_SLOT:
+		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+		case -NFS4ERR_DEADSESSION:
+		case -NFS4ERR_SEQ_FALSE_RETRY:
+		case -NFS4ERR_SEQ_MISORDERED:
+			dprintk("%s ERROR: %d Reset session\n", __func__,
+				errorcode);
+			set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
+			exception->retry = 1;
+			/* FALLTHROUGH */
+#endif /* !defined(CONFIG_NFS_V4_1) */
 		case -NFS4ERR_FILE_OPEN:
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
@@ -271,6 +292,353 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
 	spin_unlock(&clp->cl_lock);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+
+/*
+ * nfs4_free_slot - free a slot and efficiently update slot table.
+ *
+ * freeing a slot is trivially done by clearing its respective bit
+ * in the bitmap.
+ * If the freed slotid equals highest_used_slotid we want to update it
+ * so that the server would be able to size down the slot table if needed,
+ * otherwise we know that the highest_used_slotid is still in use.
+ * When updating highest_used_slotid there may be "holes" in the bitmap
+ * so we need to scan down from highest_used_slotid to 0 looking for the now
+ * highest slotid in use.
+ * If none found, highest_used_slotid is set to -1.
+ */
+static void
+nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
+{
+	int slotid = free_slotid;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	/* clear used bit in bitmap */
+	__clear_bit(slotid, tbl->used_slots);
+
+	/* update highest_used_slotid when it is freed */
+	if (slotid == tbl->highest_used_slotid) {
+		slotid = find_last_bit(tbl->used_slots, tbl->max_slots);
+		if (slotid >= 0 && slotid < tbl->max_slots)
+			tbl->highest_used_slotid = slotid;
+		else
+			tbl->highest_used_slotid = -1;
+	}
+	rpc_wake_up_next(&tbl->slot_tbl_waitq);
+	spin_unlock(&tbl->slot_tbl_lock);
+	dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__,
+		free_slotid, tbl->highest_used_slotid);
+}
+
+void nfs41_sequence_free_slot(const struct nfs_client *clp,
+			      struct nfs4_sequence_res *res)
+{
+	struct nfs4_slot_table *tbl;
+
+	if (!nfs4_has_session(clp)) {
+		dprintk("%s: No session\n", __func__);
+		return;
+	}
+	tbl = &clp->cl_session->fc_slot_table;
+	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
+		dprintk("%s: No slot\n", __func__);
+		/* just wake up the next guy waiting since
+		 * we may have not consumed a slot after all */
+		rpc_wake_up_next(&tbl->slot_tbl_waitq);
+		return;
+	}
+	nfs4_free_slot(tbl, res->sr_slotid);
+	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+}
+
+static void nfs41_sequence_done(struct nfs_client *clp,
+				struct nfs4_sequence_res *res,
+				int rpc_status)
+{
+	unsigned long timestamp;
+	struct nfs4_slot_table *tbl;
+	struct nfs4_slot *slot;
+
+	/*
+	 * sr_status remains 1 if an RPC level error occurred. The server
+	 * may or may not have processed the sequence operation..
+	 * Proceed as if the server received and processed the sequence
+	 * operation.
+	 */
+	if (res->sr_status == 1)
+		res->sr_status = NFS_OK;
+
+	/* -ERESTARTSYS can result in skipping nfs41_sequence_setup */
+	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
+		goto out;
+
+	tbl = &clp->cl_session->fc_slot_table;
+	slot = tbl->slots + res->sr_slotid;
+
+	if (res->sr_status == 0) {
+		/* Update the slot's sequence and clientid lease timer */
+		++slot->seq_nr;
+		timestamp = res->sr_renewal_time;
+		spin_lock(&clp->cl_lock);
+		if (time_before(clp->cl_last_renewal, timestamp))
+			clp->cl_last_renewal = timestamp;
+		spin_unlock(&clp->cl_lock);
+		return;
+	}
+out:
+	/* The session may be reset by one of the error handlers. */
+	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
+	nfs41_sequence_free_slot(clp, res);
+}
+
+/*
+ * nfs4_find_slot - efficiently look for a free slot
+ *
+ * nfs4_find_slot looks for an unset bit in the used_slots bitmap.
+ * If found, we mark the slot as used, update the highest_used_slotid,
+ * and respectively set up the sequence operation args.
+ * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise.
+ *
+ * Note: must be called with under the slot_tbl_lock.
+ */
+static u8
+nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task)
+{
+	int slotid;
+	u8 ret_id = NFS4_MAX_SLOT_TABLE;
+	BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE);
+
+	dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n",
+		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
+		tbl->max_slots);
+	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots);
+	if (slotid >= tbl->max_slots)
+		goto out;
+	__set_bit(slotid, tbl->used_slots);
+	if (slotid > tbl->highest_used_slotid)
+		tbl->highest_used_slotid = slotid;
+	ret_id = slotid;
+out:
+	dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n",
+		__func__, tbl->used_slots[0], tbl->highest_used_slotid, ret_id);
+	return ret_id;
+}
+
+static int nfs4_recover_session(struct nfs4_session *session)
+{
+	struct nfs_client *clp = session->clp;
+	int ret;
+
+	for (;;) {
+		ret = nfs4_wait_clnt_recover(clp);
+		if (ret != 0)
+				return ret;
+		if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
+			break;
+		nfs4_schedule_state_manager(clp);
+	}
+	return 0;
+}
+
+static int nfs41_setup_sequence(struct nfs4_session *session,
+				struct nfs4_sequence_args *args,
+				struct nfs4_sequence_res *res,
+				int cache_reply,
+				struct rpc_task *task)
+{
+	struct nfs4_slot *slot;
+	struct nfs4_slot_table *tbl;
+	int status = 0;
+	u8 slotid;
+
+	dprintk("--> %s\n", __func__);
+	/* slot already allocated? */
+	if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
+		return 0;
+
+	memset(res, 0, sizeof(*res));
+	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+	tbl = &session->fc_slot_table;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	if (test_bit(NFS4CLNT_SESSION_SETUP, &session->clp->cl_state)) {
+		if (tbl->highest_used_slotid != -1) {
+			rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
+			spin_unlock(&tbl->slot_tbl_lock);
+			dprintk("<-- %s: Session reset: draining\n", __func__);
+			return -EAGAIN;
+		}
+
+		/* The slot table is empty; start the reset thread */
+		dprintk("%s Session Reset\n", __func__);
+		spin_unlock(&tbl->slot_tbl_lock);
+		status = nfs4_recover_session(session);
+		if (status)
+			return status;
+		spin_lock(&tbl->slot_tbl_lock);
+	}
+
+	slotid = nfs4_find_slot(tbl, task);
+	if (slotid == NFS4_MAX_SLOT_TABLE) {
+		rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
+		spin_unlock(&tbl->slot_tbl_lock);
+		dprintk("<-- %s: no free slots\n", __func__);
+		return -EAGAIN;
+	}
+	spin_unlock(&tbl->slot_tbl_lock);
+
+	slot = tbl->slots + slotid;
+	args->sa_session = session;
+	args->sa_slotid = slotid;
+	args->sa_cache_this = cache_reply;
+
+	dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
+
+	res->sr_session = session;
+	res->sr_slotid = slotid;
+	res->sr_renewal_time = jiffies;
+	/*
+	 * sr_status is only set in decode_sequence, and so will remain
+	 * set to 1 if an rpc level failure occurs.
+	 */
+	res->sr_status = 1;
+	return 0;
+}
+
+int nfs4_setup_sequence(struct nfs_client *clp,
+			struct nfs4_sequence_args *args,
+			struct nfs4_sequence_res *res,
+			int cache_reply,
+			struct rpc_task *task)
+{
+	int ret = 0;
+
+	dprintk("--> %s clp %p session %p sr_slotid %d\n",
+		__func__, clp, clp->cl_session, res->sr_slotid);
+
+	if (!nfs4_has_session(clp))
+		goto out;
+	ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
+				   task);
+	if (ret != -EAGAIN) {
+		/* terminate rpc task */
+		task->tk_status = ret;
+		task->tk_action = NULL;
+	}
+out:
+	dprintk("<-- %s status=%d\n", __func__, ret);
+	return ret;
+}
+
+struct nfs41_call_sync_data {
+	struct nfs_client *clp;
+	struct nfs4_sequence_args *seq_args;
+	struct nfs4_sequence_res *seq_res;
+	int cache_reply;
+};
+
+static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs41_call_sync_data *data = calldata;
+
+	dprintk("--> %s data->clp->cl_session %p\n", __func__,
+		data->clp->cl_session);
+	if (nfs4_setup_sequence(data->clp, data->seq_args,
+				data->seq_res, data->cache_reply, task))
+		return;
+	rpc_call_start(task);
+}
+
+static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs41_call_sync_data *data = calldata;
+
+	nfs41_sequence_done(data->clp, data->seq_res, task->tk_status);
+	nfs41_sequence_free_slot(data->clp, data->seq_res);
+}
+
+struct rpc_call_ops nfs41_call_sync_ops = {
+	.rpc_call_prepare = nfs41_call_sync_prepare,
+	.rpc_call_done = nfs41_call_sync_done,
+};
+
+static int nfs4_call_sync_sequence(struct nfs_client *clp,
+				   struct rpc_clnt *clnt,
+				   struct rpc_message *msg,
+				   struct nfs4_sequence_args *args,
+				   struct nfs4_sequence_res *res,
+				   int cache_reply)
+{
+	int ret;
+	struct rpc_task *task;
+	struct nfs41_call_sync_data data = {
+		.clp = clp,
+		.seq_args = args,
+		.seq_res = res,
+		.cache_reply = cache_reply,
+	};
+	struct rpc_task_setup task_setup = {
+		.rpc_client = clnt,
+		.rpc_message = msg,
+		.callback_ops = &nfs41_call_sync_ops,
+		.callback_data = &data
+	};
+
+	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+	task = rpc_run_task(&task_setup);
+	if (IS_ERR(task))
+		ret = PTR_ERR(task);
+	else {
+		ret = task->tk_status;
+		rpc_put_task(task);
+	}
+	return ret;
+}
+
+int _nfs4_call_sync_session(struct nfs_server *server,
+			    struct rpc_message *msg,
+			    struct nfs4_sequence_args *args,
+			    struct nfs4_sequence_res *res,
+			    int cache_reply)
+{
+	return nfs4_call_sync_sequence(server->nfs_client, server->client,
+				       msg, args, res, cache_reply);
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
+int _nfs4_call_sync(struct nfs_server *server,
+		    struct rpc_message *msg,
+		    struct nfs4_sequence_args *args,
+		    struct nfs4_sequence_res *res,
+		    int cache_reply)
+{
+	args->sa_session = res->sr_session = NULL;
+	return rpc_call_sync(server->client, msg, 0);
+}
+
+#define nfs4_call_sync(server, msg, args, res, cache_reply) \
+	(server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \
+			&(res)->seq_res, (cache_reply))
+
+static void nfs4_sequence_done(const struct nfs_server *server,
+			       struct nfs4_sequence_res *res, int rpc_status)
+{
+#ifdef CONFIG_NFS_V4_1
+	if (nfs4_has_session(server->nfs_client))
+		nfs41_sequence_done(server->nfs_client, res, rpc_status);
+#endif /* CONFIG_NFS_V4_1 */
+}
+
+/* no restart, therefore free slot here */
+static void nfs4_sequence_done_free_slot(const struct nfs_server *server,
+					 struct nfs4_sequence_res *res,
+					 int rpc_status)
+{
+	nfs4_sequence_done(server, res, rpc_status);
+	nfs4_sequence_free_slot(server->nfs_client, res);
+}
+
 static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 {
 	struct nfs_inode *nfsi = NFS_I(dir);
@@ -312,6 +680,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 	p->o_res.server = p->o_arg.server;
 	nfs_fattr_init(&p->f_attr);
 	nfs_fattr_init(&p->dir_attr);
+	p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 }
 
 static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
@@ -804,16 +1173,30 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
 		err = _nfs4_open_delegation_recall(ctx, state, stateid);
 		switch (err) {
 			case 0:
-				return err;
+			case -ENOENT:
+			case -ESTALE:
+				goto out;
 			case -NFS4ERR_STALE_CLIENTID:
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:
 				/* Don't recall a delegation if it was lost */
 				nfs4_schedule_state_recovery(server->nfs_client);
-				return err;
+				goto out;
+			case -ERESTARTSYS:
+				/*
+				 * The show must go on: exit, but mark the
+				 * stateid as needing recovery.
+				 */
+			case -NFS4ERR_ADMIN_REVOKED:
+			case -NFS4ERR_BAD_STATEID:
+				nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+			case -ENOMEM:
+				err = 0;
+				goto out;
 		}
 		err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
+out:
 	return err;
 }
 
@@ -929,6 +1312,10 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 		nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
 	}
 	data->timestamp = jiffies;
+	if (nfs4_setup_sequence(data->o_arg.server->nfs_client,
+				&data->o_arg.seq_args,
+				&data->o_res.seq_res, 1, task))
+		return;
 	rpc_call_start(task);
 	return;
 out_no_action:
@@ -941,6 +1328,10 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
 	struct nfs4_opendata *data = calldata;
 
 	data->rpc_status = task->tk_status;
+
+	nfs4_sequence_done_free_slot(data->o_arg.server, &data->o_res.seq_res,
+				     task->tk_status);
+
 	if (RPC_ASSASSINATED(task))
 		return;
 	if (task->tk_status == 0) {
@@ -1269,7 +1660,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
 	} else
 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
 
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	if (status == 0 && state != NULL)
 		renew_lease(server, timestamp);
 	return status;
@@ -1318,6 +1709,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	struct nfs4_state *state = calldata->state;
 	struct nfs_server *server = NFS_SERVER(calldata->inode);
 
+	nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status);
 	if (RPC_ASSASSINATED(task))
 		return;
         /* hmm. we are done with the inode, and in the process of freeing
@@ -1336,10 +1728,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 				break;
 		default:
 			if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
-				rpc_restart_call(task);
+				nfs4_restart_rpc(task, server->nfs_client);
 				return;
 			}
 	}
+	nfs4_sequence_free_slot(server->nfs_client, &calldata->res.seq_res);
 	nfs_refresh_inode(calldata->inode, calldata->res.fattr);
 }
 
@@ -1380,6 +1773,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		calldata->arg.fmode = FMODE_WRITE;
 	}
 	calldata->timestamp = jiffies;
+	if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client,
+				&calldata->arg.seq_args, &calldata->res.seq_res,
+				1, task))
+		return;
 	rpc_call_start(task);
 }
 
@@ -1419,13 +1816,15 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 	};
 	int status = -ENOMEM;
 
-	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
+	calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
 	if (calldata == NULL)
 		goto out;
 	calldata->inode = state->inode;
 	calldata->state = state;
 	calldata->arg.fh = NFS_FH(state->inode);
 	calldata->arg.stateid = &state->open_stateid;
+	if (nfs4_has_session(server->nfs_client))
+		memset(calldata->arg.stateid->data, 0, 4);    /* clear seqid */
 	/* Serialization for the sequence id */
 	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
 	if (calldata->arg.seqid == NULL)
@@ -1435,6 +1834,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.seqid = calldata->arg.seqid;
 	calldata->res.server = server;
+	calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	calldata->path.mnt = mntget(path->mnt);
 	calldata->path.dentry = dget(path->dentry);
 
@@ -1584,15 +1984,18 @@ void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
 
 static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
+	struct nfs4_server_caps_arg args = {
+		.fhandle = fhandle,
+	};
 	struct nfs4_server_caps_res res = {};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS],
-		.rpc_argp = fhandle,
+		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
 	int status;
 
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &args, &res, 0);
 	if (status == 0) {
 		memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
 		if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
@@ -1606,6 +2009,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 		server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
 		server->acl_bitmask = res.acl_bitmask;
 	}
+
 	return status;
 }
 
@@ -1637,8 +2041,15 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
+	int status;
+
 	nfs_fattr_init(info->fattr);
-	return rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_recover_expired_lease(server);
+	if (!status)
+		status = nfs4_check_client_ready(server->nfs_client);
+	if (!status)
+		status = nfs4_call_sync(server, &msg, &args, &res, 0);
+	return status;
 }
 
 static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -1728,7 +2139,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 	};
 	
 	nfs_fattr_init(fattr);
-	return rpc_call_sync(server->client, &msg, 0);
+	return nfs4_call_sync(server, &msg, &args, &res, 0);
 }
 
 static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
@@ -1812,7 +2223,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d
 	nfs_fattr_init(fattr);
 
 	dprintk("NFS call  lookupfh %s\n", name->name);
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &args, &res, 0);
 	dprintk("NFS reply lookupfh: %d\n", status);
 	return status;
 }
@@ -1898,7 +2309,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
 			args.access |= NFS4_ACCESS_EXECUTE;
 	}
 	nfs_fattr_init(&fattr);
-	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	status = nfs4_call_sync(server, &msg, &args, &res, 0);
 	if (!status) {
 		entry->mask = 0;
 		if (res.access & NFS4_ACCESS_READ)
@@ -1957,13 +2368,14 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
 		.pglen    = pglen,
 		.pages    = &page,
 	};
+	struct nfs4_readlink_res res;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK],
 		.rpc_argp = &args,
-		.rpc_resp = NULL,
+		.rpc_resp = &res,
 	};
 
-	return rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0);
 }
 
 static int nfs4_proc_readlink(struct inode *inode, struct page *page,
@@ -2057,7 +2469,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
 	int			status;
 
 	nfs_fattr_init(&res.dir_attr);
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &args, &res, 1);
 	if (status == 0) {
 		update_changeattr(dir, &res.cinfo);
 		nfs_post_op_update_inode(dir, &res.dir_attr);
@@ -2092,8 +2504,10 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 {
 	struct nfs_removeres *res = task->tk_msg.rpc_resp;
 
+	nfs4_sequence_done(res->server, &res->seq_res, task->tk_status);
 	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
 		return 0;
+	nfs4_sequence_free_slot(res->server->nfs_client, &res->seq_res);
 	update_changeattr(dir, &res->cinfo);
 	nfs_post_op_update_inode(dir, &res->dir_attr);
 	return 1;
@@ -2125,7 +2539,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 	
 	nfs_fattr_init(res.old_fattr);
 	nfs_fattr_init(res.new_fattr);
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 
 	if (!status) {
 		update_changeattr(old_dir, &res.old_cinfo);
@@ -2174,7 +2588,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
 
 	nfs_fattr_init(res.fattr);
 	nfs_fattr_init(res.dir_attr);
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	if (!status) {
 		update_changeattr(dir, &res.cinfo);
 		nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2235,7 +2649,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
 
 static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data)
 {
-	int status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0);
+	int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg,
+				    &data->arg, &data->res, 1);
 	if (status == 0) {
 		update_changeattr(dir, &data->res.dir_cinfo);
 		nfs_post_op_update_inode(dir, data->res.dir_fattr);
@@ -2344,7 +2759,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 			(unsigned long long)cookie);
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	res.pgbase = args.pgbase;
-	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0);
 	if (status == 0)
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
 
@@ -2422,14 +2837,17 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 		.fh = fhandle,
 		.bitmask = server->attr_bitmask,
 	};
+	struct nfs4_statfs_res res = {
+		.fsstat = fsstat,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS],
 		.rpc_argp = &args,
-		.rpc_resp = fsstat,
+		.rpc_resp = &res,
 	};
 
 	nfs_fattr_init(fsstat->fattr);
-	return rpc_call_sync(server->client, &msg, 0);
+	return  nfs4_call_sync(server, &msg, &args, &res, 0);
 }
 
 static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
@@ -2451,13 +2869,16 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 		.fh = fhandle,
 		.bitmask = server->attr_bitmask,
 	};
+	struct nfs4_fsinfo_res res = {
+		.fsinfo = fsinfo,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO],
 		.rpc_argp = &args,
-		.rpc_resp = fsinfo,
+		.rpc_resp = &res,
 	};
 
-	return rpc_call_sync(server->client, &msg, 0);
+	return nfs4_call_sync(server, &msg, &args, &res, 0);
 }
 
 static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
@@ -2486,10 +2907,13 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
 		.fh = fhandle,
 		.bitmask = server->attr_bitmask,
 	};
+	struct nfs4_pathconf_res res = {
+		.pathconf = pathconf,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF],
 		.rpc_argp = &args,
-		.rpc_resp = pathconf,
+		.rpc_resp = &res,
 	};
 
 	/* None of the pathconf attributes are mandatory to implement */
@@ -2499,7 +2923,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
 	}
 
 	nfs_fattr_init(pathconf->fattr);
-	return rpc_call_sync(server->client, &msg, 0);
+	return nfs4_call_sync(server, &msg, &args, &res, 0);
 }
 
 static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -2520,8 +2944,13 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
 	struct nfs_server *server = NFS_SERVER(data->inode);
 
+	dprintk("--> %s\n", __func__);
+
+	/* nfs4_sequence_free_slot called in the read rpc_call_done */
+	nfs4_sequence_done(server, &data->res.seq_res, task->tk_status);
+
 	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
-		rpc_restart_call(task);
+		nfs4_restart_rpc(task, server->nfs_client);
 		return -EAGAIN;
 	}
 
@@ -2541,8 +2970,12 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct inode *inode = data->inode;
 	
+	/* slot is freed in nfs_writeback_done */
+	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+			   task->tk_status);
+
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
-		rpc_restart_call(task);
+		nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
 		return -EAGAIN;
 	}
 	if (task->tk_status >= 0) {
@@ -2567,10 +3000,14 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct inode *inode = data->inode;
 	
+	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+			   task->tk_status);
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
-		rpc_restart_call(task);
+		nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
 		return -EAGAIN;
 	}
+	nfs4_sequence_free_slot(NFS_SERVER(inode)->nfs_client,
+				&data->res.seq_res);
 	nfs_refresh_inode(inode, data->res.fattr);
 	return 0;
 }
@@ -2603,6 +3040,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data)
 	if (time_before(clp->cl_last_renewal,timestamp))
 		clp->cl_last_renewal = timestamp;
 	spin_unlock(&clp->cl_lock);
+	dprintk("%s calling put_rpccred on rpc_cred %p\n", __func__,
+				task->tk_msg.rpc_cred);
+	put_rpccred(task->tk_msg.rpc_cred);
 }
 
 static const struct rpc_call_ops nfs4_renew_ops = {
@@ -2742,12 +3182,14 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 		.acl_pages = pages,
 		.acl_len = buflen,
 	};
-	size_t resp_len = buflen;
+	struct nfs_getaclres res = {
+		.acl_len = buflen,
+	};
 	void *resp_buf;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
 		.rpc_argp = &args,
-		.rpc_resp = &resp_len,
+		.rpc_resp = &res,
 	};
 	struct page *localpage = NULL;
 	int ret;
@@ -2761,26 +3203,26 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 			return -ENOMEM;
 		args.acl_pages[0] = localpage;
 		args.acl_pgbase = 0;
-		resp_len = args.acl_len = PAGE_SIZE;
+		args.acl_len = PAGE_SIZE;
 	} else {
 		resp_buf = buf;
 		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
 	}
-	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0);
 	if (ret)
 		goto out_free;
-	if (resp_len > args.acl_len)
-		nfs4_write_cached_acl(inode, NULL, resp_len);
+	if (res.acl_len > args.acl_len)
+		nfs4_write_cached_acl(inode, NULL, res.acl_len);
 	else
-		nfs4_write_cached_acl(inode, resp_buf, resp_len);
+		nfs4_write_cached_acl(inode, resp_buf, res.acl_len);
 	if (buf) {
 		ret = -ERANGE;
-		if (resp_len > buflen)
+		if (res.acl_len > buflen)
 			goto out_free;
 		if (localpage)
-			memcpy(buf, resp_buf, resp_len);
+			memcpy(buf, resp_buf, res.acl_len);
 	}
-	ret = resp_len;
+	ret = res.acl_len;
 out_free:
 	if (localpage)
 		__free_page(localpage);
@@ -2810,8 +3252,6 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 	ret = nfs_revalidate_inode(server, inode);
 	if (ret < 0)
 		return ret;
-	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
-		nfs_zap_acl_cache(inode);
 	ret = nfs4_read_cached_acl(inode, buf, buflen);
 	if (ret != -ENOENT)
 		return ret;
@@ -2827,10 +3267,11 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 		.acl_pages	= pages,
 		.acl_len	= buflen,
 	};
+	struct nfs_setaclres res;
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_SETACL],
 		.rpc_argp	= &arg,
-		.rpc_resp	= NULL,
+		.rpc_resp	= &res,
 	};
 	int ret;
 
@@ -2838,7 +3279,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 		return -EOPNOTSUPP;
 	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
-	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	nfs_access_zap_cache(inode);
 	nfs_zap_acl_cache(inode);
 	return ret;
@@ -2857,10 +3298,8 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 }
 
 static int
-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state)
 {
-	struct nfs_client *clp = server->nfs_client;
-
 	if (!clp || task->tk_status >= 0)
 		return 0;
 	switch(task->tk_status) {
@@ -2879,8 +3318,23 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 				rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
 			task->tk_status = 0;
 			return -EAGAIN;
+#if defined(CONFIG_NFS_V4_1)
+		case -NFS4ERR_BADSESSION:
+		case -NFS4ERR_BADSLOT:
+		case -NFS4ERR_BAD_HIGH_SLOT:
+		case -NFS4ERR_DEADSESSION:
+		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+		case -NFS4ERR_SEQ_FALSE_RETRY:
+		case -NFS4ERR_SEQ_MISORDERED:
+			dprintk("%s ERROR %d, Reset session\n", __func__,
+				task->tk_status);
+			set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
+			task->tk_status = 0;
+			return -EAGAIN;
+#endif /* CONFIG_NFS_V4_1 */
 		case -NFS4ERR_DELAY:
-			nfs_inc_server_stats(server, NFSIOS_DELAY);
+			if (server)
+				nfs_inc_server_stats(server, NFSIOS_DELAY);
 		case -NFS4ERR_GRACE:
 			rpc_delay(task, NFS4_POLL_RETRY_MAX);
 			task->tk_status = 0;
@@ -2893,6 +3347,12 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 	return 0;
 }
 
+static int
+nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+{
+	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+}
+
 int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
 {
 	nfs4_verifier sc_verifier;
@@ -3000,6 +3460,10 @@ struct nfs4_delegreturndata {
 static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_delegreturndata *data = calldata;
+
+	nfs4_sequence_done_free_slot(data->res.server, &data->res.seq_res,
+				     task->tk_status);
+
 	data->rpc_status = task->tk_status;
 	if (data->rpc_status == 0)
 		renew_lease(data->res.server, data->timestamp);
@@ -3010,7 +3474,25 @@ static void nfs4_delegreturn_release(void *calldata)
 	kfree(calldata);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
+{
+	struct nfs4_delegreturndata *d_data;
+
+	d_data = (struct nfs4_delegreturndata *)data;
+
+	if (nfs4_setup_sequence(d_data->res.server->nfs_client,
+				&d_data->args.seq_args,
+				&d_data->res.seq_res, 1, task))
+		return;
+	rpc_call_start(task);
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 static const struct rpc_call_ops nfs4_delegreturn_ops = {
+#if defined(CONFIG_NFS_V4_1)
+	.rpc_call_prepare = nfs4_delegreturn_prepare,
+#endif /* CONFIG_NFS_V4_1 */
 	.rpc_call_done = nfs4_delegreturn_done,
 	.rpc_release = nfs4_delegreturn_release,
 };
@@ -3032,7 +3514,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	};
 	int status = 0;
 
-	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (data == NULL)
 		return -ENOMEM;
 	data->args.fhandle = &data->fh;
@@ -3042,6 +3524,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	memcpy(&data->stateid, stateid, sizeof(data->stateid));
 	data->res.fattr = &data->fattr;
 	data->res.server = server;
+	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	nfs_fattr_init(data->res.fattr);
 	data->timestamp = jiffies;
 	data->rpc_status = 0;
@@ -3127,7 +3610,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		goto out;
 	lsp = request->fl_u.nfs4_fl.owner;
 	arg.lock_owner.id = lsp->ls_id.id;
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	switch (status) {
 		case 0:
 			request->fl_type = F_UNLCK;
@@ -3187,13 +3670,14 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
 	struct nfs4_unlockdata *p;
 	struct inode *inode = lsp->ls_state->inode;
 
-	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (p == NULL)
 		return NULL;
 	p->arg.fh = NFS_FH(inode);
 	p->arg.fl = &p->fl;
 	p->arg.seqid = seqid;
 	p->res.seqid = seqid;
+	p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	p->arg.stateid = &lsp->ls_stateid;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
@@ -3217,6 +3701,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 {
 	struct nfs4_unlockdata *calldata = data;
 
+	nfs4_sequence_done(calldata->server, &calldata->res.seq_res,
+			   task->tk_status);
 	if (RPC_ASSASSINATED(task))
 		return;
 	switch (task->tk_status) {
@@ -3233,8 +3719,11 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 			break;
 		default:
 			if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
-				rpc_restart_call(task);
+				nfs4_restart_rpc(task,
+						calldata->server->nfs_client);
 	}
+	nfs4_sequence_free_slot(calldata->server->nfs_client,
+				&calldata->res.seq_res);
 }
 
 static void nfs4_locku_prepare(struct rpc_task *task, void *data)
@@ -3249,6 +3738,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 		return;
 	}
 	calldata->timestamp = jiffies;
+	if (nfs4_setup_sequence(calldata->server->nfs_client,
+				&calldata->arg.seq_args,
+				&calldata->res.seq_res, 1, task))
+		return;
 	rpc_call_start(task);
 }
 
@@ -3341,6 +3834,7 @@ struct nfs4_lockdata {
 	unsigned long timestamp;
 	int rpc_status;
 	int cancelled;
+	struct nfs_server *server;
 };
 
 static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
@@ -3366,7 +3860,9 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.id = lsp->ls_id.id;
 	p->res.lock_seqid = p->arg.lock_seqid;
+	p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	p->lsp = lsp;
+	p->server = server;
 	atomic_inc(&lsp->ls_count);
 	p->ctx = get_nfs_open_context(ctx);
 	memcpy(&p->fl, fl, sizeof(p->fl));
@@ -3396,6 +3892,9 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 	} else
 		data->arg.new_lock_owner = 0;
 	data->timestamp = jiffies;
+	if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args,
+				&data->res.seq_res, 1, task))
+		return;
 	rpc_call_start(task);
 	dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
 }
@@ -3406,6 +3905,9 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 
 	dprintk("%s: begin!\n", __func__);
 
+	nfs4_sequence_done_free_slot(data->server, &data->res.seq_res,
+				     task->tk_status);
+
 	data->rpc_status = task->tk_status;
 	if (RPC_ASSASSINATED(task))
 		goto out;
@@ -3487,8 +3989,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 	ret = nfs4_wait_for_completion_rpc_task(task);
 	if (ret == 0) {
 		ret = data->rpc_status;
-		if (ret == -NFS4ERR_DENIED)
-			ret = -EAGAIN;
 	} else
 		data->cancelled = 1;
 	rpc_put_task(task);
@@ -3576,9 +4076,11 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *
 	int err;
 
 	do {
+		err = _nfs4_proc_setlk(state, cmd, request);
+		if (err == -NFS4ERR_DENIED)
+			err = -EAGAIN;
 		err = nfs4_handle_exception(NFS_SERVER(state->inode),
-				_nfs4_proc_setlk(state, cmd, request),
-				&exception);
+				err, &exception);
 	} while (exception.retry);
 	return err;
 }
@@ -3630,8 +4132,37 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
 		goto out;
 	do {
 		err = _nfs4_do_setlk(state, F_SETLK, fl, 0);
-		if (err != -NFS4ERR_DELAY)
-			break;
+		switch (err) {
+			default:
+				printk(KERN_ERR "%s: unhandled error %d.\n",
+						__func__, err);
+			case 0:
+			case -ESTALE:
+				goto out;
+			case -NFS4ERR_EXPIRED:
+			case -NFS4ERR_STALE_CLIENTID:
+			case -NFS4ERR_STALE_STATEID:
+				nfs4_schedule_state_recovery(server->nfs_client);
+				goto out;
+			case -ERESTARTSYS:
+				/*
+				 * The show must go on: exit, but mark the
+				 * stateid as needing recovery.
+				 */
+			case -NFS4ERR_ADMIN_REVOKED:
+			case -NFS4ERR_BAD_STATEID:
+			case -NFS4ERR_OPENMODE:
+				nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+				err = 0;
+				goto out;
+			case -ENOMEM:
+			case -NFS4ERR_DENIED:
+				/* kill_proc(fl->fl_pid, SIGLOST, 1); */
+				err = 0;
+				goto out;
+			case -NFS4ERR_DELAY:
+				break;
+		}
 		err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 out:
@@ -3706,10 +4237,13 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 		.page = page,
 		.bitmask = bitmask,
 	};
+	struct nfs4_fs_locations_res res = {
+		.fs_locations = fs_locations,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
 		.rpc_argp = &args,
-		.rpc_resp = fs_locations,
+		.rpc_resp = &res,
 	};
 	int status;
 
@@ -3717,24 +4251,720 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 	nfs_fattr_init(&fs_locations->fattr);
 	fs_locations->server = server;
 	fs_locations->nlocations = 0;
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &args, &res, 0);
 	nfs_fixup_referral_attributes(&fs_locations->fattr);
 	dprintk("%s: returned status = %d\n", __func__, status);
 	return status;
 }
 
-struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
+#ifdef CONFIG_NFS_V4_1
+/*
+ * nfs4_proc_exchange_id()
+ *
+ * Since the clientid has expired, all compounds using sessions
+ * associated with the stale clientid will be returning
+ * NFS4ERR_BADSESSION in the sequence operation, and will therefore
+ * be in some phase of session reset.
+ */
+static int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
+{
+	nfs4_verifier verifier;
+	struct nfs41_exchange_id_args args = {
+		.client = clp,
+		.flags = clp->cl_exchange_flags,
+	};
+	struct nfs41_exchange_id_res res = {
+		.client = clp,
+	};
+	int status;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+		.rpc_cred = cred,
+	};
+	__be32 *p;
+
+	dprintk("--> %s\n", __func__);
+	BUG_ON(clp == NULL);
+
+	p = (u32 *)verifier.data;
+	*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
+	*p = htonl((u32)clp->cl_boot_time.tv_nsec);
+	args.verifier = &verifier;
+
+	while (1) {
+		args.id_len = scnprintf(args.id, sizeof(args.id),
+					"%s/%s %u",
+					clp->cl_ipaddr,
+					rpc_peeraddr2str(clp->cl_rpcclient,
+							 RPC_DISPLAY_ADDR),
+					clp->cl_id_uniquifier);
+
+		status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+
+		if (status != NFS4ERR_CLID_INUSE)
+			break;
+
+		if (signalled())
+			break;
+
+		if (++clp->cl_id_uniquifier == 0)
+			break;
+	}
+
+	dprintk("<-- %s status= %d\n", __func__, status);
+	return status;
+}
+
+struct nfs4_get_lease_time_data {
+	struct nfs4_get_lease_time_args *args;
+	struct nfs4_get_lease_time_res *res;
+	struct nfs_client *clp;
+};
+
+static void nfs4_get_lease_time_prepare(struct rpc_task *task,
+					void *calldata)
+{
+	int ret;
+	struct nfs4_get_lease_time_data *data =
+			(struct nfs4_get_lease_time_data *)calldata;
+
+	dprintk("--> %s\n", __func__);
+	/* just setup sequence, do not trigger session recovery
+	   since we're invoked within one */
+	ret = nfs41_setup_sequence(data->clp->cl_session,
+					&data->args->la_seq_args,
+					&data->res->lr_seq_res, 0, task);
+
+	BUG_ON(ret == -EAGAIN);
+	rpc_call_start(task);
+	dprintk("<-- %s\n", __func__);
+}
+
+/*
+ * Called from nfs4_state_manager thread for session setup, so don't recover
+ * from sequence operation or clientid errors.
+ */
+static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_get_lease_time_data *data =
+			(struct nfs4_get_lease_time_data *)calldata;
+
+	dprintk("--> %s\n", __func__);
+	nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status);
+	switch (task->tk_status) {
+	case -NFS4ERR_DELAY:
+	case -NFS4ERR_GRACE:
+		dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
+		rpc_delay(task, NFS4_POLL_RETRY_MIN);
+		task->tk_status = 0;
+		nfs4_restart_rpc(task, data->clp);
+		return;
+	}
+	nfs41_sequence_free_slot(data->clp, &data->res->lr_seq_res);
+	dprintk("<-- %s\n", __func__);
+}
+
+struct rpc_call_ops nfs4_get_lease_time_ops = {
+	.rpc_call_prepare = nfs4_get_lease_time_prepare,
+	.rpc_call_done = nfs4_get_lease_time_done,
+};
+
+int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
+{
+	struct rpc_task *task;
+	struct nfs4_get_lease_time_args args;
+	struct nfs4_get_lease_time_res res = {
+		.lr_fsinfo = fsinfo,
+	};
+	struct nfs4_get_lease_time_data data = {
+		.args = &args,
+		.res = &res,
+		.clp = clp,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GET_LEASE_TIME],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	struct rpc_task_setup task_setup = {
+		.rpc_client = clp->cl_rpcclient,
+		.rpc_message = &msg,
+		.callback_ops = &nfs4_get_lease_time_ops,
+		.callback_data = &data
+	};
+	int status;
+
+	res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
+	dprintk("--> %s\n", __func__);
+	task = rpc_run_task(&task_setup);
+
+	if (IS_ERR(task))
+		status = PTR_ERR(task);
+	else {
+		status = task->tk_status;
+		rpc_put_task(task);
+	}
+	dprintk("<-- %s return %d\n", __func__, status);
+
+	return status;
+}
+
+/*
+ * Reset a slot table
+ */
+static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots,
+		int old_max_slots, int ivalue)
+{
+	int i;
+	int ret = 0;
+
+	dprintk("--> %s: max_reqs=%u, tbl %p\n", __func__, max_slots, tbl);
+
+	/*
+	 * Until we have dynamic slot table adjustment, insist
+	 * upon the same slot table size
+	 */
+	if (max_slots != old_max_slots) {
+		dprintk("%s reset slot table does't match old\n",
+			__func__);
+		ret = -EINVAL; /*XXX NFS4ERR_REQ_TOO_BIG ? */
+		goto out;
+	}
+	spin_lock(&tbl->slot_tbl_lock);
+	for (i = 0; i < max_slots; ++i)
+		tbl->slots[i].seq_nr = ivalue;
+	tbl->highest_used_slotid = -1;
+	spin_unlock(&tbl->slot_tbl_lock);
+	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
+		tbl, tbl->slots, tbl->max_slots);
+out:
+	dprintk("<-- %s: return %d\n", __func__, ret);
+	return ret;
+}
+
+/*
+ * Reset the forechannel and backchannel slot tables
+ */
+static int nfs4_reset_slot_tables(struct nfs4_session *session)
+{
+	int status;
+
+	status = nfs4_reset_slot_table(&session->fc_slot_table,
+			session->fc_attrs.max_reqs,
+			session->fc_slot_table.max_slots,
+			1);
+	if (status)
+		return status;
+
+	status = nfs4_reset_slot_table(&session->bc_slot_table,
+			session->bc_attrs.max_reqs,
+			session->bc_slot_table.max_slots,
+			0);
+	return status;
+}
+
+/* Destroy the slot table */
+static void nfs4_destroy_slot_tables(struct nfs4_session *session)
+{
+	if (session->fc_slot_table.slots != NULL) {
+		kfree(session->fc_slot_table.slots);
+		session->fc_slot_table.slots = NULL;
+	}
+	if (session->bc_slot_table.slots != NULL) {
+		kfree(session->bc_slot_table.slots);
+		session->bc_slot_table.slots = NULL;
+	}
+	return;
+}
+
+/*
+ * Initialize slot table
+ */
+static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
+		int max_slots, int ivalue)
+{
+	int i;
+	struct nfs4_slot *slot;
+	int ret = -ENOMEM;
+
+	BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE);
+
+	dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
+
+	slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL);
+	if (!slot)
+		goto out;
+	for (i = 0; i < max_slots; ++i)
+		slot[i].seq_nr = ivalue;
+	ret = 0;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	if (tbl->slots != NULL) {
+		spin_unlock(&tbl->slot_tbl_lock);
+		dprintk("%s: slot table already initialized. tbl=%p slots=%p\n",
+			__func__, tbl, tbl->slots);
+		WARN_ON(1);
+		goto out_free;
+	}
+	tbl->max_slots = max_slots;
+	tbl->slots = slot;
+	tbl->highest_used_slotid = -1;  /* no slot is currently used */
+	spin_unlock(&tbl->slot_tbl_lock);
+	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
+		tbl, tbl->slots, tbl->max_slots);
+out:
+	dprintk("<-- %s: return %d\n", __func__, ret);
+	return ret;
+
+out_free:
+	kfree(slot);
+	goto out;
+}
+
+/*
+ * Initialize the forechannel and backchannel tables
+ */
+static int nfs4_init_slot_tables(struct nfs4_session *session)
+{
+	int status;
+
+	status = nfs4_init_slot_table(&session->fc_slot_table,
+			session->fc_attrs.max_reqs, 1);
+	if (status)
+		return status;
+
+	status = nfs4_init_slot_table(&session->bc_slot_table,
+			session->bc_attrs.max_reqs, 0);
+	if (status)
+		nfs4_destroy_slot_tables(session);
+
+	return status;
+}
+
+struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
+{
+	struct nfs4_session *session;
+	struct nfs4_slot_table *tbl;
+
+	session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL);
+	if (!session)
+		return NULL;
+
+	set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
+	/*
+	 * The create session reply races with the server back
+	 * channel probe. Mark the client NFS_CS_SESSION_INITING
+	 * so that the client back channel can find the
+	 * nfs_client struct
+	 */
+	clp->cl_cons_state = NFS_CS_SESSION_INITING;
+
+	tbl = &session->fc_slot_table;
+	spin_lock_init(&tbl->slot_tbl_lock);
+	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
+
+	tbl = &session->bc_slot_table;
+	spin_lock_init(&tbl->slot_tbl_lock);
+	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+
+	session->clp = clp;
+	return session;
+}
+
+void nfs4_destroy_session(struct nfs4_session *session)
+{
+	nfs4_proc_destroy_session(session);
+	dprintk("%s Destroy backchannel for xprt %p\n",
+		__func__, session->clp->cl_rpcclient->cl_xprt);
+	xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt,
+				NFS41_BC_MIN_CALLBACKS);
+	nfs4_destroy_slot_tables(session);
+	kfree(session);
+}
+
+/*
+ * Initialize the values to be used by the client in CREATE_SESSION
+ * If nfs4_init_session set the fore channel request and response sizes,
+ * use them.
+ *
+ * Set the back channel max_resp_sz_cached to zero to force the client to
+ * always set csa_cachethis to FALSE because the current implementation
+ * of the back channel DRC only supports caching the CB_SEQUENCE operation.
+ */
+static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
+{
+	struct nfs4_session *session = args->client->cl_session;
+	unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz,
+		     mxresp_sz = session->fc_attrs.max_resp_sz;
+
+	if (mxrqst_sz == 0)
+		mxrqst_sz = NFS_MAX_FILE_IO_SIZE;
+	if (mxresp_sz == 0)
+		mxresp_sz = NFS_MAX_FILE_IO_SIZE;
+	/* Fore channel attributes */
+	args->fc_attrs.headerpadsz = 0;
+	args->fc_attrs.max_rqst_sz = mxrqst_sz;
+	args->fc_attrs.max_resp_sz = mxresp_sz;
+	args->fc_attrs.max_resp_sz_cached = mxresp_sz;
+	args->fc_attrs.max_ops = NFS4_MAX_OPS;
+	args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs;
+
+	dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u "
+		"max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
+		__func__,
+		args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz,
+		args->fc_attrs.max_resp_sz_cached, args->fc_attrs.max_ops,
+		args->fc_attrs.max_reqs);
+
+	/* Back channel attributes */
+	args->bc_attrs.headerpadsz = 0;
+	args->bc_attrs.max_rqst_sz = PAGE_SIZE;
+	args->bc_attrs.max_resp_sz = PAGE_SIZE;
+	args->bc_attrs.max_resp_sz_cached = 0;
+	args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS;
+	args->bc_attrs.max_reqs = 1;
+
+	dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u "
+		"max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
+		__func__,
+		args->bc_attrs.max_rqst_sz, args->bc_attrs.max_resp_sz,
+		args->bc_attrs.max_resp_sz_cached, args->bc_attrs.max_ops,
+		args->bc_attrs.max_reqs);
+}
+
+static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd)
+{
+	if (rcvd <= sent)
+		return 0;
+	printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. "
+		"sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd);
+	return -EINVAL;
+}
+
+#define _verify_fore_channel_attr(_name_) \
+	_verify_channel_attr("fore", #_name_, \
+			     args->fc_attrs._name_, \
+			     session->fc_attrs._name_)
+
+#define _verify_back_channel_attr(_name_) \
+	_verify_channel_attr("back", #_name_, \
+			     args->bc_attrs._name_, \
+			     session->bc_attrs._name_)
+
+/*
+ * The server is not allowed to increase the fore channel header pad size,
+ * maximum response size, or maximum number of operations.
+ *
+ * The back channel attributes are only negotiatied down: We send what the
+ * (back channel) server insists upon.
+ */
+static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
+				     struct nfs4_session *session)
+{
+	int ret = 0;
+
+	ret |= _verify_fore_channel_attr(headerpadsz);
+	ret |= _verify_fore_channel_attr(max_resp_sz);
+	ret |= _verify_fore_channel_attr(max_ops);
+
+	ret |= _verify_back_channel_attr(headerpadsz);
+	ret |= _verify_back_channel_attr(max_rqst_sz);
+	ret |= _verify_back_channel_attr(max_resp_sz);
+	ret |= _verify_back_channel_attr(max_resp_sz_cached);
+	ret |= _verify_back_channel_attr(max_ops);
+	ret |= _verify_back_channel_attr(max_reqs);
+
+	return ret;
+}
+
+static int _nfs4_proc_create_session(struct nfs_client *clp)
+{
+	struct nfs4_session *session = clp->cl_session;
+	struct nfs41_create_session_args args = {
+		.client = clp,
+		.cb_program = NFS4_CALLBACK,
+	};
+	struct nfs41_create_session_res res = {
+		.client = clp,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	int status;
+
+	nfs4_init_channel_attrs(&args);
+	args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN);
+
+	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0);
+
+	if (!status)
+		/* Verify the session's negotiated channel_attrs values */
+		status = nfs4_verify_channel_attrs(&args, session);
+	if (!status) {
+		/* Increment the clientid slot sequence id */
+		clp->cl_seqid++;
+	}
+
+	return status;
+}
+
+/*
+ * Issues a CREATE_SESSION operation to the server.
+ * It is the responsibility of the caller to verify the session is
+ * expired before calling this routine.
+ */
+int nfs4_proc_create_session(struct nfs_client *clp, int reset)
+{
+	int status;
+	unsigned *ptr;
+	struct nfs_fsinfo fsinfo;
+	struct nfs4_session *session = clp->cl_session;
+
+	dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
+
+	status = _nfs4_proc_create_session(clp);
+	if (status)
+		goto out;
+
+	/* Init or reset the fore channel */
+	if (reset)
+		status = nfs4_reset_slot_tables(session);
+	else
+		status = nfs4_init_slot_tables(session);
+	dprintk("fore channel slot table initialization returned %d\n", status);
+	if (status)
+		goto out;
+
+	ptr = (unsigned *)&session->sess_id.data[0];
+	dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__,
+		clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]);
+
+	if (reset)
+		/* Lease time is aleady set */
+		goto out;
+
+	/* Get the lease time */
+	status = nfs4_proc_get_lease_time(clp, &fsinfo);
+	if (status == 0) {
+		/* Update lease time and schedule renewal */
+		spin_lock(&clp->cl_lock);
+		clp->cl_lease_time = fsinfo.lease_time * HZ;
+		clp->cl_last_renewal = jiffies;
+		clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+		spin_unlock(&clp->cl_lock);
+
+		nfs4_schedule_state_renewal(clp);
+	}
+out:
+	dprintk("<-- %s\n", __func__);
+	return status;
+}
+
+/*
+ * Issue the over-the-wire RPC DESTROY_SESSION.
+ * The caller must serialize access to this routine.
+ */
+int nfs4_proc_destroy_session(struct nfs4_session *session)
+{
+	int status = 0;
+	struct rpc_message msg;
+
+	dprintk("--> nfs4_proc_destroy_session\n");
+
+	/* session is still being setup */
+	if (session->clp->cl_cons_state != NFS_CS_READY)
+		return status;
+
+	msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION];
+	msg.rpc_argp = session;
+	msg.rpc_resp = NULL;
+	msg.rpc_cred = NULL;
+	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0);
+
+	if (status)
+		printk(KERN_WARNING
+			"Got error %d from the server on DESTROY_SESSION. "
+			"Session has been destroyed regardless...\n", status);
+
+	dprintk("<-- nfs4_proc_destroy_session\n");
+	return status;
+}
+
+/*
+ * Renew the cl_session lease.
+ */
+static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+{
+	struct nfs4_sequence_args args;
+	struct nfs4_sequence_res res;
+
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+		.rpc_cred = cred,
+	};
+
+	args.sa_cache_this = 0;
+
+	return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
+				       &res, 0);
+}
+
+void nfs41_sequence_call_done(struct rpc_task *task, void *data)
+{
+	struct nfs_client *clp = (struct nfs_client *)data;
+
+	nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status);
+
+	if (task->tk_status < 0) {
+		dprintk("%s ERROR %d\n", __func__, task->tk_status);
+
+		if (_nfs4_async_handle_error(task, NULL, clp, NULL)
+								== -EAGAIN) {
+			nfs4_restart_rpc(task, clp);
+			return;
+		}
+	}
+	nfs41_sequence_free_slot(clp, task->tk_msg.rpc_resp);
+	dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
+
+	put_rpccred(task->tk_msg.rpc_cred);
+	kfree(task->tk_msg.rpc_argp);
+	kfree(task->tk_msg.rpc_resp);
+
+	dprintk("<-- %s\n", __func__);
+}
+
+static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
+{
+	struct nfs_client *clp;
+	struct nfs4_sequence_args *args;
+	struct nfs4_sequence_res *res;
+
+	clp = (struct nfs_client *)data;
+	args = task->tk_msg.rpc_argp;
+	res = task->tk_msg.rpc_resp;
+
+	if (nfs4_setup_sequence(clp, args, res, 0, task))
+		return;
+	rpc_call_start(task);
+}
+
+static const struct rpc_call_ops nfs41_sequence_ops = {
+	.rpc_call_done = nfs41_sequence_call_done,
+	.rpc_call_prepare = nfs41_sequence_prepare,
+};
+
+static int nfs41_proc_async_sequence(struct nfs_client *clp,
+				     struct rpc_cred *cred)
+{
+	struct nfs4_sequence_args *args;
+	struct nfs4_sequence_res *res;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+		.rpc_cred = cred,
+	};
+
+	args = kzalloc(sizeof(*args), GFP_KERNEL);
+	if (!args)
+		return -ENOMEM;
+	res = kzalloc(sizeof(*res), GFP_KERNEL);
+	if (!res) {
+		kfree(args);
+		return -ENOMEM;
+	}
+	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+	msg.rpc_argp = args;
+	msg.rpc_resp = res;
+
+	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+			      &nfs41_sequence_ops, (void *)clp);
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
+struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
 	.owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
 	.state_flag_bit	= NFS_STATE_RECLAIM_REBOOT,
 	.recover_open	= nfs4_open_reclaim,
 	.recover_lock	= nfs4_lock_reclaim,
+	.establish_clid = nfs4_init_clientid,
+	.get_clid_cred	= nfs4_get_setclientid_cred,
+};
+
+#if defined(CONFIG_NFS_V4_1)
+struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
+	.owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
+	.state_flag_bit	= NFS_STATE_RECLAIM_REBOOT,
+	.recover_open	= nfs4_open_reclaim,
+	.recover_lock	= nfs4_lock_reclaim,
+	.establish_clid = nfs4_proc_exchange_id,
+	.get_clid_cred	= nfs4_get_exchange_id_cred,
+};
+#endif /* CONFIG_NFS_V4_1 */
+
+struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
+	.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
+	.state_flag_bit	= NFS_STATE_RECLAIM_NOGRACE,
+	.recover_open	= nfs4_open_expired,
+	.recover_lock	= nfs4_lock_expired,
+	.establish_clid = nfs4_init_clientid,
+	.get_clid_cred	= nfs4_get_setclientid_cred,
 };
 
-struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops = {
+#if defined(CONFIG_NFS_V4_1)
+struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
 	.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
 	.state_flag_bit	= NFS_STATE_RECLAIM_NOGRACE,
 	.recover_open	= nfs4_open_expired,
 	.recover_lock	= nfs4_lock_expired,
+	.establish_clid = nfs4_proc_exchange_id,
+	.get_clid_cred	= nfs4_get_exchange_id_cred,
+};
+#endif /* CONFIG_NFS_V4_1 */
+
+struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = {
+	.sched_state_renewal = nfs4_proc_async_renew,
+	.get_state_renewal_cred_locked = nfs4_get_renew_cred_locked,
+	.renew_lease = nfs4_proc_renew,
+};
+
+#if defined(CONFIG_NFS_V4_1)
+struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
+	.sched_state_renewal = nfs41_proc_async_sequence,
+	.get_state_renewal_cred_locked = nfs4_get_machine_cred_locked,
+	.renew_lease = nfs4_proc_sequence,
+};
+#endif
+
+/*
+ * Per minor version reboot and network partition recovery ops
+ */
+
+struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = {
+	&nfs40_reboot_recovery_ops,
+#if defined(CONFIG_NFS_V4_1)
+	&nfs41_reboot_recovery_ops,
+#endif
+};
+
+struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
+	&nfs40_nograce_recovery_ops,
+#if defined(CONFIG_NFS_V4_1)
+	&nfs41_nograce_recovery_ops,
+#endif
+};
+
+struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = {
+	&nfs40_state_renewal_ops,
+#if defined(CONFIG_NFS_V4_1)
+	&nfs41_state_renewal_ops,
+#endif
 };
 
 static const struct inode_operations nfs4_file_inode_operations = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index f524e932ff7b..e27c6cef18f2 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -59,12 +59,14 @@
 void
 nfs4_renew_state(struct work_struct *work)
 {
+	struct nfs4_state_maintenance_ops *ops;
 	struct nfs_client *clp =
 		container_of(work, struct nfs_client, cl_renewd.work);
 	struct rpc_cred *cred;
 	long lease, timeout;
 	unsigned long last, now;
 
+	ops = nfs4_state_renewal_ops[clp->cl_minorversion];
 	dprintk("%s: start\n", __func__);
 	/* Are there any active superblocks? */
 	if (list_empty(&clp->cl_superblocks))
@@ -76,7 +78,7 @@ nfs4_renew_state(struct work_struct *work)
 	timeout = (2 * lease) / 3 + (long)last - (long)now;
 	/* Are we close to a lease timeout? */
 	if (time_after(now, last + lease/3)) {
-		cred = nfs4_get_renew_cred_locked(clp);
+		cred = ops->get_state_renewal_cred_locked(clp);
 		spin_unlock(&clp->cl_lock);
 		if (cred == NULL) {
 			if (list_empty(&clp->cl_delegations)) {
@@ -86,7 +88,7 @@ nfs4_renew_state(struct work_struct *work)
 			nfs_expire_all_delegations(clp);
 		} else {
 			/* Queue an asynchronous RENEW. */
-			nfs4_proc_async_renew(clp, cred);
+			ops->sched_state_renewal(clp, cred);
 			put_rpccred(cred);
 		}
 		timeout = (2 * lease) / 3;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0298e909559f..b73c5a728655 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -60,7 +60,7 @@ const nfs4_stateid zero_stateid;
 
 static LIST_HEAD(nfs4_clientid_list);
 
-static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
+int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	unsigned short port;
 	int status;
@@ -77,7 +77,7 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 	return status;
 }
 
-static struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
+struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
 {
 	struct rpc_cred *cred = NULL;
 
@@ -114,17 +114,21 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
 	return cred;
 }
 
-static struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
+#if defined(CONFIG_NFS_V4_1)
+
+struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
 {
 	struct rpc_cred *cred;
 
 	spin_lock(&clp->cl_lock);
-	cred = nfs4_get_renew_cred_locked(clp);
+	cred = nfs4_get_machine_cred_locked(clp);
 	spin_unlock(&clp->cl_lock);
 	return cred;
 }
 
-static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+#endif /* CONFIG_NFS_V4_1 */
+
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
@@ -738,12 +742,14 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 
 void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
 {
-	if (status == -NFS4ERR_BAD_SEQID) {
-		struct nfs4_state_owner *sp = container_of(seqid->sequence,
-				struct nfs4_state_owner, so_seqid);
+	struct nfs4_state_owner *sp = container_of(seqid->sequence,
+					struct nfs4_state_owner, so_seqid);
+	struct nfs_server *server = sp->so_server;
+
+	if (status == -NFS4ERR_BAD_SEQID)
 		nfs4_drop_state_owner(sp);
-	}
-	nfs_increment_seqid(status, seqid);
+	if (!nfs4_has_session(server->nfs_client))
+		nfs_increment_seqid(status, seqid);
 }
 
 /*
@@ -847,32 +853,45 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
 	struct file_lock *fl;
 	int status = 0;
 
+	if (inode->i_flock == NULL)
+		return 0;
+
+	/* Guard against delegation returns and new lock/unlock calls */
 	down_write(&nfsi->rwsem);
+	/* Protect inode->i_flock using the BKL */
+	lock_kernel();
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
 			continue;
 		if (nfs_file_open_context(fl->fl_file)->state != state)
 			continue;
+		unlock_kernel();
 		status = ops->recover_lock(state, fl);
-		if (status >= 0)
-			continue;
 		switch (status) {
+			case 0:
+				break;
+			case -ESTALE:
+			case -NFS4ERR_ADMIN_REVOKED:
+			case -NFS4ERR_STALE_STATEID:
+			case -NFS4ERR_BAD_STATEID:
+			case -NFS4ERR_EXPIRED:
+			case -NFS4ERR_NO_GRACE:
+			case -NFS4ERR_STALE_CLIENTID:
+				goto out;
 			default:
 				printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
 						__func__, status);
-			case -NFS4ERR_EXPIRED:
-			case -NFS4ERR_NO_GRACE:
+			case -ENOMEM:
+			case -NFS4ERR_DENIED:
 			case -NFS4ERR_RECLAIM_BAD:
 			case -NFS4ERR_RECLAIM_CONFLICT:
 				/* kill_proc(fl->fl_pid, SIGLOST, 1); */
-				break;
-			case -NFS4ERR_STALE_CLIENTID:
-				goto out_err;
+				status = 0;
 		}
+		lock_kernel();
 	}
-	up_write(&nfsi->rwsem);
-	return 0;
-out_err:
+	unlock_kernel();
+out:
 	up_write(&nfsi->rwsem);
 	return status;
 }
@@ -918,6 +937,7 @@ restart:
 				printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
 						__func__, status);
 			case -ENOENT:
+			case -ENOMEM:
 			case -ESTALE:
 				/*
 				 * Open state on this file cannot be recovered
@@ -928,6 +948,9 @@ restart:
 				/* Mark the file as being 'closed' */
 				state->state = 0;
 				break;
+			case -NFS4ERR_ADMIN_REVOKED:
+			case -NFS4ERR_STALE_STATEID:
+			case -NFS4ERR_BAD_STATEID:
 			case -NFS4ERR_RECLAIM_BAD:
 			case -NFS4ERR_RECLAIM_CONFLICT:
 				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
@@ -1042,6 +1065,14 @@ static void nfs4_recovery_handle_error(struct nfs_client *clp, int error)
 		case -NFS4ERR_EXPIRED:
 			set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 			nfs4_state_start_reclaim_nograce(clp);
+		case -NFS4ERR_BADSESSION:
+		case -NFS4ERR_BADSLOT:
+		case -NFS4ERR_BAD_HIGH_SLOT:
+		case -NFS4ERR_DEADSESSION:
+		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+		case -NFS4ERR_SEQ_FALSE_RETRY:
+		case -NFS4ERR_SEQ_MISORDERED:
+			set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
 	}
 }
 
@@ -1075,18 +1106,22 @@ restart:
 static int nfs4_check_lease(struct nfs_client *clp)
 {
 	struct rpc_cred *cred;
+	struct nfs4_state_maintenance_ops *ops =
+		nfs4_state_renewal_ops[clp->cl_minorversion];
 	int status = -NFS4ERR_EXPIRED;
 
 	/* Is the client already known to have an expired lease? */
 	if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
 		return 0;
-	cred = nfs4_get_renew_cred(clp);
+	spin_lock(&clp->cl_lock);
+	cred = ops->get_state_renewal_cred_locked(clp);
+	spin_unlock(&clp->cl_lock);
 	if (cred == NULL) {
 		cred = nfs4_get_setclientid_cred(clp);
 		if (cred == NULL)
 			goto out;
 	}
-	status = nfs4_proc_renew(clp, cred);
+	status = ops->renew_lease(clp, cred);
 	put_rpccred(cred);
 out:
 	nfs4_recovery_handle_error(clp, status);
@@ -1096,21 +1131,98 @@ out:
 static int nfs4_reclaim_lease(struct nfs_client *clp)
 {
 	struct rpc_cred *cred;
+	struct nfs4_state_recovery_ops *ops =
+		nfs4_reboot_recovery_ops[clp->cl_minorversion];
 	int status = -ENOENT;
 
-	cred = nfs4_get_setclientid_cred(clp);
+	cred = ops->get_clid_cred(clp);
 	if (cred != NULL) {
-		status = nfs4_init_client(clp, cred);
+		status = ops->establish_clid(clp, cred);
 		put_rpccred(cred);
 		/* Handle case where the user hasn't set up machine creds */
 		if (status == -EACCES && cred == clp->cl_machine_cred) {
 			nfs4_clear_machine_cred(clp);
 			status = -EAGAIN;
 		}
+		if (status == -NFS4ERR_MINOR_VERS_MISMATCH)
+			status = -EPROTONOSUPPORT;
+	}
+	return status;
+}
+
+#ifdef CONFIG_NFS_V4_1
+static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err)
+{
+	switch (err) {
+	case -NFS4ERR_STALE_CLIENTID:
+		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+		set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
+	}
+}
+
+static int nfs4_reset_session(struct nfs_client *clp)
+{
+	int status;
+
+	status = nfs4_proc_destroy_session(clp->cl_session);
+	if (status && status != -NFS4ERR_BADSESSION &&
+	    status != -NFS4ERR_DEADSESSION) {
+		nfs4_session_recovery_handle_error(clp, status);
+		goto out;
 	}
+
+	memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
+	status = nfs4_proc_create_session(clp, 1);
+	if (status)
+		nfs4_session_recovery_handle_error(clp, status);
+		/* fall through*/
+out:
+	/* Wake up the next rpc task even on error */
+	rpc_wake_up_next(&clp->cl_session->fc_slot_table.slot_tbl_waitq);
 	return status;
 }
 
+static int nfs4_initialize_session(struct nfs_client *clp)
+{
+	int status;
+
+	status = nfs4_proc_create_session(clp, 0);
+	if (!status) {
+		nfs_mark_client_ready(clp, NFS_CS_READY);
+	} else if (status == -NFS4ERR_STALE_CLIENTID) {
+		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+		set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
+	} else {
+		nfs_mark_client_ready(clp, status);
+	}
+	return status;
+}
+#else /* CONFIG_NFS_V4_1 */
+static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
+static int nfs4_initialize_session(struct nfs_client *clp) { return 0; }
+#endif /* CONFIG_NFS_V4_1 */
+
+/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
+ * on EXCHANGE_ID for v4.1
+ */
+static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
+{
+	if (nfs4_has_session(clp)) {
+		switch (status) {
+		case -NFS4ERR_DELAY:
+		case -NFS4ERR_CLID_INUSE:
+		case -EAGAIN:
+			break;
+
+		case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
+					 * in nfs4_exchange_id */
+		default:
+			return;
+		}
+	}
+	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+}
+
 static void nfs4_state_manager(struct nfs_client *clp)
 {
 	int status = 0;
@@ -1121,9 +1233,12 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			/* We're going to have to re-establish a clientid */
 			status = nfs4_reclaim_lease(clp);
 			if (status) {
-				set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+				nfs4_set_lease_expired(clp, status);
 				if (status == -EAGAIN)
 					continue;
+				if (clp->cl_cons_state ==
+							NFS_CS_SESSION_INITING)
+					nfs_mark_client_ready(clp, status);
 				goto out_error;
 			}
 			clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
@@ -1134,25 +1249,44 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			if (status != 0)
 				continue;
 		}
-
+		/* Initialize or reset the session */
+		if (nfs4_has_session(clp) &&
+		   test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) {
+			if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
+				status = nfs4_initialize_session(clp);
+			else
+				status = nfs4_reset_session(clp);
+			if (status) {
+				if (status == -NFS4ERR_STALE_CLIENTID)
+					continue;
+				goto out_error;
+			}
+		}
 		/* First recover reboot state... */
 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
-			status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops);
+			status = nfs4_do_reclaim(clp,
+				nfs4_reboot_recovery_ops[clp->cl_minorversion]);
 			if (status == -NFS4ERR_STALE_CLIENTID)
 				continue;
+			if (test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
+				continue;
 			nfs4_state_end_reclaim_reboot(clp);
 			continue;
 		}
 
 		/* Now recover expired state... */
 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
-			status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops);
+			status = nfs4_do_reclaim(clp,
+				nfs4_nograce_recovery_ops[clp->cl_minorversion]);
 			if (status < 0) {
 				set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
 				if (status == -NFS4ERR_STALE_CLIENTID)
 					continue;
 				if (status == -NFS4ERR_EXPIRED)
 					continue;
+				if (test_bit(NFS4CLNT_SESSION_SETUP,
+								&clp->cl_state))
+					continue;
 				goto out_error;
 			} else
 				nfs4_state_end_reclaim_nograce(clp);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1690f0e44b91..617273e7d47f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -192,12 +192,16 @@ static int nfs4_stat_to_errno(int);
 				 decode_verifier_maxsz)
 #define encode_remove_maxsz	(op_encode_hdr_maxsz + \
 				nfs4_name_maxsz)
+#define decode_remove_maxsz	(op_decode_hdr_maxsz + \
+				 decode_change_info_maxsz)
 #define encode_rename_maxsz	(op_encode_hdr_maxsz + \
 				2 * nfs4_name_maxsz)
-#define decode_rename_maxsz	(op_decode_hdr_maxsz + 5 + 5)
+#define decode_rename_maxsz	(op_decode_hdr_maxsz + \
+				 decode_change_info_maxsz + \
+				 decode_change_info_maxsz)
 #define encode_link_maxsz	(op_encode_hdr_maxsz + \
 				nfs4_name_maxsz)
-#define decode_link_maxsz	(op_decode_hdr_maxsz + 5)
+#define decode_link_maxsz	(op_decode_hdr_maxsz + decode_change_info_maxsz)
 #define encode_lock_maxsz	(op_encode_hdr_maxsz + \
 				 7 + \
 				 1 + encode_stateid_maxsz + 8)
@@ -240,43 +244,115 @@ static int nfs4_stat_to_errno(int);
 				(encode_getattr_maxsz)
 #define decode_fs_locations_maxsz \
 				(0)
+
+#if defined(CONFIG_NFS_V4_1)
+#define NFS4_MAX_MACHINE_NAME_LEN (64)
+
+#define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \
+				encode_verifier_maxsz + \
+				1 /* co_ownerid.len */ + \
+				XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \
+				1 /* flags */ + \
+				1 /* spa_how */ + \
+				0 /* SP4_NONE (for now) */ + \
+				1 /* zero implemetation id array */)
+#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \
+				2 /* eir_clientid */ + \
+				1 /* eir_sequenceid */ + \
+				1 /* eir_flags */ + \
+				1 /* spr_how */ + \
+				0 /* SP4_NONE (for now) */ + \
+				2 /* eir_server_owner.so_minor_id */ + \
+				/* eir_server_owner.so_major_id<> */ \
+				XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
+				/* eir_server_scope<> */ \
+				XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
+				1 /* eir_server_impl_id array length */ + \
+				0 /* ignored eir_server_impl_id contents */)
+#define encode_channel_attrs_maxsz  (6 + 1 /* ca_rdma_ird.len (0) */)
+#define decode_channel_attrs_maxsz  (6 + \
+				     1 /* ca_rdma_ird.len */ + \
+				     1 /* ca_rdma_ird */)
+#define encode_create_session_maxsz  (op_encode_hdr_maxsz + \
+				     2 /* csa_clientid */ + \
+				     1 /* csa_sequence */ + \
+				     1 /* csa_flags */ + \
+				     encode_channel_attrs_maxsz + \
+				     encode_channel_attrs_maxsz + \
+				     1 /* csa_cb_program */ + \
+				     1 /* csa_sec_parms.len (1) */ + \
+				     1 /* cb_secflavor (AUTH_SYS) */ + \
+				     1 /* stamp */ + \
+				     1 /* machinename.len */ + \
+				     XDR_QUADLEN(NFS4_MAX_MACHINE_NAME_LEN) + \
+				     1 /* uid */ + \
+				     1 /* gid */ + \
+				     1 /* gids.len (0) */)
+#define decode_create_session_maxsz  (op_decode_hdr_maxsz +	\
+				     XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+				     1 /* csr_sequence */ + \
+				     1 /* csr_flags */ + \
+				     decode_channel_attrs_maxsz + \
+				     decode_channel_attrs_maxsz)
+#define encode_destroy_session_maxsz    (op_encode_hdr_maxsz + 4)
+#define decode_destroy_session_maxsz    (op_decode_hdr_maxsz)
+#define encode_sequence_maxsz	(op_encode_hdr_maxsz + \
+				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
+#define decode_sequence_maxsz	(op_decode_hdr_maxsz + \
+				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
+#else /* CONFIG_NFS_V4_1 */
+#define encode_sequence_maxsz	0
+#define decode_sequence_maxsz	0
+#endif /* CONFIG_NFS_V4_1 */
+
 #define NFS4_enc_compound_sz	(1024)  /* XXX: large enough? */
 #define NFS4_dec_compound_sz	(1024)  /* XXX: large enough? */
 #define NFS4_enc_read_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_read_maxsz)
 #define NFS4_dec_read_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_read_maxsz)
 #define NFS4_enc_readlink_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_readlink_maxsz)
 #define NFS4_dec_readlink_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_readlink_maxsz)
 #define NFS4_enc_readdir_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_readdir_maxsz)
 #define NFS4_dec_readdir_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_readdir_maxsz)
 #define NFS4_enc_write_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_write_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_write_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_write_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_commit_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_commit_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_commit_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_commit_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
 				encode_open_maxsz + \
@@ -285,6 +361,7 @@ static int nfs4_stat_to_errno(int);
 				encode_restorefh_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_open_sz        (compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_savefh_maxsz + \
 				decode_open_maxsz + \
@@ -301,43 +378,53 @@ static int nfs4_stat_to_errno(int);
 				 decode_putfh_maxsz + \
 				 decode_open_confirm_maxsz)
 #define NFS4_enc_open_noattr_sz	(compound_encode_hdr_maxsz + \
+					encode_sequence_maxsz + \
 					encode_putfh_maxsz + \
 					encode_open_maxsz + \
 					encode_getattr_maxsz)
 #define NFS4_dec_open_noattr_sz	(compound_decode_hdr_maxsz + \
+					decode_sequence_maxsz + \
 					decode_putfh_maxsz + \
 					decode_open_maxsz + \
 					decode_getattr_maxsz)
 #define NFS4_enc_open_downgrade_sz \
 				(compound_encode_hdr_maxsz + \
+				 encode_sequence_maxsz + \
 				 encode_putfh_maxsz + \
 				 encode_open_downgrade_maxsz + \
 				 encode_getattr_maxsz)
 #define NFS4_dec_open_downgrade_sz \
 				(compound_decode_hdr_maxsz + \
+				 decode_sequence_maxsz + \
 				 decode_putfh_maxsz + \
 				 decode_open_downgrade_maxsz + \
 				 decode_getattr_maxsz)
 #define NFS4_enc_close_sz	(compound_encode_hdr_maxsz + \
+				 encode_sequence_maxsz + \
 				 encode_putfh_maxsz + \
 				 encode_close_maxsz + \
 				 encode_getattr_maxsz)
 #define NFS4_dec_close_sz	(compound_decode_hdr_maxsz + \
+				 decode_sequence_maxsz + \
 				 decode_putfh_maxsz + \
 				 decode_close_maxsz + \
 				 decode_getattr_maxsz)
 #define NFS4_enc_setattr_sz	(compound_encode_hdr_maxsz + \
+				 encode_sequence_maxsz + \
 				 encode_putfh_maxsz + \
 				 encode_setattr_maxsz + \
 				 encode_getattr_maxsz)
 #define NFS4_dec_setattr_sz	(compound_decode_hdr_maxsz + \
+				 decode_sequence_maxsz + \
 				 decode_putfh_maxsz + \
 				 decode_setattr_maxsz + \
 				 decode_getattr_maxsz)
 #define NFS4_enc_fsinfo_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_fsinfo_maxsz)
 #define NFS4_dec_fsinfo_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_fsinfo_maxsz)
 #define NFS4_enc_renew_sz	(compound_encode_hdr_maxsz + \
@@ -359,64 +446,81 @@ static int nfs4_stat_to_errno(int);
 				decode_putrootfh_maxsz + \
 				decode_fsinfo_maxsz)
 #define NFS4_enc_lock_sz        (compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_lock_maxsz)
 #define NFS4_dec_lock_sz        (compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_lock_maxsz)
 #define NFS4_enc_lockt_sz       (compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_lockt_maxsz)
 #define NFS4_dec_lockt_sz       (compound_decode_hdr_maxsz + \
+				 decode_sequence_maxsz + \
 				 decode_putfh_maxsz + \
 				 decode_lockt_maxsz)
 #define NFS4_enc_locku_sz       (compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_locku_maxsz)
 #define NFS4_dec_locku_sz       (compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_locku_maxsz)
 #define NFS4_enc_access_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_access_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_access_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_access_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_getattr_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_getattr_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_lookup_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_lookup_maxsz + \
 				encode_getattr_maxsz + \
 				encode_getfh_maxsz)
 #define NFS4_dec_lookup_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_lookup_maxsz + \
 				decode_getattr_maxsz + \
 				decode_getfh_maxsz)
 #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putrootfh_maxsz + \
 				encode_getattr_maxsz + \
 				encode_getfh_maxsz)
 #define NFS4_dec_lookup_root_sz (compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putrootfh_maxsz + \
 				decode_getattr_maxsz + \
 				decode_getfh_maxsz)
 #define NFS4_enc_remove_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_remove_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_remove_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
-				op_decode_hdr_maxsz + 5 + \
+				decode_remove_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_rename_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
 				encode_putfh_maxsz + \
@@ -425,6 +529,7 @@ static int nfs4_stat_to_errno(int);
 				encode_restorefh_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_rename_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_savefh_maxsz + \
 				decode_putfh_maxsz + \
@@ -433,6 +538,7 @@ static int nfs4_stat_to_errno(int);
 				decode_restorefh_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_link_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
 				encode_putfh_maxsz + \
@@ -441,6 +547,7 @@ static int nfs4_stat_to_errno(int);
 				encode_restorefh_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_dec_link_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_savefh_maxsz + \
 				decode_putfh_maxsz + \
@@ -449,16 +556,19 @@ static int nfs4_stat_to_errno(int);
 				decode_restorefh_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_symlink_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_symlink_maxsz + \
 				encode_getattr_maxsz + \
 				encode_getfh_maxsz)
 #define NFS4_dec_symlink_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_symlink_maxsz + \
 				decode_getattr_maxsz + \
 				decode_getfh_maxsz)
 #define NFS4_enc_create_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
 				encode_create_maxsz + \
@@ -467,6 +577,7 @@ static int nfs4_stat_to_errno(int);
 				encode_restorefh_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_create_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_savefh_maxsz + \
 				decode_create_maxsz + \
@@ -475,52 +586,98 @@ static int nfs4_stat_to_errno(int);
 				decode_restorefh_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_pathconf_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_pathconf_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_statfs_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_statfs_maxsz)
 #define NFS4_dec_statfs_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_statfs_maxsz)
 #define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_delegreturn_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_delegreturn_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_delegreturn_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_getacl_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getacl_maxsz)
 #define NFS4_dec_getacl_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_getacl_maxsz)
 #define NFS4_enc_setacl_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_setacl_maxsz)
 #define NFS4_dec_setacl_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_setacl_maxsz)
 #define NFS4_enc_fs_locations_sz \
 				(compound_encode_hdr_maxsz + \
+				 encode_sequence_maxsz + \
 				 encode_putfh_maxsz + \
 				 encode_lookup_maxsz + \
 				 encode_fs_locations_maxsz)
 #define NFS4_dec_fs_locations_sz \
 				(compound_decode_hdr_maxsz + \
+				 decode_sequence_maxsz + \
 				 decode_putfh_maxsz + \
 				 decode_lookup_maxsz + \
 				 decode_fs_locations_maxsz)
+#if defined(CONFIG_NFS_V4_1)
+#define NFS4_enc_exchange_id_sz \
+				(compound_encode_hdr_maxsz + \
+				 encode_exchange_id_maxsz)
+#define NFS4_dec_exchange_id_sz \
+				(compound_decode_hdr_maxsz + \
+				 decode_exchange_id_maxsz)
+#define NFS4_enc_create_session_sz \
+				(compound_encode_hdr_maxsz + \
+				 encode_create_session_maxsz)
+#define NFS4_dec_create_session_sz \
+				(compound_decode_hdr_maxsz + \
+				 decode_create_session_maxsz)
+#define NFS4_enc_destroy_session_sz	(compound_encode_hdr_maxsz + \
+					 encode_destroy_session_maxsz)
+#define NFS4_dec_destroy_session_sz	(compound_decode_hdr_maxsz + \
+					 decode_destroy_session_maxsz)
+#define NFS4_enc_sequence_sz \
+				(compound_decode_hdr_maxsz + \
+				 encode_sequence_maxsz)
+#define NFS4_dec_sequence_sz \
+				(compound_decode_hdr_maxsz + \
+				 decode_sequence_maxsz)
+#define NFS4_enc_get_lease_time_sz	(compound_encode_hdr_maxsz + \
+					 encode_sequence_maxsz + \
+					 encode_putrootfh_maxsz + \
+					 encode_fsinfo_maxsz)
+#define NFS4_dec_get_lease_time_sz	(compound_decode_hdr_maxsz + \
+					 decode_sequence_maxsz + \
+					 decode_putrootfh_maxsz + \
+					 decode_fsinfo_maxsz)
+#endif /* CONFIG_NFS_V4_1 */
 
 static const umode_t nfs_type2fmt[] = {
 	[NF4BAD] = 0,
@@ -541,6 +698,8 @@ struct compound_hdr {
 	__be32 *	nops_p;
 	uint32_t	taglen;
 	char *		tag;
+	uint32_t	replen;		/* expected reply words */
+	u32		minorversion;
 };
 
 /*
@@ -576,22 +735,31 @@ static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *
 	xdr_encode_opaque(p, str, len);
 }
 
-static void encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
+static void encode_compound_hdr(struct xdr_stream *xdr,
+				struct rpc_rqst *req,
+				struct compound_hdr *hdr)
 {
 	__be32 *p;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+
+	/* initialize running count of expected bytes in reply.
+	 * NOTE: the replied tag SHOULD be the same is the one sent,
+	 * but this is not required as a MUST for the server to do so. */
+	hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen;
 
 	dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
 	BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
 	RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2));
 	WRITE32(hdr->taglen);
 	WRITEMEM(hdr->tag, hdr->taglen);
-	WRITE32(NFS4_MINOR_VERSION);
+	WRITE32(hdr->minorversion);
 	hdr->nops_p = p;
 	WRITE32(hdr->nops);
 }
 
 static void encode_nops(struct compound_hdr *hdr)
 {
+	BUG_ON(hdr->nops > NFS4_MAX_OPS);
 	*hdr->nops_p = htonl(hdr->nops);
 }
 
@@ -736,6 +904,7 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd
 	WRITE32(OP_ACCESS);
 	WRITE32(access);
 	hdr->nops++;
+	hdr->replen += decode_access_maxsz;
 }
 
 static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
@@ -747,6 +916,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
 	WRITE32(arg->seqid->sequence->counter);
 	WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
 	hdr->nops++;
+	hdr->replen += decode_close_maxsz;
 }
 
 static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
@@ -758,6 +928,7 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar
 	WRITE64(args->offset);
 	WRITE32(args->count);
 	hdr->nops++;
+	hdr->replen += decode_commit_maxsz;
 }
 
 static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr)
@@ -789,6 +960,7 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
 	WRITE32(create->name->len);
 	WRITEMEM(create->name->name, create->name->len);
 	hdr->nops++;
+	hdr->replen += decode_create_maxsz;
 
 	encode_attrs(xdr, create->attrs, create->server);
 }
@@ -802,6 +974,7 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
 	WRITE32(1);
 	WRITE32(bitmap);
 	hdr->nops++;
+	hdr->replen += decode_getattr_maxsz;
 }
 
 static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr)
@@ -814,6 +987,7 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm
 	WRITE32(bm0);
 	WRITE32(bm1);
 	hdr->nops++;
+	hdr->replen += decode_getattr_maxsz;
 }
 
 static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
@@ -841,6 +1015,7 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 	RESERVE_SPACE(4);
 	WRITE32(OP_GETFH);
 	hdr->nops++;
+	hdr->replen += decode_getfh_maxsz;
 }
 
 static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
@@ -852,6 +1027,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
 	WRITE32(name->len);
 	WRITEMEM(name->name, name->len);
 	hdr->nops++;
+	hdr->replen += decode_link_maxsz;
 }
 
 static inline int nfs4_lock_type(struct file_lock *fl, int block)
@@ -899,6 +1075,7 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
 		WRITE32(args->lock_seqid->sequence->counter);
 	}
 	hdr->nops++;
+	hdr->replen += decode_lock_maxsz;
 }
 
 static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr)
@@ -915,6 +1092,7 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar
 	WRITEMEM("lock id:", 8);
 	WRITE64(args->lock_owner.id);
 	hdr->nops++;
+	hdr->replen += decode_lockt_maxsz;
 }
 
 static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr)
@@ -929,6 +1107,7 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
 	WRITE64(args->fl->fl_start);
 	WRITE64(nfs4_lock_length(args->fl));
 	hdr->nops++;
+	hdr->replen += decode_locku_maxsz;
 }
 
 static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
@@ -941,6 +1120,7 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
 	WRITE32(len);
 	WRITEMEM(name->name, len);
 	hdr->nops++;
+	hdr->replen += decode_lookup_maxsz;
 }
 
 static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
@@ -1080,6 +1260,7 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg,
 		BUG();
 	}
 	hdr->nops++;
+	hdr->replen += decode_open_maxsz;
 }
 
 static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr)
@@ -1091,6 +1272,7 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
 	WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
 	WRITE32(arg->seqid->sequence->counter);
 	hdr->nops++;
+	hdr->replen += decode_open_confirm_maxsz;
 }
 
 static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
@@ -1103,6 +1285,7 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close
 	WRITE32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->fmode);
 	hdr->nops++;
+	hdr->replen += decode_open_downgrade_maxsz;
 }
 
 static void
@@ -1116,6 +1299,7 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd
 	WRITE32(len);
 	WRITEMEM(fh->data, len);
 	hdr->nops++;
+	hdr->replen += decode_putfh_maxsz;
 }
 
 static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
@@ -1125,6 +1309,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 	RESERVE_SPACE(4);
 	WRITE32(OP_PUTROOTFH);
 	hdr->nops++;
+	hdr->replen += decode_putrootfh_maxsz;
 }
 
 static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
@@ -1153,6 +1338,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
 	WRITE64(args->offset);
 	WRITE32(args->count);
 	hdr->nops++;
+	hdr->replen += decode_read_maxsz;
 }
 
 static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
@@ -1178,6 +1364,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 	WRITE32(attrs[0] & readdir->bitmask[0]);
 	WRITE32(attrs[1] & readdir->bitmask[1]);
 	hdr->nops++;
+	hdr->replen += decode_readdir_maxsz;
 	dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
 			__func__,
 			(unsigned long long)readdir->cookie,
@@ -1194,6 +1381,7 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *
 	RESERVE_SPACE(4);
 	WRITE32(OP_READLINK);
 	hdr->nops++;
+	hdr->replen += decode_readlink_maxsz;
 }
 
 static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
@@ -1205,6 +1393,7 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc
 	WRITE32(name->len);
 	WRITEMEM(name->name, name->len);
 	hdr->nops++;
+	hdr->replen += decode_remove_maxsz;
 }
 
 static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr)
@@ -1220,6 +1409,7 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co
 	WRITE32(newname->len);
 	WRITEMEM(newname->name, newname->len);
 	hdr->nops++;
+	hdr->replen += decode_rename_maxsz;
 }
 
 static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr)
@@ -1230,6 +1420,7 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client
 	WRITE32(OP_RENEW);
 	WRITE64(client_stateid->cl_clientid);
 	hdr->nops++;
+	hdr->replen += decode_renew_maxsz;
 }
 
 static void
@@ -1240,6 +1431,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 	RESERVE_SPACE(4);
 	WRITE32(OP_RESTOREFH);
 	hdr->nops++;
+	hdr->replen += decode_restorefh_maxsz;
 }
 
 static int
@@ -1259,6 +1451,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
 	WRITE32(arg->acl_len);
 	xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
 	hdr->nops++;
+	hdr->replen += decode_setacl_maxsz;
 	return 0;
 }
 
@@ -1270,6 +1463,7 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 	RESERVE_SPACE(4);
 	WRITE32(OP_SAVEFH);
 	hdr->nops++;
+	hdr->replen += decode_savefh_maxsz;
 }
 
 static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr)
@@ -1280,6 +1474,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
 	WRITE32(OP_SETATTR);
 	WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE);
 	hdr->nops++;
+	hdr->replen += decode_setattr_maxsz;
 	encode_attrs(xdr, arg->iap, server);
 }
 
@@ -1299,6 +1494,7 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
 	RESERVE_SPACE(4);
 	WRITE32(setclientid->sc_cb_ident);
 	hdr->nops++;
+	hdr->replen += decode_setclientid_maxsz;
 }
 
 static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr)
@@ -1310,6 +1506,7 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_
 	WRITE64(client_state->cl_clientid);
 	WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
 	hdr->nops++;
+	hdr->replen += decode_setclientid_confirm_maxsz;
 }
 
 static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
@@ -1328,6 +1525,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
 
 	xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
 	hdr->nops++;
+	hdr->replen += decode_write_maxsz;
 }
 
 static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr)
@@ -1339,11 +1537,163 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
 	WRITE32(OP_DELEGRETURN);
 	WRITEMEM(stateid->data, NFS4_STATEID_SIZE);
 	hdr->nops++;
+	hdr->replen += decode_delegreturn_maxsz;
+}
+
+#if defined(CONFIG_NFS_V4_1)
+/* NFSv4.1 operations */
+static void encode_exchange_id(struct xdr_stream *xdr,
+			       struct nfs41_exchange_id_args *args,
+			       struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	RESERVE_SPACE(4 + sizeof(args->verifier->data));
+	WRITE32(OP_EXCHANGE_ID);
+	WRITEMEM(args->verifier->data, sizeof(args->verifier->data));
+
+	encode_string(xdr, args->id_len, args->id);
+
+	RESERVE_SPACE(12);
+	WRITE32(args->flags);
+	WRITE32(0);	/* zero length state_protect4_a */
+	WRITE32(0);	/* zero length implementation id array */
+	hdr->nops++;
+	hdr->replen += decode_exchange_id_maxsz;
+}
+
+static void encode_create_session(struct xdr_stream *xdr,
+				  struct nfs41_create_session_args *args,
+				  struct compound_hdr *hdr)
+{
+	__be32 *p;
+	char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
+	uint32_t len;
+	struct nfs_client *clp = args->client;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_CREATE_SESSION);
+
+	RESERVE_SPACE(8);
+	WRITE64(clp->cl_ex_clid);
+
+	RESERVE_SPACE(8);
+	WRITE32(clp->cl_seqid);			/*Sequence id */
+	WRITE32(args->flags);			/*flags */
+
+	RESERVE_SPACE(2*28);			/* 2 channel_attrs */
+	/* Fore Channel */
+	WRITE32(args->fc_attrs.headerpadsz);	/* header padding size */
+	WRITE32(args->fc_attrs.max_rqst_sz);	/* max req size */
+	WRITE32(args->fc_attrs.max_resp_sz);	/* max resp size */
+	WRITE32(args->fc_attrs.max_resp_sz_cached);	/* Max resp sz cached */
+	WRITE32(args->fc_attrs.max_ops);	/* max operations */
+	WRITE32(args->fc_attrs.max_reqs);	/* max requests */
+	WRITE32(0);				/* rdmachannel_attrs */
+
+	/* Back Channel */
+	WRITE32(args->fc_attrs.headerpadsz);	/* header padding size */
+	WRITE32(args->bc_attrs.max_rqst_sz);	/* max req size */
+	WRITE32(args->bc_attrs.max_resp_sz);	/* max resp size */
+	WRITE32(args->bc_attrs.max_resp_sz_cached);	/* Max resp sz cached */
+	WRITE32(args->bc_attrs.max_ops);	/* max operations */
+	WRITE32(args->bc_attrs.max_reqs);	/* max requests */
+	WRITE32(0);				/* rdmachannel_attrs */
+
+	RESERVE_SPACE(4);
+	WRITE32(args->cb_program);		/* cb_program */
+
+	RESERVE_SPACE(4);			/* # of security flavors */
+	WRITE32(1);
+
+	RESERVE_SPACE(4);
+	WRITE32(RPC_AUTH_UNIX);			/* auth_sys */
+
+	/* authsys_parms rfc1831 */
+	RESERVE_SPACE(4);
+	WRITE32((u32)clp->cl_boot_time.tv_nsec);	/* stamp */
+	len = scnprintf(machine_name, sizeof(machine_name), "%s",
+			clp->cl_ipaddr);
+	RESERVE_SPACE(16 + len);
+	WRITE32(len);
+	WRITEMEM(machine_name, len);
+	WRITE32(0);				/* UID */
+	WRITE32(0);				/* GID */
+	WRITE32(0);				/* No more gids */
+	hdr->nops++;
+	hdr->replen += decode_create_session_maxsz;
+}
+
+static void encode_destroy_session(struct xdr_stream *xdr,
+				   struct nfs4_session *session,
+				   struct compound_hdr *hdr)
+{
+	__be32 *p;
+	RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN);
+	WRITE32(OP_DESTROY_SESSION);
+	WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+	hdr->nops++;
+	hdr->replen += decode_destroy_session_maxsz;
 }
+#endif /* CONFIG_NFS_V4_1 */
+
+static void encode_sequence(struct xdr_stream *xdr,
+			    const struct nfs4_sequence_args *args,
+			    struct compound_hdr *hdr)
+{
+#if defined(CONFIG_NFS_V4_1)
+	struct nfs4_session *session = args->sa_session;
+	struct nfs4_slot_table *tp;
+	struct nfs4_slot *slot;
+	__be32 *p;
+
+	if (!session)
+		return;
+
+	tp = &session->fc_slot_table;
+
+	WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
+	slot = tp->slots + args->sa_slotid;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_SEQUENCE);
+
+	/*
+	 * Sessionid + seqid + slotid + max slotid + cache_this
+	 */
+	dprintk("%s: sessionid=%u:%u:%u:%u seqid=%d slotid=%d "
+		"max_slotid=%d cache_this=%d\n",
+		__func__,
+		((u32 *)session->sess_id.data)[0],
+		((u32 *)session->sess_id.data)[1],
+		((u32 *)session->sess_id.data)[2],
+		((u32 *)session->sess_id.data)[3],
+		slot->seq_nr, args->sa_slotid,
+		tp->highest_used_slotid, args->sa_cache_this);
+	RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16);
+	WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+	WRITE32(slot->seq_nr);
+	WRITE32(args->sa_slotid);
+	WRITE32(tp->highest_used_slotid);
+	WRITE32(args->sa_cache_this);
+	hdr->nops++;
+	hdr->replen += decode_sequence_maxsz;
+#endif /* CONFIG_NFS_V4_1 */
+}
+
 /*
  * END OF "GENERIC" ENCODE ROUTINES.
  */
 
+static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
+{
+#if defined(CONFIG_NFS_V4_1)
+	if (args->sa_session)
+		return args->sa_session->clp->cl_minorversion;
+#endif /* CONFIG_NFS_V4_1 */
+	return 0;
+}
+
 /*
  * Encode an ACCESS request
  */
@@ -1351,11 +1701,12 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_access(&xdr, args->access, &hdr);
 	encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1370,11 +1721,12 @@ static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->dir_fh, &hdr);
 	encode_lookup(&xdr, args->name, &hdr);
 	encode_getfh(&xdr, &hdr);
@@ -1390,11 +1742,12 @@ static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struc
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putrootfh(&xdr, &hdr);
 	encode_getfh(&xdr, &hdr);
 	encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1409,11 +1762,12 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_remove(&xdr, &args->name, &hdr);
 	encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1428,11 +1782,12 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->old_dir, &hdr);
 	encode_savefh(&xdr, &hdr);
 	encode_putfh(&xdr, args->new_dir, &hdr);
@@ -1451,11 +1806,12 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_savefh(&xdr, &hdr);
 	encode_putfh(&xdr, args->dir_fh, &hdr);
@@ -1474,11 +1830,12 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->dir_fh, &hdr);
 	encode_savefh(&xdr, &hdr);
 	encode_create(&xdr, args, &hdr);
@@ -1505,11 +1862,12 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_getfattr(&xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
@@ -1523,11 +1881,12 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closea
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops   = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_close(&xdr, args, &hdr);
 	encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1542,11 +1901,12 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openarg
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_savefh(&xdr, &hdr);
 	encode_open(&xdr, args, &hdr);
@@ -1569,7 +1929,7 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_open_confirm(&xdr, args, &hdr);
 	encode_nops(&hdr);
@@ -1583,11 +1943,12 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops   = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_open(&xdr, args, &hdr);
 	encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1602,11 +1963,12 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct n
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops	= 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_open_downgrade(&xdr, args, &hdr);
 	encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1621,11 +1983,12 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_ar
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops   = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_lock(&xdr, args, &hdr);
 	encode_nops(&hdr);
@@ -1639,11 +2002,12 @@ static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops   = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_lockt(&xdr, args, &hdr);
 	encode_nops(&hdr);
@@ -1657,11 +2021,12 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops   = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_locku(&xdr, args, &hdr);
 	encode_nops(&hdr);
@@ -1675,22 +2040,16 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
-	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
-	unsigned int replen;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_readlink(&xdr, args, req, &hdr);
 
-	/* set up reply kvec
-	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
-	 *      + OP_READLINK + status + string length = 8
-	 */
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readlink_sz) << 2;
-	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages,
+	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
 			args->pgbase, args->pglen);
 	encode_nops(&hdr);
 	return 0;
@@ -1703,25 +2062,19 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
-	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
-	int replen;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_readdir(&xdr, args, req, &hdr);
 
-	/* set up reply kvec
-	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
-	 *      + OP_READDIR + status + verifer(2)  = 9
-	 */
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readdir_sz) << 2;
-	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages,
+	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
 			 args->pgbase, args->count);
 	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
-			__func__, replen, args->pages,
+			__func__, hdr.replen << 2, args->pages,
 			args->pgbase, args->count);
 	encode_nops(&hdr);
 	return 0;
@@ -1732,24 +2085,18 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
  */
 static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
-	int replen;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_read(&xdr, args, &hdr);
 
-	/* set up reply kvec
-	 *    toplevel status + taglen=0 + rescount + OP_PUTFH + status
-	 *       + OP_READ + status + eof + datalen = 9
-	 */
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2;
-	xdr_inline_pages(&req->rq_rcv_buf, replen,
+	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
 			 args->pages, args->pgbase, args->count);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
 	encode_nops(&hdr);
@@ -1763,11 +2110,12 @@ static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_seta
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops   = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_setattr(&xdr, args, args->server, &hdr);
 	encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1783,20 +2131,19 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
 		struct nfs_getaclargs *args)
 {
 	struct xdr_stream xdr;
-	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	struct compound_hdr hdr = {
-		.nops   = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
-	int replen;
+	uint32_t replen;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
+	replen = hdr.replen + nfs4_fattr_bitmap_maxsz + 1;
 	encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr);
 
-	/* set up reply buffer: */
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2;
-	xdr_inline_pages(&req->rq_rcv_buf, replen,
+	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, args->acl_pgbase, args->acl_len);
 	encode_nops(&hdr);
 	return 0;
@@ -1809,11 +2156,12 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_write(&xdr, args, &hdr);
 	req->rq_snd_buf.flags |= XDRBUF_WRITE;
@@ -1829,11 +2177,12 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_write
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_commit(&xdr, args, &hdr);
 	encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1848,11 +2197,12 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsin
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops	= 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_fsinfo(&xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
@@ -1866,11 +2216,12 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct n
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
 			   &hdr);
@@ -1885,11 +2236,12 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
 			   args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
@@ -1900,16 +2252,18 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs
 /*
  * GETATTR_BITMAP request
  */
-static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struct nfs_fh *fhandle)
+static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p,
+				    struct nfs4_server_caps_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
-	encode_putfh(&xdr, fhandle, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
+	encode_putfh(&xdr, args->fhandle, &hdr);
 	encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
 			   FATTR4_WORD0_LINK_SUPPORT|
 			   FATTR4_WORD0_SYMLINK_SUPPORT|
@@ -1929,7 +2283,7 @@ static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
 	encode_renew(&xdr, clp, &hdr);
 	encode_nops(&hdr);
 	return 0;
@@ -1946,7 +2300,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
 	encode_setclientid(&xdr, sc, &hdr);
 	encode_nops(&hdr);
 	return 0;
@@ -1964,7 +2318,7 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
 	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
 	encode_setclientid_confirm(&xdr, clp, &hdr);
 	encode_putrootfh(&xdr, &hdr);
 	encode_fsinfo(&xdr, lease_bitmap, &hdr);
@@ -1979,11 +2333,12 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struc
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fhandle, &hdr);
 	encode_delegreturn(&xdr, args->stateid, &hdr);
 	encode_getfattr(&xdr, args->bitmask, &hdr);
@@ -1998,28 +2353,119 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
-	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
-	int replen;
+	uint32_t replen;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->dir_fh, &hdr);
 	encode_lookup(&xdr, args->name, &hdr);
+	replen = hdr.replen;	/* get the attribute into args->page */
 	encode_fs_locations(&xdr, args->bitmask, &hdr);
 
-	/* set up reply
-	 *   toplevel_status + OP_PUTFH + status
-	 *   + OP_LOOKUP + status + OP_GETATTR + status = 7
-	 */
-	replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2;
-	xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page,
+	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
 			0, PAGE_SIZE);
 	encode_nops(&hdr);
 	return 0;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * EXCHANGE_ID request
+ */
+static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
+				    struct nfs41_exchange_id_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = args->client->cl_minorversion,
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_exchange_id(&xdr, args, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
+
+/*
+ * a CREATE_SESSION request
+ */
+static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p,
+				       struct nfs41_create_session_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = args->client->cl_minorversion,
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_create_session(&xdr, args, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
+
+/*
+ * a DESTROY_SESSION request
+ */
+static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p,
+					struct nfs4_session *session)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = session->clp->cl_minorversion,
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_destroy_session(&xdr, session, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
+
+/*
+ * a SEQUENCE request
+ */
+static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p,
+				 struct nfs4_sequence_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(args),
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, args, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
+
+/*
+ * a GET_LEASE_TIME request
+ */
+static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
+				       struct nfs4_get_lease_time_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
+	};
+	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->la_seq_args, &hdr);
+	encode_putrootfh(&xdr, &hdr);
+	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 /*
  * START OF "GENERIC" DECODE ROUTINES.
  *   These may look a little ugly since they are imported from a "generic"
@@ -3657,7 +4103,7 @@ decode_savefh(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_SAVEFH);
 }
 
-static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
+static int decode_setattr(struct xdr_stream *xdr)
 {
 	__be32 *p;
 	uint32_t bmlen;
@@ -3735,6 +4181,169 @@ static int decode_delegreturn(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_DELEGRETURN);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+static int decode_exchange_id(struct xdr_stream *xdr,
+			      struct nfs41_exchange_id_res *res)
+{
+	__be32 *p;
+	uint32_t dummy;
+	int status;
+	struct nfs_client *clp = res->client;
+
+	status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
+	if (status)
+		return status;
+
+	READ_BUF(8);
+	READ64(clp->cl_ex_clid);
+	READ_BUF(12);
+	READ32(clp->cl_seqid);
+	READ32(clp->cl_exchange_flags);
+
+	/* We ask for SP4_NONE */
+	READ32(dummy);
+	if (dummy != SP4_NONE)
+		return -EIO;
+
+	/* Throw away minor_id */
+	READ_BUF(8);
+
+	/* Throw away Major id */
+	READ_BUF(4);
+	READ32(dummy);
+	READ_BUF(dummy);
+
+	/* Throw away server_scope */
+	READ_BUF(4);
+	READ32(dummy);
+	READ_BUF(dummy);
+
+	/* Throw away Implementation id array */
+	READ_BUF(4);
+	READ32(dummy);
+	READ_BUF(dummy);
+
+	return 0;
+}
+
+static int decode_chan_attrs(struct xdr_stream *xdr,
+			     struct nfs4_channel_attrs *attrs)
+{
+	__be32 *p;
+	u32 nr_attrs;
+
+	READ_BUF(28);
+	READ32(attrs->headerpadsz);
+	READ32(attrs->max_rqst_sz);
+	READ32(attrs->max_resp_sz);
+	READ32(attrs->max_resp_sz_cached);
+	READ32(attrs->max_ops);
+	READ32(attrs->max_reqs);
+	READ32(nr_attrs);
+	if (unlikely(nr_attrs > 1)) {
+		printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
+			__func__, nr_attrs);
+		return -EINVAL;
+	}
+	if (nr_attrs == 1)
+		READ_BUF(4); /* skip rdma_attrs */
+	return 0;
+}
+
+static int decode_create_session(struct xdr_stream *xdr,
+				 struct nfs41_create_session_res *res)
+{
+	__be32 *p;
+	int status;
+	struct nfs_client *clp = res->client;
+	struct nfs4_session *session = clp->cl_session;
+
+	status = decode_op_hdr(xdr, OP_CREATE_SESSION);
+
+	if (status)
+		return status;
+
+	/* sessionid */
+	READ_BUF(NFS4_MAX_SESSIONID_LEN);
+	COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN);
+
+	/* seqid, flags */
+	READ_BUF(8);
+	READ32(clp->cl_seqid);
+	READ32(session->flags);
+
+	/* Channel attributes */
+	status = decode_chan_attrs(xdr, &session->fc_attrs);
+	if (!status)
+		status = decode_chan_attrs(xdr, &session->bc_attrs);
+	return status;
+}
+
+static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
+{
+	return decode_op_hdr(xdr, OP_DESTROY_SESSION);
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+static int decode_sequence(struct xdr_stream *xdr,
+			   struct nfs4_sequence_res *res,
+			   struct rpc_rqst *rqstp)
+{
+#if defined(CONFIG_NFS_V4_1)
+	struct nfs4_slot *slot;
+	struct nfs4_sessionid id;
+	u32 dummy;
+	int status;
+	__be32 *p;
+
+	if (!res->sr_session)
+		return 0;
+
+	status = decode_op_hdr(xdr, OP_SEQUENCE);
+	if (status)
+		goto out_err;
+
+	/*
+	 * If the server returns different values for sessionID, slotID or
+	 * sequence number, the server is looney tunes.
+	 */
+	status = -ESERVERFAULT;
+
+	slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
+	READ_BUF(NFS4_MAX_SESSIONID_LEN + 20);
+	COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN);
+	if (memcmp(id.data, res->sr_session->sess_id.data,
+		   NFS4_MAX_SESSIONID_LEN)) {
+		dprintk("%s Invalid session id\n", __func__);
+		goto out_err;
+	}
+	/* seqid */
+	READ32(dummy);
+	if (dummy != slot->seq_nr) {
+		dprintk("%s Invalid sequence number\n", __func__);
+		goto out_err;
+	}
+	/* slot id */
+	READ32(dummy);
+	if (dummy != res->sr_slotid) {
+		dprintk("%s Invalid slot id\n", __func__);
+		goto out_err;
+	}
+	/* highest slot id - currently not processed */
+	READ32(dummy);
+	/* target highest slot id - currently not processed */
+	READ32(dummy);
+	/* result flags - currently not processed */
+	READ32(dummy);
+	status = 0;
+out_err:
+	res->sr_status = status;
+	return status;
+#else  /* CONFIG_NFS_V4_1 */
+	return 0;
+#endif /* CONFIG_NFS_V4_1 */
+}
+
 /*
  * END OF "GENERIC" DECODE ROUTINES.
  */
@@ -3752,6 +4361,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -3773,7 +4385,11 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac
 	int status;
 
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
 		goto out;
 	status = decode_putfh(&xdr);
 	if (status != 0)
@@ -3796,7 +4412,11 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo
 	int status;
 
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
@@ -3819,7 +4439,11 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf
 	int status;
 
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
 		goto out;
 	if ((status = decode_putrootfh(&xdr)) != 0)
 		goto out;
@@ -3839,7 +4463,11 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
 	int status;
 
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
@@ -3860,7 +4488,11 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re
 	int status;
 
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
@@ -3890,7 +4522,11 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link
 	int status;
 
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
@@ -3923,7 +4559,11 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr
 	int status;
 
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
@@ -3963,6 +4603,9 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -3979,12 +4622,13 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops   = 0,
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 	int status;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, &hdr);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->seq_args, &hdr);
 	encode_putfh(&xdr, args->fh, &hdr);
 	status = encode_setacl(&xdr, args, &hdr);
 	encode_nops(&hdr);
@@ -3995,7 +4639,8 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args
  * Decode SETACL response
  */
 static int
-nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, void *res)
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p,
+		    struct nfs_setaclres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4005,10 +4650,13 @@ nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, void *res)
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
-	status = decode_setattr(&xdr, res);
+	status = decode_setattr(&xdr);
 out:
 	return status;
 }
@@ -4017,7 +4665,8 @@ out:
  * Decode GETACL response
  */
 static int
-nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len)
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p,
+		    struct nfs_getaclres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4027,10 +4676,13 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len)
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
-	status = decode_getacl(&xdr, rqstp, acl_len);
+	status = decode_getacl(&xdr, rqstp, &res->acl_len);
 
 out:
 	return status;
@@ -4049,6 +4701,9 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4079,6 +4734,9 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4133,6 +4791,9 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4157,10 +4818,13 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
-	status = decode_setattr(&xdr, res);
+	status = decode_setattr(&xdr);
 	if (status)
 		goto out;
 	decode_getfattr(&xdr, res->fattr, res->server);
@@ -4181,6 +4845,9 @@ static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4202,6 +4869,9 @@ static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4223,6 +4893,9 @@ static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4234,7 +4907,8 @@ out:
 /*
  * Decode READLINK response
  */
-static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, void *res)
+static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p,
+				 struct nfs4_readlink_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4244,6 +4918,9 @@ static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, void *res)
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4265,6 +4942,9 @@ static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_r
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4286,6 +4966,9 @@ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readr
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4309,6 +4992,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4335,6 +5021,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (status)
 		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
@@ -4349,7 +5038,8 @@ out:
 /*
  * FSINFO request
  */
-static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo)
+static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p,
+			       struct nfs4_fsinfo_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4358,16 +5048,19 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf
 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (!status)
+		status = decode_sequence(&xdr, &res->seq_res, req);
+	if (!status)
 		status = decode_putfh(&xdr);
 	if (!status)
-		status = decode_fsinfo(&xdr, fsinfo);
+		status = decode_fsinfo(&xdr, res->fsinfo);
 	return status;
 }
 
 /*
  * PATHCONF request
  */
-static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *pathconf)
+static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p,
+				 struct nfs4_pathconf_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4376,16 +5069,19 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pat
 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (!status)
+		status = decode_sequence(&xdr, &res->seq_res, req);
+	if (!status)
 		status = decode_putfh(&xdr);
 	if (!status)
-		status = decode_pathconf(&xdr, pathconf);
+		status = decode_pathconf(&xdr, res->pathconf);
 	return status;
 }
 
 /*
  * STATFS request
  */
-static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *fsstat)
+static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p,
+			       struct nfs4_statfs_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4394,9 +5090,11 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fssta
 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (!status)
+		status = decode_sequence(&xdr, &res->seq_res, req);
+	if (!status)
 		status = decode_putfh(&xdr);
 	if (!status)
-		status = decode_statfs(&xdr, fsstat);
+		status = decode_statfs(&xdr, res->fsstat);
 	return status;
 }
 
@@ -4410,7 +5108,11 @@ static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4
 	int status;
 
 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, req);
+	if (status)
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
@@ -4483,7 +5185,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf
 
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
 	status = decode_compound_hdr(&xdr, &hdr);
-	if (status != 0)
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	if (status)
 		goto out;
 	status = decode_putfh(&xdr);
 	if (status != 0)
@@ -4497,7 +5202,8 @@ out:
 /*
  * FS_LOCATIONS request
  */
-static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations *res)
+static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
+				     struct nfs4_fs_locations_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4505,18 +5211,113 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
 
 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
 	status = decode_compound_hdr(&xdr, &hdr);
-	if (status != 0)
+	if (status)
+		goto out;
+	status = decode_sequence(&xdr, &res->seq_res, req);
+	if (status)
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
 	if ((status = decode_lookup(&xdr)) != 0)
 		goto out;
 	xdr_enter_page(&xdr, PAGE_SIZE);
-	status = decode_getfattr(&xdr, &res->fattr, res->server);
+	status = decode_getfattr(&xdr, &res->fs_locations->fattr,
+				 res->fs_locations->server);
 out:
 	return status;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * EXCHANGE_ID request
+ */
+static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
+				    void *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_exchange_id(&xdr, res);
+	return status;
+}
+
+/*
+ * a CREATE_SESSION request
+ */
+static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p,
+				       struct nfs41_create_session_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_create_session(&xdr, res);
+	return status;
+}
+
+/*
+ * a DESTROY_SESSION request
+ */
+static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p,
+					void *dummy)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_destroy_session(&xdr, dummy);
+	return status;
+}
+
+/*
+ * a SEQUENCE request
+ */
+static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p,
+				 struct nfs4_sequence_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_sequence(&xdr, res, rqstp);
+	return status;
+}
+
+/*
+ * a GET_LEASE_TIME request
+ */
+static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p,
+				       struct nfs4_get_lease_time_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_sequence(&xdr, &res->lr_seq_res, rqstp);
+	if (!status)
+		status = decode_putrootfh(&xdr);
+	if (!status)
+		status = decode_fsinfo(&xdr, res->lr_fsinfo);
+	return status;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
 {
 	uint32_t bitmap[2] = {0};
@@ -4686,6 +5487,13 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(GETACL,		enc_getacl,	dec_getacl),
   PROC(SETACL,		enc_setacl,	dec_setacl),
   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
+#if defined(CONFIG_NFS_V4_1)
+  PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
+  PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
+  PROC(DESTROY_SESSION,	enc_destroy_session,	dec_destroy_session),
+  PROC(SEQUENCE,	enc_sequence,	dec_sequence),
+  PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
+#endif /* CONFIG_NFS_V4_1 */
 };
 
 struct rpc_version		nfs_version4 = {
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index e3ed5908820b..8c55b27c0de4 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -92,6 +92,9 @@
 #undef NFSROOT_DEBUG
 #define NFSDBG_FACILITY NFSDBG_ROOT
 
+/* Default port to use if server is not running a portmapper */
+#define NFS_MNT_PORT	627
+
 /* Default path we try to mount. "%s" gets replaced by our IP address */
 #define NFS_ROOT		"/tftpboot/%s"
 
@@ -487,6 +490,7 @@ static int __init root_nfs_get_handle(void)
 {
 	struct nfs_fh fh;
 	struct sockaddr_in sin;
+	unsigned int auth_flav_len = 0;
 	struct nfs_mount_request request = {
 		.sap		= (struct sockaddr *)&sin,
 		.salen		= sizeof(sin),
@@ -496,6 +500,7 @@ static int __init root_nfs_get_handle(void)
 		.protocol	= (nfs_data.flags & NFS_MOUNT_TCP) ?
 					XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
 		.fh		= &fh,
+		.auth_flav_len	= &auth_flav_len,
 	};
 	int status;
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4ace3c50a8eb..96c4ebfa46f4 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -22,6 +22,7 @@
 
 #include <asm/system.h>
 
+#include "nfs4_fs.h"
 #include "internal.h"
 #include "iostat.h"
 #include "fscache.h"
@@ -46,6 +47,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
 		p->npages = pagecount;
+		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 		if (pagecount <= ARRAY_SIZE(p->page_array))
 			p->pagevec = p->page_array;
 		else {
@@ -357,19 +359,25 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
 	struct nfs_readres *resp = &data->res;
 
 	if (resp->eof || resp->count == argp->count)
-		return;
+		goto out;
 
 	/* This is a short read! */
 	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
 	/* Has the server at least made some progress? */
 	if (resp->count == 0)
-		return;
+		goto out;
 
 	/* Yes, so retry the read at the end of the data */
 	argp->offset += resp->count;
 	argp->pgbase += resp->count;
 	argp->count -= resp->count;
-	rpc_restart_call(task);
+	nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
+	return;
+out:
+	nfs4_sequence_free_slot(NFS_SERVER(data->inode)->nfs_client,
+				&data->res.seq_res);
+	return;
+
 }
 
 /*
@@ -406,7 +414,23 @@ static void nfs_readpage_release_partial(void *calldata)
 	nfs_readdata_release(calldata);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+void nfs_read_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs_read_data *data = calldata;
+
+	if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
+				&data->args.seq_args, &data->res.seq_res,
+				0, task))
+		return;
+	rpc_call_start(task);
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 static const struct rpc_call_ops nfs_read_partial_ops = {
+#if defined(CONFIG_NFS_V4_1)
+	.rpc_call_prepare = nfs_read_prepare,
+#endif /* CONFIG_NFS_V4_1 */
 	.rpc_call_done = nfs_readpage_result_partial,
 	.rpc_release = nfs_readpage_release_partial,
 };
@@ -470,6 +494,9 @@ static void nfs_readpage_release_full(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_read_full_ops = {
+#if defined(CONFIG_NFS_V4_1)
+	.rpc_call_prepare = nfs_read_prepare,
+#endif /* CONFIG_NFS_V4_1 */
 	.rpc_call_done = nfs_readpage_result_full,
 	.rpc_release = nfs_readpage_release_full,
 };
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 26127b69a275..0b4cbdc60abd 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -42,6 +42,8 @@
 #include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
+#include <linux/mnt_namespace.h>
+#include <linux/namei.h>
 #include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
 #include <linux/inet.h>
@@ -90,6 +92,7 @@ enum {
 	Opt_mountport,
 	Opt_mountvers,
 	Opt_nfsvers,
+	Opt_minorversion,
 
 	/* Mount options that take string arguments */
 	Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
@@ -139,22 +142,23 @@ static const match_table_t nfs_mount_option_tokens = {
 	{ Opt_fscache_uniq, "fsc=%s" },
 	{ Opt_nofscache, "nofsc" },
 
-	{ Opt_port, "port=%u" },
-	{ Opt_rsize, "rsize=%u" },
-	{ Opt_wsize, "wsize=%u" },
-	{ Opt_bsize, "bsize=%u" },
-	{ Opt_timeo, "timeo=%u" },
-	{ Opt_retrans, "retrans=%u" },
-	{ Opt_acregmin, "acregmin=%u" },
-	{ Opt_acregmax, "acregmax=%u" },
-	{ Opt_acdirmin, "acdirmin=%u" },
-	{ Opt_acdirmax, "acdirmax=%u" },
-	{ Opt_actimeo, "actimeo=%u" },
-	{ Opt_namelen, "namlen=%u" },
-	{ Opt_mountport, "mountport=%u" },
-	{ Opt_mountvers, "mountvers=%u" },
-	{ Opt_nfsvers, "nfsvers=%u" },
-	{ Opt_nfsvers, "vers=%u" },
+	{ Opt_port, "port=%s" },
+	{ Opt_rsize, "rsize=%s" },
+	{ Opt_wsize, "wsize=%s" },
+	{ Opt_bsize, "bsize=%s" },
+	{ Opt_timeo, "timeo=%s" },
+	{ Opt_retrans, "retrans=%s" },
+	{ Opt_acregmin, "acregmin=%s" },
+	{ Opt_acregmax, "acregmax=%s" },
+	{ Opt_acdirmin, "acdirmin=%s" },
+	{ Opt_acdirmax, "acdirmax=%s" },
+	{ Opt_actimeo, "actimeo=%s" },
+	{ Opt_namelen, "namlen=%s" },
+	{ Opt_mountport, "mountport=%s" },
+	{ Opt_mountvers, "mountvers=%s" },
+	{ Opt_nfsvers, "nfsvers=%s" },
+	{ Opt_nfsvers, "vers=%s" },
+	{ Opt_minorversion, "minorversion=%u" },
 
 	{ Opt_sec, "sec=%s" },
 	{ Opt_proto, "proto=%s" },
@@ -270,10 +274,14 @@ static const struct super_operations nfs_sops = {
 #ifdef CONFIG_NFS_V4
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_remote_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static int nfs4_referral_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs4_kill_super(struct super_block *sb);
 
 static struct file_system_type nfs4_fs_type = {
@@ -284,6 +292,14 @@ static struct file_system_type nfs4_fs_type = {
 	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
+static struct file_system_type nfs4_remote_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "nfs4",
+	.get_sb		= nfs4_remote_get_sb,
+	.kill_sb	= nfs4_kill_super,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
 struct file_system_type nfs4_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
@@ -292,6 +308,14 @@ struct file_system_type nfs4_xdev_fs_type = {
 	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
+static struct file_system_type nfs4_remote_referral_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "nfs4",
+	.get_sb		= nfs4_remote_referral_get_sb,
+	.kill_sb	= nfs4_kill_super,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
 struct file_system_type nfs4_referral_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
@@ -514,7 +538,6 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		const char *nostr;
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
-		{ NFS_MOUNT_INTR, ",intr", ",nointr" },
 		{ NFS_MOUNT_POSIX, ",posix", "" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
@@ -943,11 +966,6 @@ static int nfs_parse_security_flavors(char *value,
 	return 1;
 }
 
-static void nfs_parse_invalid_value(const char *option)
-{
-	dfprintk(MOUNT, "NFS:   bad value specified for %s option\n", option);
-}
-
 /*
  * Error-check and convert a string of mount options from user space into
  * a data structure.  The whole mount string is processed; bad options are
@@ -958,7 +976,7 @@ static int nfs_parse_mount_options(char *raw,
 				   struct nfs_parsed_mount_data *mnt)
 {
 	char *p, *string, *secdata;
-	int rc, sloppy = 0, errors = 0;
+	int rc, sloppy = 0, invalid_option = 0;
 
 	if (!raw) {
 		dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
@@ -982,7 +1000,9 @@ static int nfs_parse_mount_options(char *raw,
 
 	while ((p = strsep(&raw, ",")) != NULL) {
 		substring_t args[MAX_OPT_ARGS];
-		int option, token;
+		unsigned long option;
+		int int_option;
+		int token;
 
 		if (!*p)
 			continue;
@@ -1091,114 +1111,156 @@ static int nfs_parse_mount_options(char *raw,
 		 * options that take numeric values
 		 */
 		case Opt_port:
-			if (match_int(args, &option) ||
-			    option < 0 || option > USHORT_MAX) {
-				errors++;
-				nfs_parse_invalid_value("port");
-			} else
-				mnt->nfs_server.port = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0 || option > USHORT_MAX)
+				goto out_invalid_value;
+			mnt->nfs_server.port = option;
 			break;
 		case Opt_rsize:
-			if (match_int(args, &option) || option < 0) {
-				errors++;
-				nfs_parse_invalid_value("rsize");
-			} else
-				mnt->rsize = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0)
+				goto out_invalid_value;
+			mnt->rsize = option;
 			break;
 		case Opt_wsize:
-			if (match_int(args, &option) || option < 0) {
-				errors++;
-				nfs_parse_invalid_value("wsize");
-			} else
-				mnt->wsize = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0)
+				goto out_invalid_value;
+			mnt->wsize = option;
 			break;
 		case Opt_bsize:
-			if (match_int(args, &option) || option < 0) {
-				errors++;
-				nfs_parse_invalid_value("bsize");
-			} else
-				mnt->bsize = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0)
+				goto out_invalid_value;
+			mnt->bsize = option;
 			break;
 		case Opt_timeo:
-			if (match_int(args, &option) || option <= 0) {
-				errors++;
-				nfs_parse_invalid_value("timeo");
-			} else
-				mnt->timeo = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0 || option == 0)
+				goto out_invalid_value;
+			mnt->timeo = option;
 			break;
 		case Opt_retrans:
-			if (match_int(args, &option) || option <= 0) {
-				errors++;
-				nfs_parse_invalid_value("retrans");
-			} else
-				mnt->retrans = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0 || option == 0)
+				goto out_invalid_value;
+			mnt->retrans = option;
 			break;
 		case Opt_acregmin:
-			if (match_int(args, &option) || option < 0) {
-				errors++;
-				nfs_parse_invalid_value("acregmin");
-			} else
-				mnt->acregmin = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0)
+				goto out_invalid_value;
+			mnt->acregmin = option;
 			break;
 		case Opt_acregmax:
-			if (match_int(args, &option) || option < 0) {
-				errors++;
-				nfs_parse_invalid_value("acregmax");
-			} else
-				mnt->acregmax = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0)
+				goto out_invalid_value;
+			mnt->acregmax = option;
 			break;
 		case Opt_acdirmin:
-			if (match_int(args, &option) || option < 0) {
-				errors++;
-				nfs_parse_invalid_value("acdirmin");
-			} else
-				mnt->acdirmin = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0)
+				goto out_invalid_value;
+			mnt->acdirmin = option;
 			break;
 		case Opt_acdirmax:
-			if (match_int(args, &option) || option < 0) {
-				errors++;
-				nfs_parse_invalid_value("acdirmax");
-			} else
-				mnt->acdirmax = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0)
+				goto out_invalid_value;
+			mnt->acdirmax = option;
 			break;
 		case Opt_actimeo:
-			if (match_int(args, &option) || option < 0) {
-				errors++;
-				nfs_parse_invalid_value("actimeo");
-			} else
-				mnt->acregmin = mnt->acregmax =
-				mnt->acdirmin = mnt->acdirmax = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0)
+				goto out_invalid_value;
+			mnt->acregmin = mnt->acregmax =
+			mnt->acdirmin = mnt->acdirmax = option;
 			break;
 		case Opt_namelen:
-			if (match_int(args, &option) || option < 0) {
-				errors++;
-				nfs_parse_invalid_value("namlen");
-			} else
-				mnt->namlen = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0)
+				goto out_invalid_value;
+			mnt->namlen = option;
 			break;
 		case Opt_mountport:
-			if (match_int(args, &option) ||
-			    option < 0 || option > USHORT_MAX) {
-				errors++;
-				nfs_parse_invalid_value("mountport");
-			} else
-				mnt->mount_server.port = option;
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0 || option > USHORT_MAX)
+				goto out_invalid_value;
+			mnt->mount_server.port = option;
 			break;
 		case Opt_mountvers:
-			if (match_int(args, &option) ||
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0 ||
 			    option < NFS_MNT_VERSION ||
-			    option > NFS_MNT3_VERSION) {
-				errors++;
-				nfs_parse_invalid_value("mountvers");
-			} else
-				mnt->mount_server.version = option;
+			    option > NFS_MNT3_VERSION)
+				goto out_invalid_value;
+			mnt->mount_server.version = option;
 			break;
 		case Opt_nfsvers:
-			if (match_int(args, &option)) {
-				errors++;
-				nfs_parse_invalid_value("nfsvers");
-				break;
-			}
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			rc = strict_strtoul(string, 10, &option);
+			kfree(string);
+			if (rc != 0)
+				goto out_invalid_value;
 			switch (option) {
 			case NFS2_VERSION:
 				mnt->flags &= ~NFS_MOUNT_VER3;
@@ -1207,10 +1269,16 @@ static int nfs_parse_mount_options(char *raw,
 				mnt->flags |= NFS_MOUNT_VER3;
 				break;
 			default:
-				errors++;
-				nfs_parse_invalid_value("nfsvers");
+				goto out_invalid_value;
 			}
 			break;
+		case Opt_minorversion:
+			if (match_int(args, &int_option))
+				return 0;
+			if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION)
+				return 0;
+			mnt->minorversion = int_option;
+			break;
 
 		/*
 		 * options that take text values
@@ -1222,9 +1290,9 @@ static int nfs_parse_mount_options(char *raw,
 			rc = nfs_parse_security_flavors(string, mnt);
 			kfree(string);
 			if (!rc) {
-				errors++;
 				dfprintk(MOUNT, "NFS:   unrecognized "
 						"security flavor\n");
+				return 0;
 			}
 			break;
 		case Opt_proto:
@@ -1238,23 +1306,25 @@ static int nfs_parse_mount_options(char *raw,
 			case Opt_xprt_udp:
 				mnt->flags &= ~NFS_MOUNT_TCP;
 				mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+				kfree(string);
 				break;
 			case Opt_xprt_tcp:
 				mnt->flags |= NFS_MOUNT_TCP;
 				mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+				kfree(string);
 				break;
 			case Opt_xprt_rdma:
 				/* vector side protocols to TCP */
 				mnt->flags |= NFS_MOUNT_TCP;
 				mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
 				xprt_load_transport(string);
+				kfree(string);
 				break;
 			default:
-				errors++;
 				dfprintk(MOUNT, "NFS:   unrecognized "
 						"transport protocol\n");
+				return 0;
 			}
-			kfree(string);
 			break;
 		case Opt_mountproto:
 			string = match_strdup(args);
@@ -1273,9 +1343,9 @@ static int nfs_parse_mount_options(char *raw,
 				break;
 			case Opt_xprt_rdma: /* not used for side protocols */
 			default:
-				errors++;
 				dfprintk(MOUNT, "NFS:   unrecognized "
 						"transport protocol\n");
+				return 0;
 			}
 			break;
 		case Opt_addr:
@@ -1331,9 +1401,9 @@ static int nfs_parse_mount_options(char *raw,
 					mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE;
 					break;
 				default:
-					errors++;
 					dfprintk(MOUNT, "NFS:   invalid "
 							"lookupcache argument\n");
+					return 0;
 			};
 			break;
 
@@ -1351,20 +1421,20 @@ static int nfs_parse_mount_options(char *raw,
 			break;
 
 		default:
-			errors++;
+			invalid_option = 1;
 			dfprintk(MOUNT, "NFS:   unrecognized mount option "
 					"'%s'\n", p);
 		}
 	}
 
-	if (errors > 0) {
-		dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n",
-				errors, (errors == 1 ? "" : "s"));
-		if (!sloppy)
-			return 0;
-	}
+	if (!sloppy && invalid_option)
+		return 0;
+
 	return 1;
 
+out_invalid_value:
+	printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p);
+	return 0;
 out_nomem:
 	printk(KERN_INFO "NFS: not enough memory to parse option\n");
 	return 0;
@@ -1381,6 +1451,7 @@ out_security_failure:
 static int nfs_try_mount(struct nfs_parsed_mount_data *args,
 			 struct nfs_fh *root_fh)
 {
+	unsigned int auth_flavor_len = 0;
 	struct nfs_mount_request request = {
 		.sap		= (struct sockaddr *)
 						&args->mount_server.address,
@@ -1388,6 +1459,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
 		.protocol	= args->mount_server.protocol,
 		.fh		= root_fh,
 		.noresvport	= args->flags & NFS_MOUNT_NORESVPORT,
+		.auth_flav_len	= &auth_flavor_len,
 	};
 	int status;
 
@@ -2240,6 +2312,11 @@ static void nfs4_fill_super(struct super_block *sb)
 	nfs_initialise_sb(sb);
 }
 
+static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
+{
+	args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3);
+}
+
 /*
  * Validate NFSv4 mount options
  */
@@ -2263,6 +2340,7 @@ static int nfs4_validate_mount_data(void *options,
 	args->nfs_server.port	= NFS_PORT; /* 2049 unless user set port= */
 	args->auth_flavors[0]	= RPC_AUTH_UNIX;
 	args->auth_flavor_len	= 0;
+	args->minorversion	= 0;
 
 	switch (data->version) {
 	case 1:
@@ -2336,6 +2414,8 @@ static int nfs4_validate_mount_data(void *options,
 
 		nfs_validate_transport_protocol(args);
 
+		nfs4_validate_mount_flags(args);
+
 		if (args->auth_flavor_len > 1)
 			goto out_inval_auth;
 
@@ -2375,12 +2455,12 @@ out_no_client_address:
 }
 
 /*
- * Get the superblock for an NFS4 mountpoint
+ * Get the superblock for the NFS4 root partition
  */
-static int nfs4_get_sb(struct file_system_type *fs_type,
+static int nfs4_remote_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	struct nfs_parsed_mount_data *data;
+	struct nfs_parsed_mount_data *data = raw_data;
 	struct super_block *s;
 	struct nfs_server *server;
 	struct nfs_fh *mntfh;
@@ -2391,18 +2471,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	};
 	int error = -ENOMEM;
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
 	if (data == NULL || mntfh == NULL)
 		goto out_free_fh;
 
 	security_init_mnt_opts(&data->lsm_opts);
 
-	/* Validate the mount data */
-	error = nfs4_validate_mount_data(raw_data, data, dev_name);
-	if (error < 0)
-		goto out;
-
 	/* Get a volume representation */
 	server = nfs4_create_server(data, mntfh);
 	if (IS_ERR(server)) {
@@ -2415,7 +2489,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		compare_super = NULL;
 
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
+	s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_free;
@@ -2452,14 +2526,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	error = 0;
 
 out:
-	kfree(data->client_address);
-	kfree(data->nfs_server.export_path);
-	kfree(data->nfs_server.hostname);
-	kfree(data->fscache_uniq);
 	security_free_mnt_opts(&data->lsm_opts);
 out_free_fh:
 	kfree(mntfh);
-	kfree(data);
 	return error;
 
 out_free:
@@ -2473,16 +2542,137 @@ error_splat_super:
 	goto out;
 }
 
+static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
+		int flags, void *data, const char *hostname)
+{
+	struct vfsmount *root_mnt;
+	char *root_devname;
+	size_t len;
+
+	len = strlen(hostname) + 3;
+	root_devname = kmalloc(len, GFP_KERNEL);
+	if (root_devname == NULL)
+		return ERR_PTR(-ENOMEM);
+	snprintf(root_devname, len, "%s:/", hostname);
+	root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
+	kfree(root_devname);
+	return root_mnt;
+}
+
+static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
+{
+	char *page = (char *) __get_free_page(GFP_KERNEL);
+	char *devname, *tmp;
+
+	if (page == NULL)
+		return;
+	devname = nfs_path(path->mnt->mnt_devname,
+			path->mnt->mnt_root, path->dentry,
+			page, PAGE_SIZE);
+	if (devname == NULL)
+		goto out_freepage;
+	tmp = kstrdup(devname, GFP_KERNEL);
+	if (tmp == NULL)
+		goto out_freepage;
+	kfree(mnt->mnt_devname);
+	mnt->mnt_devname = tmp;
+out_freepage:
+	free_page((unsigned long)page);
+}
+
+static int nfs_follow_remote_path(struct vfsmount *root_mnt,
+		const char *export_path, struct vfsmount *mnt_target)
+{
+	struct mnt_namespace *ns_private;
+	struct nameidata nd;
+	struct super_block *s;
+	int ret;
+
+	ns_private = create_mnt_ns(root_mnt);
+	ret = PTR_ERR(ns_private);
+	if (IS_ERR(ns_private))
+		goto out_mntput;
+
+	ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
+			export_path, LOOKUP_FOLLOW, &nd);
+
+	put_mnt_ns(ns_private);
+
+	if (ret != 0)
+		goto out_err;
+
+	s = nd.path.mnt->mnt_sb;
+	atomic_inc(&s->s_active);
+	mnt_target->mnt_sb = s;
+	mnt_target->mnt_root = dget(nd.path.dentry);
+
+	/* Correct the device pathname */
+	nfs_fix_devname(&nd.path, mnt_target);
+
+	path_put(&nd.path);
+	down_write(&s->s_umount);
+	return 0;
+out_mntput:
+	mntput(root_mnt);
+out_err:
+	return ret;
+}
+
+/*
+ * Get the superblock for an NFS4 mountpoint
+ */
+static int nfs4_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+{
+	struct nfs_parsed_mount_data *data;
+	char *export_path;
+	struct vfsmount *root_mnt;
+	int error = -ENOMEM;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (data == NULL)
+		goto out_free_data;
+
+	/* Validate the mount data */
+	error = nfs4_validate_mount_data(raw_data, data, dev_name);
+	if (error < 0)
+		goto out;
+
+	export_path = data->nfs_server.export_path;
+	data->nfs_server.export_path = "/";
+	root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
+			data->nfs_server.hostname);
+	data->nfs_server.export_path = export_path;
+
+	error = PTR_ERR(root_mnt);
+	if (IS_ERR(root_mnt))
+		goto out;
+
+	error = nfs_follow_remote_path(root_mnt, export_path, mnt);
+
+out:
+	kfree(data->client_address);
+	kfree(data->nfs_server.export_path);
+	kfree(data->nfs_server.hostname);
+	kfree(data->fscache_uniq);
+out_free_data:
+	kfree(data);
+	dprintk("<-- nfs4_get_sb() = %d%s\n", error,
+			error != 0 ? " [error]" : "");
+	return error;
+}
+
 static void nfs4_kill_super(struct super_block *sb)
 {
 	struct nfs_server *server = NFS_SB(sb);
 
+	dprintk("--> %s\n", __func__);
 	nfs_super_return_all_delegations(sb);
 	kill_anon_super(sb);
-
 	nfs4_renewd_prepare_shutdown(server);
 	nfs_fscache_release_super_cookie(sb);
 	nfs_free_server(server);
+	dprintk("<-- %s\n", __func__);
 }
 
 /*
@@ -2568,12 +2758,9 @@ error_splat_super:
 	return error;
 }
 
-/*
- * Create an NFS4 server record on referral traversal
- */
-static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
-				const char *dev_name, void *raw_data,
-				struct vfsmount *mnt)
+static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
+		int flags, const char *dev_name, void *raw_data,
+		struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
 	struct super_block *s;
@@ -2652,4 +2839,36 @@ error_splat_super:
 	return error;
 }
 
+/*
+ * Create an NFS4 server record on referral traversal
+ */
+static int nfs4_referral_get_sb(struct file_system_type *fs_type,
+		int flags, const char *dev_name, void *raw_data,
+		struct vfsmount *mnt)
+{
+	struct nfs_clone_mount *data = raw_data;
+	char *export_path;
+	struct vfsmount *root_mnt;
+	int error;
+
+	dprintk("--> nfs4_referral_get_sb()\n");
+
+	export_path = data->mnt_path;
+	data->mnt_path = "/";
+
+	root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type,
+			flags, data, data->hostname);
+	data->mnt_path = export_path;
+
+	error = PTR_ERR(root_mnt);
+	if (IS_ERR(root_mnt))
+		goto out;
+
+	error = nfs_follow_remote_path(root_mnt, export_path, mnt);
+out:
+	dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error,
+			error != 0 ? " [error]" : "");
+	return error;
+}
+
 #endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index ecc295347775..1064c91ae810 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -15,6 +15,7 @@
 #include <linux/wait.h>
 
 #include "internal.h"
+#include "nfs4_fs.h"
 
 struct nfs_unlinkdata {
 	struct hlist_node list;
@@ -82,7 +83,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
 	struct inode *dir = data->dir;
 
 	if (!NFS_PROTO(dir)->unlink_done(task, dir))
-		rpc_restart_call(task);
+		nfs4_restart_rpc(task, NFS_SERVER(dir)->nfs_client);
 }
 
 /**
@@ -102,9 +103,25 @@ static void nfs_async_unlink_release(void *calldata)
 	nfs_sb_deactive(sb);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs_unlinkdata *data = calldata;
+	struct nfs_server *server = NFS_SERVER(data->dir);
+
+	if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
+				&data->res.seq_res, 1, task))
+		return;
+	rpc_call_start(task);
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 static const struct rpc_call_ops nfs_unlink_ops = {
 	.rpc_call_done = nfs_async_unlink_done,
 	.rpc_release = nfs_async_unlink_release,
+#if defined(CONFIG_NFS_V4_1)
+	.rpc_call_prepare = nfs_unlink_prepare,
+#endif /* CONFIG_NFS_V4_1 */
 };
 
 static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
@@ -241,6 +258,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
 		status = PTR_ERR(data->cred);
 		goto out_free;
 	}
+	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 
 	status = -EBUSY;
 	spin_lock(&dentry->d_lock);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e560a78995a3..35d81316163f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -19,12 +19,14 @@
 #include <linux/nfs_mount.h>
 #include <linux/nfs_page.h>
 #include <linux/backing-dev.h>
+#include <linux/blkdev.h>
 
 #include <asm/uaccess.h>
 
 #include "delegation.h"
 #include "internal.h"
 #include "iostat.h"
+#include "nfs4_fs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
@@ -52,6 +54,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
+		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	}
 	return p;
 }
@@ -71,6 +74,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
 		p->npages = pagecount;
+		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 		if (pagecount <= ARRAY_SIZE(p->page_array))
 			p->pagevec = p->page_array;
 		else {
@@ -199,8 +203,10 @@ static int nfs_set_page_writeback(struct page *page)
 		struct nfs_server *nfss = NFS_SERVER(inode);
 
 		if (atomic_long_inc_return(&nfss->writeback) >
-				NFS_CONGESTION_ON_THRESH)
-			set_bdi_congested(&nfss->backing_dev_info, WRITE);
+				NFS_CONGESTION_ON_THRESH) {
+			set_bdi_congested(&nfss->backing_dev_info,
+						BLK_RW_ASYNC);
+		}
 	}
 	return ret;
 }
@@ -212,7 +218,7 @@ static void nfs_end_page_writeback(struct page *page)
 
 	end_page_writeback(page);
 	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
-		clear_bdi_congested(&nfss->backing_dev_info, WRITE);
+		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 }
 
 /*
@@ -1048,7 +1054,23 @@ out:
 	nfs_writedata_release(calldata);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+void nfs_write_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;
+
+	if (nfs4_setup_sequence(clp, &data->args.seq_args,
+				&data->res.seq_res, 1, task))
+		return;
+	rpc_call_start(task);
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 static const struct rpc_call_ops nfs_write_partial_ops = {
+#if defined(CONFIG_NFS_V4_1)
+	.rpc_call_prepare = nfs_write_prepare,
+#endif /* CONFIG_NFS_V4_1 */
 	.rpc_call_done = nfs_writeback_done_partial,
 	.rpc_release = nfs_writeback_release_partial,
 };
@@ -1111,6 +1133,9 @@ remove_request:
 }
 
 static const struct rpc_call_ops nfs_write_full_ops = {
+#if defined(CONFIG_NFS_V4_1)
+	.rpc_call_prepare = nfs_write_prepare,
+#endif /* CONFIG_NFS_V4_1 */
 	.rpc_call_done = nfs_writeback_done_full,
 	.rpc_release = nfs_writeback_release_full,
 };
@@ -1123,6 +1148,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct nfs_writeargs	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
+	struct nfs_server	*server = NFS_SERVER(data->inode);
 	int status;
 
 	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1155,7 +1181,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		if (time_before(complain, jiffies)) {
 			dprintk("NFS:       faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
-				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
+				server->nfs_client->cl_hostname,
 				resp->verf->committed, argp->stable);
 			complain = jiffies + 300 * HZ;
 		}
@@ -1181,7 +1207,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 				 */
 				argp->stable = NFS_FILE_SYNC;
 			}
-			rpc_restart_call(task);
+			nfs4_restart_rpc(task, server->nfs_client);
 			return -EAGAIN;
 		}
 		if (time_before(complain, jiffies)) {
@@ -1193,6 +1219,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		/* Can't do anything about it except throw an error. */
 		task->tk_status = -EIO;
 	}
+	nfs4_sequence_free_slot(server->nfs_client, &data->res.seq_res);
 	return 0;
 }
 
@@ -1349,6 +1376,9 @@ static void nfs_commit_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_commit_ops = {
+#if defined(CONFIG_NFS_V4_1)
+	.rpc_call_prepare = nfs_write_prepare,
+#endif /* CONFIG_NFS_V4_1 */
 	.rpc_call_done = nfs_commit_done,
 	.rpc_release = nfs_commit_release,
 };
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8b1f8efb4690..b92a27629fb7 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -464,16 +464,11 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
 		if (err)
 			return err;
 		/*
-		 * Just a quick sanity check; we could also try to check
-		 * whether this pseudoflavor is supported, but at worst
-		 * an unsupported pseudoflavor on the export would just
-		 * be a pseudoflavor that won't match the flavor of any
-		 * authenticated request.  The administrator will
-		 * probably discover the problem when someone fails to
-		 * authenticate.
+		 * XXX: It would be nice to also check whether this
+		 * pseudoflavor is supported, so we can discover the
+		 * problem at export time instead of when a client fails
+		 * to authenticate.
 		 */
-		if (f->pseudoflavor < 0)
-			return -EINVAL;
 		err = get_int(mesg, &f->flags);
 		if (err)
 			return err;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7c9fe838f038..a713c418a922 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -652,8 +652,6 @@ nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
  * NFSv3 Server procedures.
  * Only the results of non-idempotent operations are cached.
  */
-#define nfs3svc_decode_voidargs		NULL
-#define nfs3svc_release_void		NULL
 #define nfs3svc_decode_fhandleargs	nfs3svc_decode_fhandle
 #define nfs3svc_encode_attrstatres	nfs3svc_encode_attrstat
 #define nfs3svc_encode_wccstatres	nfs3svc_encode_wccstat
@@ -686,28 +684,219 @@ struct nfsd3_voidargs { int dummy; };
 #define WC (7+pAT)	/* WCC attributes */
 
 static struct svc_procedure		nfsd_procedures3[22] = {
-  PROC(null,	 void,		void,		void,	  RC_NOCACHE, ST),
-  PROC(getattr,	 fhandle,	attrstat,	fhandle,  RC_NOCACHE, ST+AT),
-  PROC(setattr,  sattr,		wccstat,	fhandle,  RC_REPLBUFF, ST+WC),
-  PROC(lookup,	 dirop,		dirop,		fhandle2, RC_NOCACHE, ST+FH+pAT+pAT),
-  PROC(access,	 access,	access,		fhandle,  RC_NOCACHE, ST+pAT+1),
-  PROC(readlink, readlink,	readlink,	fhandle,  RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4),
-  PROC(read,	 read,		read,		fhandle,  RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4),
-  PROC(write,	 write,		write,		fhandle,  RC_REPLBUFF, ST+WC+4),
-  PROC(create,	 create,	create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
-  PROC(mkdir,	 mkdir,		create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
-  PROC(symlink,	 symlink,	create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
-  PROC(mknod,	 mknod,		create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
-  PROC(remove,	 dirop,		wccstat,	fhandle,  RC_REPLBUFF, ST+WC),
-  PROC(rmdir,	 dirop,		wccstat,	fhandle,  RC_REPLBUFF, ST+WC),
-  PROC(rename,	 rename,	rename,		fhandle2, RC_REPLBUFF, ST+WC+WC),
-  PROC(link,	 link,		link,		fhandle2, RC_REPLBUFF, ST+pAT+WC),
-  PROC(readdir,	 readdir,	readdir,	fhandle,  RC_NOCACHE, 0),
-  PROC(readdirplus,readdirplus,	readdir,	fhandle,  RC_NOCACHE, 0),
-  PROC(fsstat,	 fhandle,	fsstat,		void,     RC_NOCACHE, ST+pAT+2*6+1),
-  PROC(fsinfo,   fhandle,	fsinfo,		void,     RC_NOCACHE, ST+pAT+12),
-  PROC(pathconf, fhandle,	pathconf,	void,     RC_NOCACHE, ST+pAT+6),
-  PROC(commit,	 commit,	commit,		fhandle,  RC_NOCACHE, ST+WC+2),
+	[NFS3PROC_NULL] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_null,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
+		.pc_argsize = sizeof(struct nfsd3_voidargs),
+		.pc_ressize = sizeof(struct nfsd3_voidres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST,
+	},
+	[NFS3PROC_GETATTR] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_getattr,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
+		.pc_ressize = sizeof(struct nfsd3_attrstatres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+AT,
+	},
+	[NFS3PROC_SETATTR] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_setattr,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_sattrargs),
+		.pc_ressize = sizeof(struct nfsd3_wccstatres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+WC,
+	},
+	[NFS3PROC_LOOKUP] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_lookup,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_argsize = sizeof(struct nfsd3_diropargs),
+		.pc_ressize = sizeof(struct nfsd3_diropres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+FH+pAT+pAT,
+	},
+	[NFS3PROC_ACCESS] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_access,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_accessargs),
+		.pc_ressize = sizeof(struct nfsd3_accessres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+pAT+1,
+	},
+	[NFS3PROC_READLINK] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_readlink,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
+		.pc_ressize = sizeof(struct nfsd3_readlinkres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
+	},
+	[NFS3PROC_READ] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_read,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_readargs),
+		.pc_ressize = sizeof(struct nfsd3_readres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
+	},
+	[NFS3PROC_WRITE] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_write,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_writeargs),
+		.pc_ressize = sizeof(struct nfsd3_writeres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+WC+4,
+	},
+	[NFS3PROC_CREATE] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_create,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_argsize = sizeof(struct nfsd3_createargs),
+		.pc_ressize = sizeof(struct nfsd3_createres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+(1+FH+pAT)+WC,
+	},
+	[NFS3PROC_MKDIR] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_mkdir,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
+		.pc_ressize = sizeof(struct nfsd3_createres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+(1+FH+pAT)+WC,
+	},
+	[NFS3PROC_SYMLINK] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_symlink,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
+		.pc_ressize = sizeof(struct nfsd3_createres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+(1+FH+pAT)+WC,
+	},
+	[NFS3PROC_MKNOD] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_mknod,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_argsize = sizeof(struct nfsd3_mknodargs),
+		.pc_ressize = sizeof(struct nfsd3_createres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+(1+FH+pAT)+WC,
+	},
+	[NFS3PROC_REMOVE] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_remove,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_diropargs),
+		.pc_ressize = sizeof(struct nfsd3_wccstatres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+WC,
+	},
+	[NFS3PROC_RMDIR] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_rmdir,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_diropargs),
+		.pc_ressize = sizeof(struct nfsd3_wccstatres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+WC,
+	},
+	[NFS3PROC_RENAME] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_rename,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_argsize = sizeof(struct nfsd3_renameargs),
+		.pc_ressize = sizeof(struct nfsd3_renameres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+WC+WC,
+	},
+	[NFS3PROC_LINK] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_link,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_argsize = sizeof(struct nfsd3_linkargs),
+		.pc_ressize = sizeof(struct nfsd3_linkres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+pAT+WC,
+	},
+	[NFS3PROC_READDIR] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_readdir,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_readdirargs),
+		.pc_ressize = sizeof(struct nfsd3_readdirres),
+		.pc_cachetype = RC_NOCACHE,
+	},
+	[NFS3PROC_READDIRPLUS] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_readdirplus,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
+		.pc_ressize = sizeof(struct nfsd3_readdirres),
+		.pc_cachetype = RC_NOCACHE,
+	},
+	[NFS3PROC_FSSTAT] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_fsstat,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
+		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
+		.pc_ressize = sizeof(struct nfsd3_fsstatres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+pAT+2*6+1,
+	},
+	[NFS3PROC_FSINFO] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_fsinfo,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
+		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
+		.pc_ressize = sizeof(struct nfsd3_fsinfores),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+pAT+12,
+	},
+	[NFS3PROC_PATHCONF] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_pathconf,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
+		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
+		.pc_ressize = sizeof(struct nfsd3_pathconfres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+pAT+6,
+	},
+	[NFS3PROC_COMMIT] = {
+		.pc_func = (svc_procfunc) nfsd3_proc_commit,
+		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
+		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
+		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd3_commitargs),
+		.pc_ressize = sizeof(struct nfsd3_commitres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+WC+2,
+	},
 };
 
 struct svc_version	nfsd_version3 = {
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 17d0dd997204..01d4ec1c88e0 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -272,6 +272,7 @@ void fill_post_wcc(struct svc_fh *fhp)
 
 	err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
 			&fhp->fh_post_attr);
+	fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
 	if (err)
 		fhp->fh_post_saved = 0;
 	else
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 290289bd44f7..3fd23f7aceca 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -140,8 +140,10 @@ struct nfs4_cb_compound_hdr {
 	int		status;
 	u32		ident;
 	u32		nops;
+	__be32		*nops_p;
+	u32		minorversion;
 	u32		taglen;
-	char *		tag;
+	char		*tag;
 };
 
 static struct {
@@ -201,33 +203,39 @@ nfs_cb_stat_to_errno(int stat)
  * XDR encode
  */
 
-static int
+static void
 encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
 {
 	__be32 * p;
 
 	RESERVE_SPACE(16);
 	WRITE32(0);            /* tag length is always 0 */
-	WRITE32(NFS4_MINOR_VERSION);
+	WRITE32(hdr->minorversion);
 	WRITE32(hdr->ident);
+	hdr->nops_p = p;
 	WRITE32(hdr->nops);
-	return 0;
 }
 
-static int
-encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
+static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
+{
+	*hdr->nops_p = htonl(hdr->nops);
+}
+
+static void
+encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
+		struct nfs4_cb_compound_hdr *hdr)
 {
 	__be32 *p;
-	int len = cb_rec->cbr_fh.fh_size;
+	int len = dp->dl_fh.fh_size;
 
-	RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len);
+	RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len);
 	WRITE32(OP_CB_RECALL);
-	WRITE32(cb_rec->cbr_stateid.si_generation);
-	WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t));
-	WRITE32(cb_rec->cbr_trunc);
+	WRITE32(dp->dl_stateid.si_generation);
+	WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t));
+	WRITE32(0); /* truncate optimization not implemented */
 	WRITE32(len);
-	WRITEMEM(&cb_rec->cbr_fh.fh_base, len);
-	return 0;
+	WRITEMEM(&dp->dl_fh.fh_base, len);
+	hdr->nops++;
 }
 
 static int
@@ -241,17 +249,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
 }
 
 static int
-nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_cb_recall *args)
+nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args)
 {
 	struct xdr_stream xdr;
 	struct nfs4_cb_compound_hdr hdr = {
-		.ident = args->cbr_ident,
-		.nops   = 1,
+		.ident = args->dl_ident,
 	};
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_cb_compound_hdr(&xdr, &hdr);
-	return (encode_cb_recall(&xdr, args));
+	encode_cb_recall(&xdr, args, &hdr);
+	encode_cb_nops(&hdr);
+	return 0;
 }
 
 
@@ -358,18 +367,21 @@ static struct rpc_program cb_program = {
 		.pipe_dir_name  = "/nfsd4_cb",
 };
 
+static int max_cb_time(void)
+{
+	return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ;
+}
+
 /* Reference counting, callback cleanup, etc., all look racy as heck.
  * And why is cb_set an atomic? */
 
-static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp)
+int setup_callback_client(struct nfs4_client *clp)
 {
 	struct sockaddr_in	addr;
-	struct nfs4_callback    *cb = &clp->cl_callback;
+	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 	struct rpc_timeout	timeparms = {
-		.to_initval	= (NFSD_LEASE_TIME/4) * HZ,
-		.to_retries	= 5,
-		.to_maxval	= (NFSD_LEASE_TIME/2) * HZ,
-		.to_exponential	= 1,
+		.to_initval	= max_cb_time(),
+		.to_retries	= 0,
 	};
 	struct rpc_create_args args = {
 		.protocol	= IPPROTO_TCP,
@@ -386,7 +398,7 @@ static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp)
 	struct rpc_clnt *client;
 
 	if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	/* Initialize address */
 	memset(&addr, 0, sizeof(addr));
@@ -396,48 +408,77 @@ static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp)
 
 	/* Create RPC client */
 	client = rpc_create(&args);
-	if (IS_ERR(client))
+	if (IS_ERR(client)) {
 		dprintk("NFSD: couldn't create callback client: %ld\n",
 			PTR_ERR(client));
-	return client;
+		return PTR_ERR(client);
+	}
+	cb->cb_client = client;
+	return 0;
+
+}
+
+static void warn_no_callback_path(struct nfs4_client *clp, int reason)
+{
+	dprintk("NFSD: warning: no callback path to client %.*s: error %d\n",
+		(int)clp->cl_name.len, clp->cl_name.data, reason);
+}
+
+static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_client *clp = calldata;
+
+	if (task->tk_status)
+		warn_no_callback_path(clp, task->tk_status);
+	else
+		atomic_set(&clp->cl_cb_conn.cb_set, 1);
+	put_nfs4_client(clp);
+}
+
+static const struct rpc_call_ops nfsd4_cb_probe_ops = {
+	.rpc_call_done = nfsd4_cb_probe_done,
+};
 
+static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb)
+{
+	struct auth_cred acred = {
+		.machine_cred = 1
+	};
+
+	/*
+	 * Note in the gss case this doesn't actually have to wait for a
+	 * gss upcall (or any calls to the client); this just creates a
+	 * non-uptodate cred which the rpc state machine will fill in with
+	 * a refresh_upcall later.
+	 */
+	return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred,
+							RPCAUTH_LOOKUP_NEW);
 }
 
-static int do_probe_callback(void *data)
+void do_probe_callback(struct nfs4_client *clp)
 {
-	struct nfs4_client *clp = data;
-	struct nfs4_callback    *cb = &clp->cl_callback;
+	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
 	};
-	struct rpc_clnt *client;
+	struct rpc_cred *cred;
 	int status;
 
-	client = setup_callback_client(clp);
-	if (IS_ERR(client)) {
-		status = PTR_ERR(client);
-		dprintk("NFSD: couldn't create callback client: %d\n",
-								status);
-		goto out_err;
+	cred = lookup_cb_cred(cb);
+	if (IS_ERR(cred)) {
+		status = PTR_ERR(cred);
+		goto out;
+	}
+	cb->cb_cred = cred;
+	msg.rpc_cred = cb->cb_cred;
+	status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT,
+				&nfsd4_cb_probe_ops, (void *)clp);
+out:
+	if (status) {
+		warn_no_callback_path(clp, status);
+		put_nfs4_client(clp);
 	}
-
-	status = rpc_call_sync(client, &msg, RPC_TASK_SOFT);
-
-	if (status)
-		goto out_release_client;
-
-	cb->cb_client = client;
-	atomic_set(&cb->cb_set, 1);
-	put_nfs4_client(clp);
-	return 0;
-out_release_client:
-	rpc_shutdown_client(client);
-out_err:
-	dprintk("NFSD: warning: no callback path to client %.*s: error %d\n",
-		(int)clp->cl_name.len, clp->cl_name.data, status);
-	put_nfs4_client(clp);
-	return 0;
 }
 
 /*
@@ -446,21 +487,65 @@ out_err:
 void
 nfsd4_probe_callback(struct nfs4_client *clp)
 {
-	struct task_struct *t;
+	int status;
 
-	BUG_ON(atomic_read(&clp->cl_callback.cb_set));
+	BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set));
+
+	status = setup_callback_client(clp);
+	if (status) {
+		warn_no_callback_path(clp, status);
+		return;
+	}
 
 	/* the task holds a reference to the nfs4_client struct */
 	atomic_inc(&clp->cl_count);
 
-	t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe");
+	do_probe_callback(clp);
+}
 
-	if (IS_ERR(t))
-		atomic_dec(&clp->cl_count);
+static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_delegation *dp = calldata;
+	struct nfs4_client *clp = dp->dl_client;
 
-	return;
+	switch (task->tk_status) {
+	case -EIO:
+		/* Network partition? */
+		atomic_set(&clp->cl_cb_conn.cb_set, 0);
+		warn_no_callback_path(clp, task->tk_status);
+	case -EBADHANDLE:
+	case -NFS4ERR_BAD_STATEID:
+		/* Race: client probably got cb_recall
+		 * before open reply granting delegation */
+		break;
+	default:
+		/* success, or error we can't handle */
+		return;
+	}
+	if (dp->dl_retries--) {
+		rpc_delay(task, 2*HZ);
+		task->tk_status = 0;
+		rpc_restart_call(task);
+	} else {
+		atomic_set(&clp->cl_cb_conn.cb_set, 0);
+		warn_no_callback_path(clp, task->tk_status);
+	}
+}
+
+static void nfsd4_cb_recall_release(void *calldata)
+{
+	struct nfs4_delegation *dp = calldata;
+	struct nfs4_client *clp = dp->dl_client;
+
+	nfs4_put_delegation(dp);
+	put_nfs4_client(clp);
 }
 
+static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+	.rpc_call_done = nfsd4_cb_recall_done,
+	.rpc_release = nfsd4_cb_recall_release,
+};
+
 /*
  * called with dp->dl_count inc'ed.
  */
@@ -468,41 +553,19 @@ void
 nfsd4_cb_recall(struct nfs4_delegation *dp)
 {
 	struct nfs4_client *clp = dp->dl_client;
-	struct rpc_clnt *clnt = clp->cl_callback.cb_client;
-	struct nfs4_cb_recall *cbr = &dp->dl_recall;
+	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
-		.rpc_argp = cbr,
+		.rpc_argp = dp,
+		.rpc_cred = clp->cl_cb_conn.cb_cred
 	};
-	int retries = 1;
-	int status = 0;
-
-	cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
-	cbr->cbr_dp = dp;
-
-	status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
-	while (retries--) {
-		switch (status) {
-			case -EIO:
-				/* Network partition? */
-				atomic_set(&clp->cl_callback.cb_set, 0);
-			case -EBADHANDLE:
-			case -NFS4ERR_BAD_STATEID:
-				/* Race: client probably got cb_recall
-				 * before open reply granting delegation */
-				break;
-			default:
-				goto out_put_cred;
-		}
-		ssleep(2);
-		status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
+	int status;
+
+	dp->dl_retries = 1;
+	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
+				&nfsd4_cb_recall_ops, dp);
+	if (status) {
+		put_nfs4_client(clp);
+		nfs4_put_delegation(dp);
 	}
-out_put_cred:
-	/*
-	 * Success or failure, now we're either waiting for lease expiration
-	 * or deleg_return.
-	 */
-	put_nfs4_client(clp);
-	nfs4_put_delegation(dp);
-	return;
 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b2883e9c6381..7c8801769a3c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -51,6 +51,78 @@
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 
+static u32 nfsd_attrmask[] = {
+	NFSD_WRITEABLE_ATTRS_WORD0,
+	NFSD_WRITEABLE_ATTRS_WORD1,
+	NFSD_WRITEABLE_ATTRS_WORD2
+};
+
+static u32 nfsd41_ex_attrmask[] = {
+	NFSD_SUPPATTR_EXCLCREAT_WORD0,
+	NFSD_SUPPATTR_EXCLCREAT_WORD1,
+	NFSD_SUPPATTR_EXCLCREAT_WORD2
+};
+
+static __be32
+check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		   u32 *bmval, u32 *writable)
+{
+	struct dentry *dentry = cstate->current_fh.fh_dentry;
+	struct svc_export *exp = cstate->current_fh.fh_export;
+
+	/*
+	 * Check about attributes are supported by the NFSv4 server or not.
+	 * According to spec, unsupported attributes return ERR_ATTRNOTSUPP.
+	 */
+	if ((bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) ||
+	    (bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) ||
+	    (bmval[2] & ~nfsd_suppattrs2(cstate->minorversion)))
+		return nfserr_attrnotsupp;
+
+	/*
+	 * Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported
+	 * in current environment or not.
+	 */
+	if (bmval[0] & FATTR4_WORD0_ACL) {
+		if (!IS_POSIXACL(dentry->d_inode))
+			return nfserr_attrnotsupp;
+	}
+	if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) {
+		if (exp->ex_fslocs.locations == NULL)
+			return nfserr_attrnotsupp;
+	}
+
+	/*
+	 * According to spec, read-only attributes return ERR_INVAL.
+	 */
+	if (writable) {
+		if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) ||
+		    (bmval[2] & ~writable[2]))
+			return nfserr_inval;
+	}
+
+	return nfs_ok;
+}
+
+static __be32
+nfsd4_check_open_attributes(struct svc_rqst *rqstp,
+	struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
+{
+	__be32 status = nfs_ok;
+
+	if (open->op_create == NFS4_OPEN_CREATE) {
+		if (open->op_createmode == NFS4_CREATE_UNCHECKED
+		    || open->op_createmode == NFS4_CREATE_GUARDED)
+			status = check_attr_support(rqstp, cstate,
+					open->op_bmval, nfsd_attrmask);
+		else if (open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1)
+			status = check_attr_support(rqstp, cstate,
+					open->op_bmval, nfsd41_ex_attrmask);
+	}
+
+	return status;
+}
+
 static inline void
 fh_dup2(struct svc_fh *dst, struct svc_fh *src)
 {
@@ -225,6 +297,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out;
 
+	status = nfsd4_check_open_attributes(rqstp, cstate, open);
+	if (status)
+		goto out;
+
 	/* Openowner is now set, so sequence id will get bumped.  Now we need
 	 * these checks before we do any creates: */
 	status = nfserr_grace;
@@ -395,6 +471,11 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		return status;
 
+	status = check_attr_support(rqstp, cstate, create->cr_bmval,
+				    nfsd_attrmask);
+	if (status)
+		return status;
+
 	switch (create->cr_type) {
 	case NF4LNK:
 		/* ugh! we have to null-terminate the linktext, or
@@ -689,6 +770,12 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		return status;
 	status = nfs_ok;
+
+	status = check_attr_support(rqstp, cstate, setattr->sa_bmval,
+				    nfsd_attrmask);
+	if (status)
+		goto out;
+
 	if (setattr->sa_acl != NULL)
 		status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
 					    setattr->sa_acl);
@@ -763,10 +850,10 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		return status;
 
-	if ((verify->ve_bmval[0] & ~nfsd_suppattrs0(cstate->minorversion))
-	    || (verify->ve_bmval[1] & ~nfsd_suppattrs1(cstate->minorversion))
-	    || (verify->ve_bmval[2] & ~nfsd_suppattrs2(cstate->minorversion)))
-		return nfserr_attrnotsupp;
+	status = check_attr_support(rqstp, cstate, verify->ve_bmval, NULL);
+	if (status)
+		return status;
+
 	if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)
 	    || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1))
 		return nfserr_inval;
@@ -1226,24 +1313,9 @@ static const char *nfsd4_op_name(unsigned opnum)
 	return "unknown_operation";
 }
 
-#define nfs4svc_decode_voidargs		NULL
-#define nfs4svc_release_void		NULL
 #define nfsd4_voidres			nfsd4_voidargs
-#define nfs4svc_release_compound	NULL
 struct nfsd4_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsd4_proc_##name,		\
-   (kxdrproc_t) nfs4svc_decode_##argt##args,	\
-   (kxdrproc_t) nfs4svc_encode_##rest##res,	\
-   (kxdrproc_t) nfs4svc_release_##relt,		\
-   sizeof(struct nfsd4_##argt##args),		\
-   sizeof(struct nfsd4_##rest##res),		\
-   0,						\
-   cache,					\
-   respsize,					\
- }
-
 /*
  * TODO: At the present time, the NFSv4 server does not do XID caching
  * of requests.  Implementing XID caching would not be a serious problem,
@@ -1255,8 +1327,23 @@ struct nfsd4_voidargs { int dummy; };
  * better XID's.
  */
 static struct svc_procedure		nfsd_procedures4[2] = {
-  PROC(null,	 void,		void,		void,	  RC_NOCACHE, 1),
-  PROC(compound, compound,	compound,	compound, RC_NOCACHE, NFSD_BUFSIZE/4)
+	[NFSPROC4_NULL] = {
+		.pc_func = (svc_procfunc) nfsd4_proc_null,
+		.pc_encode = (kxdrproc_t) nfs4svc_encode_voidres,
+		.pc_argsize = sizeof(struct nfsd4_voidargs),
+		.pc_ressize = sizeof(struct nfsd4_voidres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = 1,
+	},
+	[NFSPROC4_COMPOUND] = {
+		.pc_func = (svc_procfunc) nfsd4_proc_compound,
+		.pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs,
+		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
+		.pc_argsize = sizeof(struct nfsd4_compoundargs),
+		.pc_ressize = sizeof(struct nfsd4_compoundres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = NFSD_BUFSIZE/4,
+	},
 };
 
 struct svc_version	nfsd_version4 = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3b711f5147a7..980a216a48c8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -182,7 +182,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 {
 	struct nfs4_delegation *dp;
 	struct nfs4_file *fp = stp->st_file;
-	struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
+	struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;
 
 	dprintk("NFSD alloc_init_deleg\n");
 	if (fp->fi_had_conflict)
@@ -203,10 +203,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	get_file(stp->st_vfs_file);
 	dp->dl_vfs_file = stp->st_vfs_file;
 	dp->dl_type = type;
-	dp->dl_recall.cbr_dp = NULL;
-	dp->dl_recall.cbr_ident = cb->cb_ident;
-	dp->dl_recall.cbr_trunc = 0;
-	dp->dl_stateid.si_boot = boot_time;
+	dp->dl_ident = cb->cb_ident;
+	dp->dl_stateid.si_boot = get_seconds();
 	dp->dl_stateid.si_stateownerid = current_delegid++;
 	dp->dl_stateid.si_fileid = 0;
 	dp->dl_stateid.si_generation = 0;
@@ -427,6 +425,11 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
 {
 	int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT;
 
+	if (fchan->maxreqs < 1)
+		return nfserr_inval;
+	else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
+		fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
+
 	spin_lock(&nfsd_serv->sv_lock);
 	if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages)
 		np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used;
@@ -446,8 +449,8 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
  * fchan holds the client values on input, and the server values on output
  */
 static int init_forechannel_attrs(struct svc_rqst *rqstp,
-				    struct nfsd4_session *session,
-				    struct nfsd4_channel_attrs *fchan)
+				  struct nfsd4_channel_attrs *session_fchan,
+				  struct nfsd4_channel_attrs *fchan)
 {
 	int status = 0;
 	__u32   maxcount = svc_max_payload(rqstp);
@@ -457,21 +460,21 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
 	/* Use the client's max request and max response size if possible */
 	if (fchan->maxreq_sz > maxcount)
 		fchan->maxreq_sz = maxcount;
-	session->se_fmaxreq_sz = fchan->maxreq_sz;
+	session_fchan->maxreq_sz = fchan->maxreq_sz;
 
 	if (fchan->maxresp_sz > maxcount)
 		fchan->maxresp_sz = maxcount;
-	session->se_fmaxresp_sz = fchan->maxresp_sz;
+	session_fchan->maxresp_sz = fchan->maxresp_sz;
 
 	/* Set the max response cached size our default which is
 	 * a multiple of PAGE_SIZE and small */
-	session->se_fmaxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
-	fchan->maxresp_cached = session->se_fmaxresp_cached;
+	session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
+	fchan->maxresp_cached = session_fchan->maxresp_cached;
 
 	/* Use the client's maxops if possible */
 	if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
 		fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
-	session->se_fmaxops = fchan->maxops;
+	session_fchan->maxops = fchan->maxops;
 
 	/* try to use the client requested number of slots */
 	if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
@@ -483,7 +486,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
 	 */
 	status = set_forechannel_maxreqs(fchan);
 
-	session->se_fnumslots = fchan->maxreqs;
+	session_fchan->maxreqs = fchan->maxreqs;
 	return status;
 }
 
@@ -497,12 +500,14 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
 	memset(&tmp, 0, sizeof(tmp));
 
 	/* FIXME: For now, we just accept the client back channel attributes. */
-	status = init_forechannel_attrs(rqstp, &tmp, &cses->fore_channel);
+	tmp.se_bchannel = cses->back_channel;
+	status = init_forechannel_attrs(rqstp, &tmp.se_fchannel,
+					&cses->fore_channel);
 	if (status)
 		goto out;
 
 	/* allocate struct nfsd4_session and slot table in one piece */
-	slotsize = tmp.se_fnumslots * sizeof(struct nfsd4_slot);
+	slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot);
 	new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
 	if (!new)
 		goto out;
@@ -576,7 +581,7 @@ free_session(struct kref *kref)
 	int i;
 
 	ses = container_of(kref, struct nfsd4_session, se_ref);
-	for (i = 0; i < ses->se_fnumslots; i++) {
+	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry;
 		nfsd4_release_respages(e->ce_respages, e->ce_resused);
 	}
@@ -632,16 +637,20 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 static void
 shutdown_callback_client(struct nfs4_client *clp)
 {
-	struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
 
 	if (clnt) {
 		/*
 		 * Callback threads take a reference on the client, so there
 		 * should be no outstanding callbacks at this point.
 		 */
-		clp->cl_callback.cb_client = NULL;
+		clp->cl_cb_conn.cb_client = NULL;
 		rpc_shutdown_client(clnt);
 	}
+	if (clp->cl_cb_conn.cb_cred) {
+		put_rpccred(clp->cl_cb_conn.cb_cred);
+		clp->cl_cb_conn.cb_cred = NULL;
+	}
 }
 
 static inline void
@@ -714,7 +723,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
 		return NULL;
 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_count, 1);
-	atomic_set(&clp->cl_callback.cb_set, 0);
+	atomic_set(&clp->cl_cb_conn.cb_set, 0);
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
 	INIT_LIST_HEAD(&clp->cl_openowners);
@@ -966,7 +975,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne
 static void
 gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 {
-	struct nfs4_callback *cb = &clp->cl_callback;
+	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 
 	/* Currently, we only support tcp for the callback channel */
 	if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
@@ -975,6 +984,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 	if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
 	                 &cb->cb_addr, &cb->cb_port)))
 		goto out_err;
+	cb->cb_minorversion = 0;
 	cb->cb_prog = se->se_callback_prog;
 	cb->cb_ident = se->se_callback_ident;
 	return;
@@ -1128,7 +1138,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
 	 * is sent (lease renewal).
 	 */
 	if (seq && nfsd4_not_cached(resp)) {
-		seq->maxslots = resp->cstate.session->se_fnumslots;
+		seq->maxslots = resp->cstate.session->se_fchannel.maxreqs;
 		return nfs_ok;
 	}
 
@@ -1238,12 +1248,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 			expire_client(conf);
 			goto out_new;
 		}
-		if (ip_addr != conf->cl_addr &&
-		    !(exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A)) {
-			/* Client collision. 18.35.4 case 3 */
-			status = nfserr_clid_inuse;
-			goto out;
-		}
 		/*
 		 * Set bit when the owner id and verifier map to an already
 		 * confirmed client id (18.35.3).
@@ -1257,12 +1261,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
 		copy_verf(conf, &verf);
 		new = conf;
 		goto out_copy;
-	} else {
-		/* 18.35.4 case 7 */
-		if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
-			status = nfserr_noent;
-			goto out;
-		}
+	}
+
+	/* 18.35.4 case 7 */
+	if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
+		status = nfserr_noent;
+		goto out;
 	}
 
 	unconf  = find_unconfirmed_client_by_str(dname, strhashval, true);
@@ -1471,7 +1475,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 		goto out;
 
 	status = nfserr_badslot;
-	if (seq->slotid >= session->se_fnumslots)
+	if (seq->slotid >= session->se_fchannel.maxreqs)
 		goto out;
 
 	slot = &session->se_slots[seq->slotid];
@@ -1686,9 +1690,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 		else {
 			/* XXX: We just turn off callbacks until we can handle
 			  * change request correctly. */
-			atomic_set(&conf->cl_callback.cb_set, 0);
-			gen_confirm(conf);
-			nfsd4_remove_clid_dir(unconf);
+			atomic_set(&conf->cl_cb_conn.cb_set, 0);
 			expire_client(unconf);
 			status = nfs_ok;
 
@@ -1882,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	stp->st_stateowner = sop;
 	get_nfs4_file(fp);
 	stp->st_file = fp;
-	stp->st_stateid.si_boot = boot_time;
+	stp->st_stateid.si_boot = get_seconds();
 	stp->st_stateid.si_stateownerid = sop->so_id;
 	stp->st_stateid.si_fileid = fp->fi_id;
 	stp->st_stateid.si_generation = 0;
@@ -2059,19 +2061,6 @@ nfs4_file_downgrade(struct file *filp, unsigned int share_access)
 }
 
 /*
- * Recall a delegation
- */
-static int
-do_recall(void *__dp)
-{
-	struct nfs4_delegation *dp = __dp;
-
-	dp->dl_file->fi_had_conflict = true;
-	nfsd4_cb_recall(dp);
-	return 0;
-}
-
-/*
  * Spawn a thread to perform a recall on the delegation represented
  * by the lease (file_lock)
  *
@@ -2082,8 +2071,7 @@ do_recall(void *__dp)
 static
 void nfsd_break_deleg_cb(struct file_lock *fl)
 {
-	struct nfs4_delegation *dp=  (struct nfs4_delegation *)fl->fl_owner;
-	struct task_struct *t;
+	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
 
 	dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
 	if (!dp)
@@ -2111,16 +2099,8 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
 	 */
 	fl->fl_break_time = 0;
 
-	t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
-	if (IS_ERR(t)) {
-		struct nfs4_client *clp = dp->dl_client;
-
-		printk(KERN_INFO "NFSD: Callback thread failed for "
-			"for client (clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-		put_nfs4_client(dp->dl_client);
-		nfs4_put_delegation(dp);
-	}
+	dp->dl_file->fi_had_conflict = true;
+	nfsd4_cb_recall(dp);
 }
 
 /*
@@ -2422,7 +2402,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 {
 	struct nfs4_delegation *dp;
 	struct nfs4_stateowner *sop = stp->st_stateowner;
-	struct nfs4_callback *cb = &sop->so_client->cl_callback;
+	struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn;
 	struct file_lock fl, *flp = &fl;
 	int status, flag = 0;
 
@@ -2614,7 +2594,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	renew_client(clp);
 	status = nfserr_cb_path_down;
 	if (!list_empty(&clp->cl_delegations)
-			&& !atomic_read(&clp->cl_callback.cb_set))
+			&& !atomic_read(&clp->cl_cb_conn.cb_set))
 		goto out;
 	status = nfs_ok;
 out:
@@ -2738,12 +2718,42 @@ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
 static int
 STALE_STATEID(stateid_t *stateid)
 {
-	if (stateid->si_boot == boot_time)
-		return 0;
-	dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
-		stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
-		stateid->si_generation);
-	return 1;
+	if (time_after((unsigned long)boot_time,
+			(unsigned long)stateid->si_boot)) {
+		dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
+			stateid->si_boot, stateid->si_stateownerid,
+			stateid->si_fileid, stateid->si_generation);
+		return 1;
+	}
+	return 0;
+}
+
+static int
+EXPIRED_STATEID(stateid_t *stateid)
+{
+	if (time_before((unsigned long)boot_time,
+			((unsigned long)stateid->si_boot)) &&
+	    time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
+		dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n",
+			stateid->si_boot, stateid->si_stateownerid,
+			stateid->si_fileid, stateid->si_generation);
+		return 1;
+	}
+	return 0;
+}
+
+static __be32
+stateid_error_map(stateid_t *stateid)
+{
+	if (STALE_STATEID(stateid))
+		return nfserr_stale_stateid;
+	if (EXPIRED_STATEID(stateid))
+		return nfserr_expired;
+
+	dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n",
+		stateid->si_boot, stateid->si_stateownerid,
+		stateid->si_fileid, stateid->si_generation);
+	return nfserr_bad_stateid;
 }
 
 static inline int
@@ -2867,8 +2877,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
 	status = nfserr_bad_stateid;
 	if (is_delegation_stateid(stateid)) {
 		dp = find_delegation_stateid(ino, stateid);
-		if (!dp)
+		if (!dp) {
+			status = stateid_error_map(stateid);
 			goto out;
+		}
 		status = check_stateid_generation(stateid, &dp->dl_stateid,
 						  flags);
 		if (status)
@@ -2881,8 +2893,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
 			*filpp = dp->dl_vfs_file;
 	} else { /* open or lock stateid */
 		stp = find_stateid(stateid, flags);
-		if (!stp)
+		if (!stp) {
+			status = stateid_error_map(stateid);
 			goto out;
+		}
 		if (nfs4_check_fh(current_fh, stp))
 			goto out;
 		if (!stp->st_stateowner->so_confirmed)
@@ -2956,7 +2970,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
 		 */
 		sop = search_close_lru(stateid->si_stateownerid, flags);
 		if (sop == NULL)
-			return nfserr_bad_stateid;
+			return stateid_error_map(stateid);
 		*sopp = sop;
 		goto check_replay;
 	}
@@ -3227,8 +3241,10 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (!is_delegation_stateid(stateid))
 		goto out;
 	dp = find_delegation_stateid(inode, stateid);
-	if (!dp)
+	if (!dp) {
+		status = stateid_error_map(stateid);
 		goto out;
+	}
 	status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
 	if (status)
 		goto out;
@@ -3455,7 +3471,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	stp->st_stateowner = sop;
 	get_nfs4_file(fp);
 	stp->st_file = fp;
-	stp->st_stateid.si_boot = boot_time;
+	stp->st_stateid.si_boot = get_seconds();
 	stp->st_stateid.si_stateownerid = sop->so_id;
 	stp->st_stateid.si_fileid = fp->fi_id;
 	stp->st_stateid.si_generation = 0;
@@ -3987,6 +4003,7 @@ nfs4_state_init(void)
 		INIT_LIST_HEAD(&conf_str_hashtbl[i]);
 		INIT_LIST_HEAD(&unconf_str_hashtbl[i]);
 		INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
+		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
 	}
 	for (i = 0; i < SESSION_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&sessionid_hashtbl[i]);
@@ -4009,8 +4026,6 @@ nfs4_state_init(void)
 	INIT_LIST_HEAD(&close_lru);
 	INIT_LIST_HEAD(&client_lru);
 	INIT_LIST_HEAD(&del_recall_lru);
-	for (i = 0; i < CLIENT_HASH_SIZE; i++)
-		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
 	reclaim_str_hashtbl_size = 0;
 	return 0;
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b73549d293be..2dcc7feaa6ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -83,16 +83,6 @@ check_filename(char *str, int len, __be32 err)
 	return 0;
 }
 
-/*
- * START OF "GENERIC" DECODE ROUTINES.
- *   These may look a little ugly since they are imported from a "generic"
- * set of XDR encode/decode routines which are intended to be shared by
- * all of our NFSv4 implementations (OpenBSD, MacOS X...).
- *
- * If the pain of reading these is too great, it should be a straightforward
- * task to translate them into Linux-specific versions which are more
- * consistent with the style used in NFSv2/v3...
- */
 #define DECODE_HEAD				\
 	__be32 *p;				\
 	__be32 status
@@ -254,20 +244,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
 	DECODE_TAIL;
 }
 
-static u32 nfsd_attrmask[] = {
-	NFSD_WRITEABLE_ATTRS_WORD0,
-	NFSD_WRITEABLE_ATTRS_WORD1,
-	NFSD_WRITEABLE_ATTRS_WORD2
-};
-
-static u32 nfsd41_ex_attrmask[] = {
-	NFSD_SUPPATTR_EXCLCREAT_WORD0,
-	NFSD_SUPPATTR_EXCLCREAT_WORD1,
-	NFSD_SUPPATTR_EXCLCREAT_WORD2
-};
-
 static __be32
-nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable,
+nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
 		   struct iattr *iattr, struct nfs4_acl **acl)
 {
 	int expected_len, len = 0;
@@ -280,18 +258,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable,
 	if ((status = nfsd4_decode_bitmap(argp, bmval)))
 		return status;
 
-	/*
-	 * According to spec, unsupported attributes return ERR_ATTRNOTSUPP;
-	 * read-only attributes return ERR_INVAL.
-	 */
-	if ((bmval[0] & ~nfsd_suppattrs0(argp->minorversion)) ||
-	    (bmval[1] & ~nfsd_suppattrs1(argp->minorversion)) ||
-	    (bmval[2] & ~nfsd_suppattrs2(argp->minorversion)))
-		return nfserr_attrnotsupp;
-	if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) ||
-	    (bmval[2] & ~writable[2]))
-		return nfserr_inval;
-
 	READ_BUF(4);
 	READ32(expected_len);
 
@@ -424,8 +390,11 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable,
 			goto xdr_error;
 		}
 	}
-	BUG_ON(bmval[2]);	/* no such writeable attr supported yet */
-	if (len != expected_len)
+	if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0
+	    || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1
+	    || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2)
+		READ_BUF(expected_len - len);
+	else if (len != expected_len)
 		goto xdr_error;
 
 	DECODE_TAIL;
@@ -518,8 +487,8 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
 	if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval)))
 		return status;
 
-	status = nfsd4_decode_fattr(argp, create->cr_bmval, nfsd_attrmask,
-				    &create->cr_iattr, &create->cr_acl);
+	status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
+				    &create->cr_acl);
 	if (status)
 		goto out;
 
@@ -682,7 +651,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 		case NFS4_CREATE_UNCHECKED:
 		case NFS4_CREATE_GUARDED:
 			status = nfsd4_decode_fattr(argp, open->op_bmval,
-				nfsd_attrmask, &open->op_iattr, &open->op_acl);
+				&open->op_iattr, &open->op_acl);
 			if (status)
 				goto out;
 			break;
@@ -696,8 +665,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 			READ_BUF(8);
 			COPYMEM(open->op_verf.data, 8);
 			status = nfsd4_decode_fattr(argp, open->op_bmval,
-				nfsd41_ex_attrmask, &open->op_iattr,
-				&open->op_acl);
+				&open->op_iattr, &open->op_acl);
 			if (status)
 				goto out;
 			break;
@@ -893,8 +861,8 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
 	status = nfsd4_decode_stateid(argp, &setattr->sa_stateid);
 	if (status)
 		return status;
-	return nfsd4_decode_fattr(argp, setattr->sa_bmval, nfsd_attrmask,
-				  &setattr->sa_iattr, &setattr->sa_acl);
+	return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr,
+				  &setattr->sa_acl);
 }
 
 static __be32
@@ -1328,64 +1296,64 @@ static nfsd4_dec nfsd4_dec_ops[] = {
 };
 
 static nfsd4_dec nfsd41_dec_ops[] = {
-	[OP_ACCESS]		(nfsd4_dec)nfsd4_decode_access,
-	[OP_CLOSE]		(nfsd4_dec)nfsd4_decode_close,
-	[OP_COMMIT]		(nfsd4_dec)nfsd4_decode_commit,
-	[OP_CREATE]		(nfsd4_dec)nfsd4_decode_create,
-	[OP_DELEGPURGE]		(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_DELEGRETURN]	(nfsd4_dec)nfsd4_decode_delegreturn,
-	[OP_GETATTR]		(nfsd4_dec)nfsd4_decode_getattr,
-	[OP_GETFH]		(nfsd4_dec)nfsd4_decode_noop,
-	[OP_LINK]		(nfsd4_dec)nfsd4_decode_link,
-	[OP_LOCK]		(nfsd4_dec)nfsd4_decode_lock,
-	[OP_LOCKT]		(nfsd4_dec)nfsd4_decode_lockt,
-	[OP_LOCKU]		(nfsd4_dec)nfsd4_decode_locku,
-	[OP_LOOKUP]		(nfsd4_dec)nfsd4_decode_lookup,
-	[OP_LOOKUPP]		(nfsd4_dec)nfsd4_decode_noop,
-	[OP_NVERIFY]		(nfsd4_dec)nfsd4_decode_verify,
-	[OP_OPEN]		(nfsd4_dec)nfsd4_decode_open,
-	[OP_OPENATTR]		(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_OPEN_CONFIRM]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_OPEN_DOWNGRADE]	(nfsd4_dec)nfsd4_decode_open_downgrade,
-	[OP_PUTFH]		(nfsd4_dec)nfsd4_decode_putfh,
-	[OP_PUTPUBFH]		(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_PUTROOTFH]		(nfsd4_dec)nfsd4_decode_noop,
-	[OP_READ]		(nfsd4_dec)nfsd4_decode_read,
-	[OP_READDIR]		(nfsd4_dec)nfsd4_decode_readdir,
-	[OP_READLINK]		(nfsd4_dec)nfsd4_decode_noop,
-	[OP_REMOVE]		(nfsd4_dec)nfsd4_decode_remove,
-	[OP_RENAME]		(nfsd4_dec)nfsd4_decode_rename,
-	[OP_RENEW]		(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_RESTOREFH]		(nfsd4_dec)nfsd4_decode_noop,
-	[OP_SAVEFH]		(nfsd4_dec)nfsd4_decode_noop,
-	[OP_SECINFO]		(nfsd4_dec)nfsd4_decode_secinfo,
-	[OP_SETATTR]		(nfsd4_dec)nfsd4_decode_setattr,
-	[OP_SETCLIENTID]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_SETCLIENTID_CONFIRM](nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_VERIFY]		(nfsd4_dec)nfsd4_decode_verify,
-	[OP_WRITE]		(nfsd4_dec)nfsd4_decode_write,
-	[OP_RELEASE_LOCKOWNER]	(nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_ACCESS]		= (nfsd4_dec)nfsd4_decode_access,
+	[OP_CLOSE]		= (nfsd4_dec)nfsd4_decode_close,
+	[OP_COMMIT]		= (nfsd4_dec)nfsd4_decode_commit,
+	[OP_CREATE]		= (nfsd4_dec)nfsd4_decode_create,
+	[OP_DELEGPURGE]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_DELEGRETURN]	= (nfsd4_dec)nfsd4_decode_delegreturn,
+	[OP_GETATTR]		= (nfsd4_dec)nfsd4_decode_getattr,
+	[OP_GETFH]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_LINK]		= (nfsd4_dec)nfsd4_decode_link,
+	[OP_LOCK]		= (nfsd4_dec)nfsd4_decode_lock,
+	[OP_LOCKT]		= (nfsd4_dec)nfsd4_decode_lockt,
+	[OP_LOCKU]		= (nfsd4_dec)nfsd4_decode_locku,
+	[OP_LOOKUP]		= (nfsd4_dec)nfsd4_decode_lookup,
+	[OP_LOOKUPP]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_NVERIFY]		= (nfsd4_dec)nfsd4_decode_verify,
+	[OP_OPEN]		= (nfsd4_dec)nfsd4_decode_open,
+	[OP_OPENATTR]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_OPEN_CONFIRM]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_OPEN_DOWNGRADE]	= (nfsd4_dec)nfsd4_decode_open_downgrade,
+	[OP_PUTFH]		= (nfsd4_dec)nfsd4_decode_putfh,
+	[OP_PUTPUBFH]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_PUTROOTFH]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_READ]		= (nfsd4_dec)nfsd4_decode_read,
+	[OP_READDIR]		= (nfsd4_dec)nfsd4_decode_readdir,
+	[OP_READLINK]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_REMOVE]		= (nfsd4_dec)nfsd4_decode_remove,
+	[OP_RENAME]		= (nfsd4_dec)nfsd4_decode_rename,
+	[OP_RENEW]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_RESTOREFH]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_SAVEFH]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_SECINFO]		= (nfsd4_dec)nfsd4_decode_secinfo,
+	[OP_SETATTR]		= (nfsd4_dec)nfsd4_decode_setattr,
+	[OP_SETCLIENTID]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_VERIFY]		= (nfsd4_dec)nfsd4_decode_verify,
+	[OP_WRITE]		= (nfsd4_dec)nfsd4_decode_write,
+	[OP_RELEASE_LOCKOWNER]	= (nfsd4_dec)nfsd4_decode_notsupp,
 
 	/* new operations for NFSv4.1 */
-	[OP_BACKCHANNEL_CTL]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_BIND_CONN_TO_SESSION](nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_EXCHANGE_ID]	(nfsd4_dec)nfsd4_decode_exchange_id,
-	[OP_CREATE_SESSION]	(nfsd4_dec)nfsd4_decode_create_session,
-	[OP_DESTROY_SESSION]	(nfsd4_dec)nfsd4_decode_destroy_session,
-	[OP_FREE_STATEID]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_GET_DIR_DELEGATION]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_GETDEVICEINFO]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_GETDEVICELIST]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_LAYOUTCOMMIT]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_LAYOUTGET]		(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_LAYOUTRETURN]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_SECINFO_NO_NAME]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_SEQUENCE]		(nfsd4_dec)nfsd4_decode_sequence,
-	[OP_SET_SSV]		(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_TEST_STATEID]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_WANT_DELEGATION]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_DESTROY_CLIENTID]	(nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_RECLAIM_COMPLETE]	(nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_BACKCHANNEL_CTL]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_EXCHANGE_ID]	= (nfsd4_dec)nfsd4_decode_exchange_id,
+	[OP_CREATE_SESSION]	= (nfsd4_dec)nfsd4_decode_create_session,
+	[OP_DESTROY_SESSION]	= (nfsd4_dec)nfsd4_decode_destroy_session,
+	[OP_FREE_STATEID]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_GET_DIR_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_SECINFO_NO_NAME]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_SEQUENCE]		= (nfsd4_dec)nfsd4_decode_sequence,
+	[OP_SET_SSV]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_TEST_STATEID]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_WANT_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_DESTROY_CLIENTID]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_RECLAIM_COMPLETE]	= (nfsd4_dec)nfsd4_decode_notsupp,
 };
 
 struct nfsd4_minorversion_ops {
@@ -1489,21 +1457,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 
 	DECODE_TAIL;
 }
-/*
- * END OF "GENERIC" DECODE ROUTINES.
- */
-
-/*
- * START OF "GENERIC" ENCODE ROUTINES.
- *   These may look a little ugly since they are imported from a "generic"
- * set of XDR encode/decode routines which are intended to be shared by
- * all of our NFSv4 implementations (OpenBSD, MacOS X...).
- *
- * If the pain of reading these is too great, it should be a straightforward
- * task to translate them into Linux-specific versions which are more
- * consistent with the style used in NFSv2/v3...
- */
-#define ENCODE_HEAD              __be32 *p
 
 #define WRITE32(n)               *p++ = htonl(n)
 #define WRITE64(n)               do {				\
@@ -1515,13 +1468,41 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	memcpy(p, ptr, nbytes);					\
 	p += XDR_QUADLEN(nbytes);				\
 }} while (0)
-#define WRITECINFO(c)		do {				\
-	*p++ = htonl(c.atomic);					\
-	*p++ = htonl(c.before_ctime_sec);				\
-	*p++ = htonl(c.before_ctime_nsec);				\
-	*p++ = htonl(c.after_ctime_sec);				\
-	*p++ = htonl(c.after_ctime_nsec);				\
-} while (0)
+
+static void write32(__be32 **p, u32 n)
+{
+	*(*p)++ = n;
+}
+
+static void write64(__be32 **p, u64 n)
+{
+	write32(p, (u32)(n >> 32));
+	write32(p, (u32)n);
+}
+
+static void write_change(__be32 **p, struct kstat *stat, struct inode *inode)
+{
+	if (IS_I_VERSION(inode)) {
+		write64(p, inode->i_version);
+	} else {
+		write32(p, stat->ctime.tv_sec);
+		write32(p, stat->ctime.tv_nsec);
+	}
+}
+
+static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
+{
+	write32(p, c->atomic);
+	if (c->change_supported) {
+		write64(p, c->before_change);
+		write64(p, c->after_change);
+	} else {
+		write32(p, c->before_ctime_sec);
+		write32(p, c->before_ctime_nsec);
+		write32(p, c->after_ctime_sec);
+		write32(p, c->after_ctime_nsec);
+	}
+}
 
 #define RESERVE_SPACE(nbytes)	do {				\
 	p = resp->p;						\
@@ -1874,16 +1855,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 			WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
 	}
 	if (bmval0 & FATTR4_WORD0_CHANGE) {
-		/*
-		 * Note: This _must_ be consistent with the scheme for writing
-		 * change_info, so any changes made here must be reflected there
-		 * as well.  (See xdr4.h:set_change_info() and the WRITECINFO()
-		 * macro above.)
-		 */
 		if ((buflen -= 8) < 0)
 			goto out_resource;
-		WRITE32(stat.ctime.tv_sec);
-		WRITE32(stat.ctime.tv_nsec);
+		write_change(&p, &stat, dentry->d_inode);
 	}
 	if (bmval0 & FATTR4_WORD0_SIZE) {
 		if ((buflen -= 8) < 0)
@@ -2348,7 +2322,7 @@ fail:
 static void
 nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	RESERVE_SPACE(sizeof(stateid_t));
 	WRITE32(sid->si_generation);
@@ -2359,7 +2333,7 @@ nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid)
 static __be32
 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (!nfserr) {
 		RESERVE_SPACE(8);
@@ -2386,7 +2360,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
 static __be32
 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (!nfserr) {
 		RESERVE_SPACE(8);
@@ -2399,11 +2373,11 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
 static __be32
 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (!nfserr) {
 		RESERVE_SPACE(32);
-		WRITECINFO(create->cr_cinfo);
+		write_cinfo(&p, &create->cr_cinfo);
 		WRITE32(2);
 		WRITE32(create->cr_bmval[0]);
 		WRITE32(create->cr_bmval[1]);
@@ -2435,7 +2409,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
 {
 	struct svc_fh *fhp = *fhpp;
 	unsigned int len;
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (!nfserr) {
 		len = fhp->fh_handle.fh_size;
@@ -2454,7 +2428,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
 static void
 nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0));
 	WRITE64(ld->ld_start);
@@ -2510,11 +2484,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
 static __be32
 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (!nfserr) {
 		RESERVE_SPACE(20);
-		WRITECINFO(link->li_cinfo);
+		write_cinfo(&p, &link->li_cinfo);
 		ADJUST_ARGS();
 	}
 	return nfserr;
@@ -2524,7 +2498,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
 static __be32
 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 	ENCODE_SEQID_OP_HEAD;
 
 	if (nfserr)
@@ -2532,7 +2506,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
 
 	nfsd4_encode_stateid(resp, &open->op_stateid);
 	RESERVE_SPACE(40);
-	WRITECINFO(open->op_cinfo);
+	write_cinfo(&p, &open->op_cinfo);
 	WRITE32(open->op_rflags);
 	WRITE32(2);
 	WRITE32(open->op_bmval[0]);
@@ -2619,7 +2593,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	int v, pn;
 	unsigned long maxcount; 
 	long len;
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
@@ -2681,7 +2655,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 {
 	int maxcount;
 	char *page;
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
@@ -2730,7 +2704,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 	int maxcount;
 	loff_t offset;
 	__be32 *page, *savep, *tailbase;
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
@@ -2806,11 +2780,11 @@ err_no_verf:
 static __be32
 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (!nfserr) {
 		RESERVE_SPACE(20);
-		WRITECINFO(remove->rm_cinfo);
+		write_cinfo(&p, &remove->rm_cinfo);
 		ADJUST_ARGS();
 	}
 	return nfserr;
@@ -2819,12 +2793,12 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
 static __be32
 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (!nfserr) {
 		RESERVE_SPACE(40);
-		WRITECINFO(rename->rn_sinfo);
-		WRITECINFO(rename->rn_tinfo);
+		write_cinfo(&p, &rename->rn_sinfo);
+		write_cinfo(&p, &rename->rn_tinfo);
 		ADJUST_ARGS();
 	}
 	return nfserr;
@@ -2839,7 +2813,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
 	u32 nflavs;
 	struct exp_flavor_info *flavs;
 	struct exp_flavor_info def_flavs[2];
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (nfserr)
 		goto out;
@@ -2904,7 +2878,7 @@ out:
 static __be32
 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	RESERVE_SPACE(12);
 	if (nfserr) {
@@ -2924,7 +2898,7 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 static __be32
 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (!nfserr) {
 		RESERVE_SPACE(8 + sizeof(nfs4_verifier));
@@ -2944,7 +2918,7 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
 static __be32
 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (!nfserr) {
 		RESERVE_SPACE(16);
@@ -2960,7 +2934,7 @@ static __be32
 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr,
 			 struct nfsd4_exchange_id *exid)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 	char *major_id;
 	char *server_scope;
 	int major_id_sz;
@@ -3015,7 +2989,7 @@ static __be32
 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr,
 			    struct nfsd4_create_session *sess)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
@@ -3071,7 +3045,7 @@ __be32
 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
 		      struct nfsd4_sequence *seq)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 
 	if (nfserr)
 		return nfserr;
@@ -3209,7 +3183,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
 	dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
 		length, xb->page_len, tlen, pad);
 
-	if (length <= session->se_fmaxresp_cached)
+	if (length <= session->se_fchannel.maxresp_cached)
 		return status;
 	else
 		return nfserr_rep_too_big_to_cache;
@@ -3219,7 +3193,7 @@ void
 nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 {
 	__be32 *statp;
-	ENCODE_HEAD;
+	__be32 *p;
 
 	RESERVE_SPACE(8);
 	WRITE32(op->opnum);
@@ -3253,7 +3227,7 @@ status:
 void
 nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 {
-	ENCODE_HEAD;
+	__be32 *p;
 	struct nfs4_replay *rp = op->replay;
 
 	BUG_ON(!rp);
@@ -3268,10 +3242,6 @@ nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	ADJUST_ARGS();
 }
 
-/*
- * END OF "GENERIC" ENCODE ROUTINES.
- */
-
 int
 nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 5bfc2ac60d54..4638635c5d87 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -29,15 +29,24 @@
  */
 #define CACHESIZE		1024
 #define HASHSIZE		64
-#define REQHASH(xid)		(((((__force __u32)xid) >> 24) ^ ((__force __u32)xid)) & (HASHSIZE-1))
 
-static struct hlist_head *	hash_list;
+static struct hlist_head *	cache_hash;
 static struct list_head 	lru_head;
 static int			cache_disabled = 1;
 
+/*
+ * Calculate the hash index from an XID.
+ */
+static inline u32 request_hash(u32 xid)
+{
+	u32 h = xid;
+	h ^= (xid >> 24);
+	return h & (HASHSIZE-1);
+}
+
 static int	nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
 
-/* 
+/*
  * locking for the reply cache:
  * A cache entry is "single use" if c_state == RC_INPROG
  * Otherwise, it when accessing _prev or _next, the lock must be held.
@@ -62,8 +71,8 @@ int nfsd_reply_cache_init(void)
 		i--;
 	}
 
-	hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
-	if (!hash_list)
+	cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
+	if (!cache_hash)
 		goto out_nomem;
 
 	cache_disabled = 0;
@@ -88,8 +97,8 @@ void nfsd_reply_cache_shutdown(void)
 
 	cache_disabled = 1;
 
-	kfree (hash_list);
-	hash_list = NULL;
+	kfree (cache_hash);
+	cache_hash = NULL;
 }
 
 /*
@@ -108,7 +117,7 @@ static void
 hash_refile(struct svc_cacherep *rp)
 {
 	hlist_del_init(&rp->c_hash);
-	hlist_add_head(&rp->c_hash, hash_list + REQHASH(rp->c_xid));
+	hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid));
 }
 
 /*
@@ -138,7 +147,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
 	spin_lock(&cache_lock);
 	rtn = RC_DOIT;
 
-	rh = &hash_list[REQHASH(xid)];
+	rh = &cache_hash[request_hash(xid)];
 	hlist_for_each_entry(rp, hn, rh, c_hash) {
 		if (rp->c_state != RC_UNUSED &&
 		    xid == rp->c_xid && proc == rp->c_proc &&
@@ -165,8 +174,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
 	}
 	}
 
-	/* This should not happen */
-	if (rp == NULL) {
+	/* All entries on the LRU are in-progress. This should not happen */
+	if (&rp->c_lru == &lru_head) {
 		static int	complaints;
 
 		printk(KERN_WARNING "nfsd: all repcache entries locked!\n");
@@ -264,7 +273,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 
 	len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
 	len >>= 2;
-	
+
 	/* Don't cache excessive amounts of data and XDR failures */
 	if (!statp || len > (256 >> 2)) {
 		rp->c_state = RC_UNUSED;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index af16849d243a..1250fb978ac1 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -207,10 +207,14 @@ static struct file_operations pool_stats_operations = {
 static ssize_t write_svc(struct file *file, char *buf, size_t size)
 {
 	struct nfsctl_svc *data;
+	int err;
 	if (size < sizeof(*data))
 		return -EINVAL;
 	data = (struct nfsctl_svc*) buf;
-	return nfsd_svc(data->svc_port, data->svc_nthreads);
+	err = nfsd_svc(data->svc_port, data->svc_nthreads);
+	if (err < 0)
+		return err;
+	return 0;
 }
 
 /**
@@ -692,11 +696,12 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
 		if (newthreads < 0)
 			return -EINVAL;
 		rv = nfsd_svc(NFS_PORT, newthreads);
-		if (rv)
+		if (rv < 0)
 			return rv;
-	}
-	sprintf(buf, "%d\n", nfsd_nrthreads());
-	return strlen(buf);
+	} else
+		rv = nfsd_nrthreads();
+
+	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv);
 }
 
 /**
@@ -793,7 +798,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 {
 	char *mesg = buf;
 	char *vers, *minorp, sign;
-	int len, num;
+	int len, num, remaining;
 	unsigned minor;
 	ssize_t tlen = 0;
 	char *sep;
@@ -840,32 +845,50 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 			}
 		next:
 			vers += len + 1;
-			tlen += len;
 		} while ((len = qword_get(&mesg, vers, size)) > 0);
 		/* If all get turned off, turn them back on, as
 		 * having no versions is BAD
 		 */
 		nfsd_reset_versions();
 	}
+
 	/* Now write current state into reply buffer */
 	len = 0;
 	sep = "";
+	remaining = SIMPLE_TRANSACTION_LIMIT;
 	for (num=2 ; num <= 4 ; num++)
 		if (nfsd_vers(num, NFSD_AVAIL)) {
-			len += sprintf(buf+len, "%s%c%d", sep,
+			len = snprintf(buf, remaining, "%s%c%d", sep,
 				       nfsd_vers(num, NFSD_TEST)?'+':'-',
 				       num);
 			sep = " ";
+
+			if (len > remaining)
+				break;
+			remaining -= len;
+			buf += len;
+			tlen += len;
 		}
 	if (nfsd_vers(4, NFSD_AVAIL))
-		for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++)
-			len += sprintf(buf+len, " %c4.%u",
+		for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
+		     minor++) {
+			len = snprintf(buf, remaining, " %c4.%u",
 					(nfsd_vers(4, NFSD_TEST) &&
 					 nfsd_minorversion(minor, NFSD_TEST)) ?
 						'+' : '-',
 					minor);
-	len += sprintf(buf+len, "\n");
-	return len;
+
+			if (len > remaining)
+				break;
+			remaining -= len;
+			buf += len;
+			tlen += len;
+		}
+
+	len = snprintf(buf, remaining, "\n");
+	if (len > remaining)
+		return -EINVAL;
+	return tlen + len;
 }
 
 /**
@@ -910,104 +933,143 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
 	return rv;
 }
 
-static ssize_t __write_ports(struct file *file, char *buf, size_t size)
+/*
+ * Zero-length write.  Return a list of NFSD's current listener
+ * transports.
+ */
+static ssize_t __write_ports_names(char *buf)
 {
-	if (size == 0) {
-		int len = 0;
+	if (nfsd_serv == NULL)
+		return 0;
+	return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT);
+}
 
-		if (nfsd_serv)
-			len = svc_xprt_names(nfsd_serv, buf, 0);
-		return len;
-	}
-	/* Either a single 'fd' number is written, in which
-	 * case it must be for a socket of a supported family/protocol,
-	 * and we use it as an nfsd socket, or
-	 * A '-' followed by the 'name' of a socket in which case
-	 * we close the socket.
-	 */
-	if (isdigit(buf[0])) {
-		char *mesg = buf;
-		int fd;
-		int err;
-		err = get_int(&mesg, &fd);
-		if (err)
-			return -EINVAL;
-		if (fd < 0)
-			return -EINVAL;
-		err = nfsd_create_serv();
-		if (!err) {
-			err = svc_addsock(nfsd_serv, fd, buf);
-			if (err >= 0) {
-				err = lockd_up();
-				if (err < 0)
-					svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf);
-			}
-			/* Decrease the count, but don't shutdown the
-			 * the service
-			 */
-			nfsd_serv->sv_nrthreads--;
-		}
-		return err < 0 ? err : 0;
-	}
-	if (buf[0] == '-' && isdigit(buf[1])) {
-		char *toclose = kstrdup(buf+1, GFP_KERNEL);
-		int len = 0;
-		if (!toclose)
-			return -ENOMEM;
-		if (nfsd_serv)
-			len = svc_sock_names(buf, nfsd_serv, toclose);
-		if (len >= 0)
-			lockd_down();
-		kfree(toclose);
-		return len;
-	}
-	/*
-	 * Add a transport listener by writing it's transport name
-	 */
-	if (isalpha(buf[0])) {
-		int err;
-		char transport[16];
-		int port;
-		if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
-			if (port < 1 || port > 65535)
-				return -EINVAL;
-			err = nfsd_create_serv();
-			if (!err) {
-				err = svc_create_xprt(nfsd_serv,
-						      transport, PF_INET, port,
-						      SVC_SOCK_ANONYMOUS);
-				if (err == -ENOENT)
-					/* Give a reasonable perror msg for
-					 * bad transport string */
-					err = -EPROTONOSUPPORT;
-			}
-			return err < 0 ? err : 0;
-		}
-	}
-	/*
-	 * Remove a transport by writing it's transport name and port number
-	 */
-	if (buf[0] == '-' && isalpha(buf[1])) {
-		struct svc_xprt *xprt;
-		int err = -EINVAL;
-		char transport[16];
-		int port;
-		if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
-			if (port < 1 || port > 65535)
-				return -EINVAL;
-			if (nfsd_serv) {
-				xprt = svc_find_xprt(nfsd_serv, transport,
-						     AF_UNSPEC, port);
-				if (xprt) {
-					svc_close_xprt(xprt);
-					svc_xprt_put(xprt);
-					err = 0;
-				} else
-					err = -ENOTCONN;
-			}
-			return err < 0 ? err : 0;
-		}
+/*
+ * A single 'fd' number was written, in which case it must be for
+ * a socket of a supported family/protocol, and we use it as an
+ * nfsd listener.
+ */
+static ssize_t __write_ports_addfd(char *buf)
+{
+	char *mesg = buf;
+	int fd, err;
+
+	err = get_int(&mesg, &fd);
+	if (err != 0 || fd < 0)
+		return -EINVAL;
+
+	err = nfsd_create_serv();
+	if (err != 0)
+		return err;
+
+	err = lockd_up();
+	if (err != 0)
+		goto out;
+
+	err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
+	if (err < 0)
+		lockd_down();
+
+out:
+	/* Decrease the count, but don't shut down the service */
+	nfsd_serv->sv_nrthreads--;
+	return err;
+}
+
+/*
+ * A '-' followed by the 'name' of a socket means we close the socket.
+ */
+static ssize_t __write_ports_delfd(char *buf)
+{
+	char *toclose;
+	int len = 0;
+
+	toclose = kstrdup(buf + 1, GFP_KERNEL);
+	if (toclose == NULL)
+		return -ENOMEM;
+
+	if (nfsd_serv != NULL)
+		len = svc_sock_names(nfsd_serv, buf,
+					SIMPLE_TRANSACTION_LIMIT, toclose);
+	if (len >= 0)
+		lockd_down();
+
+	kfree(toclose);
+	return len;
+}
+
+/*
+ * A transport listener is added by writing it's transport name and
+ * a port number.
+ */
+static ssize_t __write_ports_addxprt(char *buf)
+{
+	char transport[16];
+	int port, err;
+
+	if (sscanf(buf, "%15s %4u", transport, &port) != 2)
+		return -EINVAL;
+
+	if (port < 1 || port > USHORT_MAX)
+		return -EINVAL;
+
+	err = nfsd_create_serv();
+	if (err != 0)
+		return err;
+
+	err = svc_create_xprt(nfsd_serv, transport,
+				PF_INET, port, SVC_SOCK_ANONYMOUS);
+	if (err < 0) {
+		/* Give a reasonable perror msg for bad transport string */
+		if (err == -ENOENT)
+			err = -EPROTONOSUPPORT;
+		return err;
 	}
+	return 0;
+}
+
+/*
+ * A transport listener is removed by writing a "-", it's transport
+ * name, and it's port number.
+ */
+static ssize_t __write_ports_delxprt(char *buf)
+{
+	struct svc_xprt *xprt;
+	char transport[16];
+	int port;
+
+	if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2)
+		return -EINVAL;
+
+	if (port < 1 || port > USHORT_MAX || nfsd_serv == NULL)
+		return -EINVAL;
+
+	xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port);
+	if (xprt == NULL)
+		return -ENOTCONN;
+
+	svc_close_xprt(xprt);
+	svc_xprt_put(xprt);
+	return 0;
+}
+
+static ssize_t __write_ports(struct file *file, char *buf, size_t size)
+{
+	if (size == 0)
+		return __write_ports_names(buf);
+
+	if (isdigit(buf[0]))
+		return __write_ports_addfd(buf);
+
+	if (buf[0] == '-' && isdigit(buf[1]))
+		return __write_ports_delfd(buf);
+
+	if (isalpha(buf[0]))
+		return __write_ports_addxprt(buf);
+
+	if (buf[0] == '-' && isalpha(buf[1]))
+		return __write_ports_delxprt(buf);
+
 	return -EINVAL;
 }
 
@@ -1030,7 +1092,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
  *			buf:		C string containing an unsigned
  *					integer value representing a bound
  *					but unconnected socket that is to be
- *					used as an NFSD listener
+ *					used as an NFSD listener; listen(3)
+ *					must be called for a SOCK_STREAM
+ *					socket, otherwise it is ignored
  *			size:		non-zero length of C string in @buf
  * Output:
  *	On success:	NFS service is started;
@@ -1138,7 +1202,9 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 		nfsd_max_blksize = bsize;
 		mutex_unlock(&nfsd_mutex);
 	}
-	return sprintf(buf, "%d\n", nfsd_max_blksize);
+
+	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n",
+							nfsd_max_blksize);
 }
 
 #ifdef CONFIG_NFSD_V4
@@ -1162,8 +1228,9 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
 			return -EINVAL;
 		nfs4_reset_lease(lease);
 	}
-	sprintf(buf, "%ld\n", nfs4_lease_time());
-	return strlen(buf);
+
+	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n",
+							nfs4_lease_time());
 }
 
 /**
@@ -1219,8 +1286,9 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
 
 		status = nfs4_reset_recoverydir(recdir);
 	}
-	sprintf(buf, "%s\n", nfs4_recoverydir());
-	return strlen(buf);
+
+	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n",
+							nfs4_recoverydir());
 }
 
 /**
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 9f1ca17293d3..8847f3fbfc1e 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -27,9 +27,6 @@
 #define NFSDDBG_FACILITY		NFSDDBG_FH
 
 
-static int nfsd_nr_verified;
-static int nfsd_nr_put;
-
 /*
  * our acceptability function.
  * if NOSUBTREECHECK, accept anything
@@ -251,7 +248,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
 
 	fhp->fh_dentry = dentry;
 	fhp->fh_export = exp;
-	nfsd_nr_verified++;
 	return 0;
 out:
 	exp_put(exp);
@@ -552,7 +548,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 			return nfserr_opnotsupp;
 	}
 
-	nfsd_nr_verified++;
 	return 0;
 }
 
@@ -609,7 +604,6 @@ fh_put(struct svc_fh *fhp)
 		fhp->fh_pre_saved = 0;
 		fhp->fh_post_saved = 0;
 #endif
-		nfsd_nr_put++;
 	}
 	if (exp) {
 		cache_put(&exp->h, &svc_export_cache);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index e298e260b5f1..0eb9c820b7a6 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -533,45 +533,179 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle   *argp,
  * NFSv2 Server procedures.
  * Only the results of non-idempotent operations are cached.
  */
-#define nfsd_proc_none		NULL
-#define nfssvc_release_none	NULL
 struct nfsd_void { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsd_proc_##name,		\
-   (kxdrproc_t) nfssvc_decode_##argt,		\
-   (kxdrproc_t) nfssvc_encode_##rest,		\
-   (kxdrproc_t) nfssvc_release_##relt,		\
-   sizeof(struct nfsd_##argt),			\
-   sizeof(struct nfsd_##rest),			\
-   0,						\
-   cache,					\
-   respsize,				       	\
- }
-
 #define ST 1		/* status */
 #define FH 8		/* filehandle */
 #define	AT 18		/* attributes */
 
 static struct svc_procedure		nfsd_procedures2[18] = {
-  PROC(null,	 void,		void,		none,		RC_NOCACHE, ST),
-  PROC(getattr,	 fhandle,	attrstat,	fhandle,	RC_NOCACHE, ST+AT),
-  PROC(setattr,  sattrargs,	attrstat,	fhandle,	RC_REPLBUFF, ST+AT),
-  PROC(none,	 void,		void,		none,		RC_NOCACHE, ST),
-  PROC(lookup,	 diropargs,	diropres,	fhandle,	RC_NOCACHE, ST+FH+AT),
-  PROC(readlink, readlinkargs,	readlinkres,	none,		RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
-  PROC(read,	 readargs,	readres,	fhandle,	RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4),
-  PROC(none,	 void,		void,		none,		RC_NOCACHE, ST),
-  PROC(write,	 writeargs,	attrstat,	fhandle,	RC_REPLBUFF, ST+AT),
-  PROC(create,	 createargs,	diropres,	fhandle,	RC_REPLBUFF, ST+FH+AT),
-  PROC(remove,	 diropargs,	void,		none,		RC_REPLSTAT, ST),
-  PROC(rename,	 renameargs,	void,		none,		RC_REPLSTAT, ST),
-  PROC(link,	 linkargs,	void,		none,		RC_REPLSTAT, ST),
-  PROC(symlink,	 symlinkargs,	void,		none,		RC_REPLSTAT, ST),
-  PROC(mkdir,	 createargs,	diropres,	fhandle,	RC_REPLBUFF, ST+FH+AT),
-  PROC(rmdir,	 diropargs,	void,		none,		RC_REPLSTAT, ST),
-  PROC(readdir,	 readdirargs,	readdirres,	none,		RC_NOCACHE, 0),
-  PROC(statfs,	 fhandle,	statfsres,	none,		RC_NOCACHE, ST+5),
+	[NFSPROC_NULL] = {
+		.pc_func = (svc_procfunc) nfsd_proc_null,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_void),
+		.pc_ressize = sizeof(struct nfsd_void),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST,
+	},
+	[NFSPROC_GETATTR] = {
+		.pc_func = (svc_procfunc) nfsd_proc_getattr,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd_fhandle),
+		.pc_ressize = sizeof(struct nfsd_attrstat),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+AT,
+	},
+	[NFSPROC_SETATTR] = {
+		.pc_func = (svc_procfunc) nfsd_proc_setattr,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd_sattrargs),
+		.pc_ressize = sizeof(struct nfsd_attrstat),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+AT,
+	},
+	[NFSPROC_ROOT] = {
+		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_void),
+		.pc_ressize = sizeof(struct nfsd_void),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST,
+	},
+	[NFSPROC_LOOKUP] = {
+		.pc_func = (svc_procfunc) nfsd_proc_lookup,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd_diropargs),
+		.pc_ressize = sizeof(struct nfsd_diropres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+FH+AT,
+	},
+	[NFSPROC_READLINK] = {
+		.pc_func = (svc_procfunc) nfsd_proc_readlink,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
+		.pc_argsize = sizeof(struct nfsd_readlinkargs),
+		.pc_ressize = sizeof(struct nfsd_readlinkres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
+	},
+	[NFSPROC_READ] = {
+		.pc_func = (svc_procfunc) nfsd_proc_read,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
+		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd_readargs),
+		.pc_ressize = sizeof(struct nfsd_readres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
+	},
+	[NFSPROC_WRITECACHE] = {
+		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_void),
+		.pc_ressize = sizeof(struct nfsd_void),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST,
+	},
+	[NFSPROC_WRITE] = {
+		.pc_func = (svc_procfunc) nfsd_proc_write,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
+		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd_writeargs),
+		.pc_ressize = sizeof(struct nfsd_attrstat),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+AT,
+	},
+	[NFSPROC_CREATE] = {
+		.pc_func = (svc_procfunc) nfsd_proc_create,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd_createargs),
+		.pc_ressize = sizeof(struct nfsd_diropres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+FH+AT,
+	},
+	[NFSPROC_REMOVE] = {
+		.pc_func = (svc_procfunc) nfsd_proc_remove,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_diropargs),
+		.pc_ressize = sizeof(struct nfsd_void),
+		.pc_cachetype = RC_REPLSTAT,
+		.pc_xdrressize = ST,
+	},
+	[NFSPROC_RENAME] = {
+		.pc_func = (svc_procfunc) nfsd_proc_rename,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_renameargs),
+		.pc_ressize = sizeof(struct nfsd_void),
+		.pc_cachetype = RC_REPLSTAT,
+		.pc_xdrressize = ST,
+	},
+	[NFSPROC_LINK] = {
+		.pc_func = (svc_procfunc) nfsd_proc_link,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_linkargs),
+		.pc_ressize = sizeof(struct nfsd_void),
+		.pc_cachetype = RC_REPLSTAT,
+		.pc_xdrressize = ST,
+	},
+	[NFSPROC_SYMLINK] = {
+		.pc_func = (svc_procfunc) nfsd_proc_symlink,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_symlinkargs),
+		.pc_ressize = sizeof(struct nfsd_void),
+		.pc_cachetype = RC_REPLSTAT,
+		.pc_xdrressize = ST,
+	},
+	[NFSPROC_MKDIR] = {
+		.pc_func = (svc_procfunc) nfsd_proc_mkdir,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
+		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_argsize = sizeof(struct nfsd_createargs),
+		.pc_ressize = sizeof(struct nfsd_diropres),
+		.pc_cachetype = RC_REPLBUFF,
+		.pc_xdrressize = ST+FH+AT,
+	},
+	[NFSPROC_RMDIR] = {
+		.pc_func = (svc_procfunc) nfsd_proc_rmdir,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_diropargs),
+		.pc_ressize = sizeof(struct nfsd_void),
+		.pc_cachetype = RC_REPLSTAT,
+		.pc_xdrressize = ST,
+	},
+	[NFSPROC_READDIR] = {
+		.pc_func = (svc_procfunc) nfsd_proc_readdir,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
+		.pc_argsize = sizeof(struct nfsd_readdirargs),
+		.pc_ressize = sizeof(struct nfsd_readdirres),
+		.pc_cachetype = RC_NOCACHE,
+	},
+	[NFSPROC_STATFS] = {
+		.pc_func = (svc_procfunc) nfsd_proc_statfs,
+		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
+		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
+		.pc_argsize = sizeof(struct nfsd_fhandle),
+		.pc_ressize = sizeof(struct nfsd_statfsres),
+		.pc_cachetype = RC_NOCACHE,
+		.pc_xdrressize = ST+5,
+	},
 };
 
 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cbba4a935786..d4c9884cd54b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -390,12 +390,14 @@ nfsd_svc(unsigned short port, int nrservs)
 
 	mutex_lock(&nfsd_mutex);
 	dprintk("nfsd: creating service\n");
-	error = -EINVAL;
 	if (nrservs <= 0)
 		nrservs = 0;
 	if (nrservs > NFSD_MAXSERVS)
 		nrservs = NFSD_MAXSERVS;
-	
+	error = 0;
+	if (nrservs == 0 && nfsd_serv == NULL)
+		goto out;
+
 	/* Readahead param cache - will no-op if it already exists */
 	error =	nfsd_racache_init(2*nrservs);
 	if (error<0)
@@ -413,6 +415,12 @@ nfsd_svc(unsigned short port, int nrservs)
 		goto failure;
 
 	error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
+	if (error == 0)
+		/* We are holding a reference to nfsd_serv which
+		 * we don't want to count in the return value,
+		 * so subtract 1
+		 */
+		error = nfsd_serv->sv_nrthreads - 1;
  failure:
 	svc_destroy(nfsd_serv);		/* Release server */
  out:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 99f835753596..23341c1063bc 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -678,7 +678,6 @@ __be32
 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			int access, struct file **filp)
 {
-	const struct cred *cred = current_cred();
 	struct dentry	*dentry;
 	struct inode	*inode;
 	int		flags = O_RDONLY|O_LARGEFILE;
@@ -733,7 +732,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 		vfs_dq_init(inode);
 	}
 	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
-			    flags, cred);
+			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
 	else
@@ -966,6 +965,43 @@ static void kill_suid(struct dentry *dentry)
 	mutex_unlock(&dentry->d_inode->i_mutex);
 }
 
+/*
+ * Gathered writes: If another process is currently writing to the file,
+ * there's a high chance this is another nfsd (triggered by a bulk write
+ * from a client's biod). Rather than syncing the file with each write
+ * request, we sleep for 10 msec.
+ *
+ * I don't know if this roughly approximates C. Juszak's idea of
+ * gathered writes, but it's a nice and simple solution (IMHO), and it
+ * seems to work:-)
+ *
+ * Note: we do this only in the NFSv2 case, since v3 and higher have a
+ * better tool (separate unstable writes and commits) for solving this
+ * problem.
+ */
+static int wait_for_concurrent_writes(struct file *file)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	static ino_t last_ino;
+	static dev_t last_dev;
+	int err = 0;
+
+	if (atomic_read(&inode->i_writecount) > 1
+	    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+		dprintk("nfsd: write defer %d\n", task_pid_nr(current));
+		msleep(10);
+		dprintk("nfsd: write resume %d\n", task_pid_nr(current));
+	}
+
+	if (inode->i_state & I_DIRTY) {
+		dprintk("nfsd: write sync %d\n", task_pid_nr(current));
+		err = nfsd_sync(file);
+	}
+	last_ino = inode->i_ino;
+	last_dev = inode->i_sb->s_dev;
+	return err;
+}
+
 static __be32
 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 				loff_t offset, struct kvec *vec, int vlen,
@@ -978,6 +1014,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	__be32			err = 0;
 	int			host_err;
 	int			stable = *stablep;
+	int			use_wgather;
 
 #ifdef MSNFS
 	err = nfserr_perm;
@@ -996,9 +1033,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	 *  -	the sync export option has been set, or
 	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
 	 *  -   The file system doesn't support fsync().
-	 * When gathered writes have been configured for this volume,
+	 * When NFSv2 gathered writes have been configured for this volume,
 	 * flushing the data to disk is handled separately below.
 	 */
+	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
 
 	if (!file->f_op->fsync) {/* COMMIT3 cannot work */
 	       stable = 2;
@@ -1007,7 +1045,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 
 	if (!EX_ISSYNC(exp))
 		stable = 0;
-	if (stable && !EX_WGATHER(exp)) {
+	if (stable && !use_wgather) {
 		spin_lock(&file->f_lock);
 		file->f_flags |= O_SYNC;
 		spin_unlock(&file->f_lock);
@@ -1017,52 +1055,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	oldfs = get_fs(); set_fs(KERNEL_DS);
 	host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
 	set_fs(oldfs);
-	if (host_err >= 0) {
-		*cnt = host_err;
-		nfsdstats.io_write += host_err;
-		fsnotify_modify(file->f_path.dentry);
-	}
+	if (host_err < 0)
+		goto out_nfserr;
+	*cnt = host_err;
+	nfsdstats.io_write += host_err;
+	fsnotify_modify(file->f_path.dentry);
 
 	/* clear setuid/setgid flag after write */
-	if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
+	if (inode->i_mode & (S_ISUID | S_ISGID))
 		kill_suid(dentry);
 
-	if (host_err >= 0 && stable) {
-		static ino_t	last_ino;
-		static dev_t	last_dev;
-
-		/*
-		 * Gathered writes: If another process is currently
-		 * writing to the file, there's a high chance
-		 * this is another nfsd (triggered by a bulk write
-		 * from a client's biod). Rather than syncing the
-		 * file with each write request, we sleep for 10 msec.
-		 *
-		 * I don't know if this roughly approximates
-		 * C. Juszak's idea of gathered writes, but it's a
-		 * nice and simple solution (IMHO), and it seems to
-		 * work:-)
-		 */
-		if (EX_WGATHER(exp)) {
-			if (atomic_read(&inode->i_writecount) > 1
-			    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
-				dprintk("nfsd: write defer %d\n", task_pid_nr(current));
-				msleep(10);
-				dprintk("nfsd: write resume %d\n", task_pid_nr(current));
-			}
-
-			if (inode->i_state & I_DIRTY) {
-				dprintk("nfsd: write sync %d\n", task_pid_nr(current));
-				host_err=nfsd_sync(file);
-			}
-#if 0
-			wake_up(&inode->i_wait);
-#endif
-		}
-		last_ino = inode->i_ino;
-		last_dev = inode->i_sb->s_dev;
-	}
+	if (stable && use_wgather)
+		host_err = wait_for_concurrent_writes(file);
 
+out_nfserr:
 	dprintk("nfsd: write complete host_err=%d\n", host_err);
 	if (host_err >= 0)
 		err = 0;
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 36df60b6d8a4..99d58a028b94 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -568,6 +568,7 @@ void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap,
 }
 
 static struct lock_class_key nilfs_bmap_dat_lock_key;
+static struct lock_class_key nilfs_bmap_mdt_lock_key;
 
 /**
  * nilfs_bmap_read - read a bmap from an inode
@@ -603,7 +604,11 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
 		bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
 		bmap->b_last_allocated_key = 0;
 		bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
+		lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
 		break;
+	case NILFS_IFILE_INO:
+		lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
+		/* Fall through */
 	default:
 		bmap->b_ptr_type = NILFS_BMAP_PTR_VM;
 		bmap->b_last_allocated_key = 0;
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 7d49813f66d6..aec942cf79e3 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -307,7 +307,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 		ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
 		if (ret < 0) {
 			if (ret != -ENOENT)
-				goto out_header;
+				break;
 			/* skip hole */
 			ret = 0;
 			continue;
@@ -340,7 +340,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 					continue;
 				printk(KERN_ERR "%s: cannot delete block\n",
 				       __func__);
-				goto out_header;
+				break;
 			}
 		}
 
@@ -358,7 +358,6 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 		kunmap_atomic(kaddr, KM_USER0);
 	}
 
- out_header:
 	brelse(header_bh);
 
  out_sem:
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 0b2710e2d565..8927ca27e6f7 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -134,15 +134,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
 	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
 					     req->pr_entry_bh, kaddr);
 	entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
-	if (entry->de_blocknr != cpu_to_le64(0) ||
-	    entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) {
-		printk(KERN_CRIT
-		       "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n",
-		       __func__, (unsigned long long)req->pr_entry_nr,
-		       (unsigned long long)le64_to_cpu(entry->de_start),
-		       (unsigned long long)le64_to_cpu(entry->de_end),
-		       (unsigned long long)le64_to_cpu(entry->de_blocknr));
-	}
 	entry->de_blocknr = cpu_to_le64(blocknr);
 	kunmap_atomic(kaddr, KM_USER0);
 
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2696d6b513b7..fe9d8f2a13f8 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -309,10 +309,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
 	/* ii->i_file_acl = 0; */
 	/* ii->i_dir_acl = 0; */
 	ii->i_dir_start_lookup = 0;
-#ifdef CONFIG_NILFS_FS_POSIX_ACL
-	ii->i_acl = NULL;
-	ii->i_default_acl = NULL;
-#endif
 	ii->i_cno = 0;
 	nilfs_set_inode_flags(inode);
 	spin_lock(&sbi->s_next_gen_lock);
@@ -434,10 +430,6 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
 
 	raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh);
 
-#ifdef CONFIG_NILFS_FS_POSIX_ACL
-	ii->i_acl = NILFS_ACL_NOT_CACHED;
-	ii->i_default_acl = NILFS_ACL_NOT_CACHED;
-#endif
 	if (nilfs_read_inode_common(inode, raw_inode))
 		goto failed_unmap;
 
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index edf6a59d9f2a..724c63766e82 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -58,10 +58,6 @@ struct nilfs_inode_info {
 	 */
 	struct rw_semaphore xattr_sem;
 #endif
-#ifdef CONFIG_NILFS_POSIX_ACL
-	struct posix_acl *i_acl;
-	struct posix_acl *i_default_acl;
-#endif
 	struct buffer_head *i_bh;	/* i_bh contains a new or dirty
 					   disk inode */
 	struct inode vfs_inode;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index aa977549919e..8b5e4778cf28 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1829,26 +1829,13 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
 		err = nilfs_segbuf_write(segbuf, &wi);
 
 		res = nilfs_segbuf_wait(segbuf, &wi);
-		err = unlikely(err) ? : res;
-		if (unlikely(err))
+		err = err ? : res;
+		if (err)
 			return err;
 	}
 	return 0;
 }
 
-static int nilfs_page_has_uncleared_buffer(struct page *page)
-{
-	struct buffer_head *head, *bh;
-
-	head = bh = page_buffers(page);
-	do {
-		if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers))
-			return 1;
-		bh = bh->b_this_page;
-	} while (bh != head);
-	return 0;
-}
-
 static void __nilfs_end_page_io(struct page *page, int err)
 {
 	if (!err) {
@@ -1872,12 +1859,11 @@ static void nilfs_end_page_io(struct page *page, int err)
 	if (!page)
 		return;
 
-	if (buffer_nilfs_node(page_buffers(page)) &&
-	    nilfs_page_has_uncleared_buffer(page))
-		/* For b-tree node pages, this function may be called twice
-		   or more because they might be split in a segment.
-		   This check assures that cleanup has been done for all
-		   buffers in a split btnode page. */
+	if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page))
+		/*
+		 * For b-tree node pages, this function may be called twice
+		 * or more because they might be split in a segment.
+		 */
 		return;
 
 	__nilfs_end_page_io(page, err);
@@ -1940,7 +1926,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
 			}
 			if (bh->b_page != fs_page) {
 				nilfs_end_page_io(fs_page, err);
-				if (unlikely(fs_page == failed_page))
+				if (fs_page && fs_page == failed_page)
 					goto done;
 				fs_page = bh->b_page;
 			}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index ab785f85aa50..8e2ec43b18f4 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -189,16 +189,6 @@ static void nilfs_clear_inode(struct inode *inode)
 {
 	struct nilfs_inode_info *ii = NILFS_I(inode);
 
-#ifdef CONFIG_NILFS_POSIX_ACL
-	if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) {
-		posix_acl_release(ii->i_acl);
-		ii->i_acl = NILFS_ACL_NOT_CACHED;
-	}
-	if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) {
-		posix_acl_release(ii->i_default_acl);
-		ii->i_default_acl = NILFS_ACL_NOT_CACHED;
-	}
-#endif
 	/*
 	 * Free resources allocated in nilfs_read_inode(), here.
 	 */
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ff231ad23895..ff27a2965844 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -296,12 +296,15 @@ static int inotify_fasync(int fd, struct file *file, int on)
 static int inotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
+	struct user_struct *user = group->inotify_data.user;
 
 	fsnotify_clear_marks_by_group(group);
 
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
 	fsnotify_put_group(group);
 
+	atomic_dec(&user->inotify_devs);
+
 	return 0;
 }
 
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 6cdeaa76f27f..110bb57c46ab 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -92,6 +92,9 @@ struct ocfs2_unblock_ctl {
 	enum ocfs2_unblock_action unblock_action;
 };
 
+/* Lockdep class keys */
+struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
+
 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
 					int new_level);
 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
@@ -317,9 +320,16 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
 			     u32 dlm_flags);
 static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
 						     int wanted);
-static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
-				 struct ocfs2_lock_res *lockres,
-				 int level);
+static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
+				   struct ocfs2_lock_res *lockres,
+				   int level, unsigned long caller_ip);
+static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
+					struct ocfs2_lock_res *lockres,
+					int level)
+{
+	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
+}
+
 static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
 static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
 static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
@@ -489,6 +499,13 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
 
 	ocfs2_init_lock_stats(res);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	if (type != OCFS2_LOCK_TYPE_OPEN)
+		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
+				 &lockdep_keys[type], 0);
+	else
+		res->l_lockdep_map.key = NULL;
+#endif
 }
 
 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
@@ -644,14 +661,10 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
 static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
 					    struct ocfs2_super *osb)
 {
-	struct ocfs2_orphan_scan_lvb *lvb;
-
 	ocfs2_lock_res_init_once(res);
 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
 				   &ocfs2_orphan_scan_lops, osb);
-	lvb = ocfs2_dlm_lvb(&res->l_lksb);
-	lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
 }
 
 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
@@ -1256,11 +1269,13 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
 	return ret;
 }
 
-static int ocfs2_cluster_lock(struct ocfs2_super *osb,
-			      struct ocfs2_lock_res *lockres,
-			      int level,
-			      u32 lkm_flags,
-			      int arg_flags)
+static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
+				struct ocfs2_lock_res *lockres,
+				int level,
+				u32 lkm_flags,
+				int arg_flags,
+				int l_subclass,
+				unsigned long caller_ip)
 {
 	struct ocfs2_mask_waiter mw;
 	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
@@ -1403,13 +1418,37 @@ out:
 	}
 	ocfs2_update_lock_stats(lockres, level, &mw, ret);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	if (!ret && lockres->l_lockdep_map.key != NULL) {
+		if (level == DLM_LOCK_PR)
+			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
+				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
+				caller_ip);
+		else
+			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
+				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
+				caller_ip);
+	}
+#endif
 	mlog_exit(ret);
 	return ret;
 }
 
-static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
-				 struct ocfs2_lock_res *lockres,
-				 int level)
+static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
+				     struct ocfs2_lock_res *lockres,
+				     int level,
+				     u32 lkm_flags,
+				     int arg_flags)
+{
+	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
+				    0, _RET_IP_);
+}
+
+
+static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
+				   struct ocfs2_lock_res *lockres,
+				   int level,
+				   unsigned long caller_ip)
 {
 	unsigned long flags;
 
@@ -1418,6 +1457,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
 	ocfs2_dec_holders(lockres, level);
 	ocfs2_downconvert_on_unlock(osb, lockres);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	if (lockres->l_lockdep_map.key != NULL)
+		rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
+#endif
 	mlog_exit_void();
 }
 
@@ -1989,7 +2032,8 @@ static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
 {
 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
-	if (lvb->lvb_version == OCFS2_LVB_VERSION
+	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
+	    && lvb->lvb_version == OCFS2_LVB_VERSION
 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
 		return 1;
 	return 0;
@@ -2162,10 +2206,11 @@ static int ocfs2_assign_bh(struct inode *inode,
  * returns < 0 error if the callback will never be called, otherwise
  * the result of the lock will be communicated via the callback.
  */
-int ocfs2_inode_lock_full(struct inode *inode,
-			 struct buffer_head **ret_bh,
-			 int ex,
-			 int arg_flags)
+int ocfs2_inode_lock_full_nested(struct inode *inode,
+				 struct buffer_head **ret_bh,
+				 int ex,
+				 int arg_flags,
+				 int subclass)
 {
 	int status, level, acquired;
 	u32 dlm_flags;
@@ -2203,7 +2248,8 @@ int ocfs2_inode_lock_full(struct inode *inode,
 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
 		dlm_flags |= DLM_LKF_NOQUEUE;
 
-	status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
+	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
+				      arg_flags, subclass, _RET_IP_);
 	if (status < 0) {
 		if (status != -EAGAIN && status != -EIOCBRETRY)
 			mlog_errno(status);
@@ -2369,35 +2415,45 @@ void ocfs2_inode_unlock(struct inode *inode,
 	mlog_exit_void();
 }
 
-int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex)
+int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
 {
 	struct ocfs2_lock_res *lockres;
 	struct ocfs2_orphan_scan_lvb *lvb;
-	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
 	int status = 0;
 
+	if (ocfs2_is_hard_readonly(osb))
+		return -EROFS;
+
+	if (ocfs2_mount_local(osb))
+		return 0;
+
 	lockres = &osb->osb_orphan_scan.os_lockres;
-	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
+	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
 	if (status < 0)
 		return status;
 
 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
-	if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
+	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
+	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
 		*seqno = be32_to_cpu(lvb->lvb_os_seqno);
+	else
+		*seqno = osb->osb_orphan_scan.os_seqno + 1;
+
 	return status;
 }
 
-void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex)
+void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
 {
 	struct ocfs2_lock_res *lockres;
 	struct ocfs2_orphan_scan_lvb *lvb;
-	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
 
-	lockres = &osb->osb_orphan_scan.os_lockres;
-	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
-	lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
-	lvb->lvb_os_seqno = cpu_to_be32(seqno);
-	ocfs2_cluster_unlock(osb, lockres, level);
+	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
+		lockres = &osb->osb_orphan_scan.os_lockres;
+		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
+		lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
+		lvb->lvb_os_seqno = cpu_to_be32(seqno);
+		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
+	}
 }
 
 int ocfs2_super_lock(struct ocfs2_super *osb,
@@ -3627,7 +3683,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
 	struct ocfs2_global_disk_dqinfo *gdinfo;
 	int status = 0;
 
-	if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
+	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
+	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
 		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
 		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
 		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 31b90d7b8f51..7553836931de 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -78,6 +78,14 @@ struct ocfs2_orphan_scan_lvb {
 /* don't block waiting for the downconvert thread, instead return -EAGAIN */
 #define OCFS2_LOCK_NONBLOCK		(0x04)
 
+/* Locking subclasses of inode cluster lock */
+enum {
+	OI_LS_NORMAL = 0,
+	OI_LS_PARENT,
+	OI_LS_RENAME1,
+	OI_LS_RENAME2,
+};
+
 int ocfs2_dlm_init(struct ocfs2_super *osb);
 void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending);
 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
@@ -104,25 +112,31 @@ void ocfs2_open_unlock(struct inode *inode);
 int ocfs2_inode_lock_atime(struct inode *inode,
 			  struct vfsmount *vfsmnt,
 			  int *level);
-int ocfs2_inode_lock_full(struct inode *inode,
+int ocfs2_inode_lock_full_nested(struct inode *inode,
 			 struct buffer_head **ret_bh,
 			 int ex,
-			 int arg_flags);
+			 int arg_flags,
+			 int subclass);
 int ocfs2_inode_lock_with_page(struct inode *inode,
 			      struct buffer_head **ret_bh,
 			      int ex,
 			      struct page *page);
+/* Variants without special locking class or flags */
+#define ocfs2_inode_lock_full(i, r, e, f)\
+		ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL)
+#define ocfs2_inode_lock_nested(i, b, e, s)\
+		ocfs2_inode_lock_full_nested(i, b, e, 0, s)
 /* 99% of the time we don't want to supply any additional flags --
  * those are for very specific cases only. */
-#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0)
+#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL)
 void ocfs2_inode_unlock(struct inode *inode,
 		       int ex);
 int ocfs2_super_lock(struct ocfs2_super *osb,
 		     int ex);
 void ocfs2_super_unlock(struct ocfs2_super *osb,
 			int ex);
-int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex);
-void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex);
+int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno);
+void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno);
 
 int ocfs2_rename_lock(struct ocfs2_super *osb);
 void ocfs2_rename_unlock(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 07267e0da909..62442e413a00 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2026,7 +2026,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
 				      size_t len,
 				      unsigned int flags)
 {
-	int ret = 0;
+	int ret = 0, lock_level = 0;
 	struct inode *inode = in->f_path.dentry->d_inode;
 
 	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
@@ -2037,12 +2037,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
 	/*
 	 * See the comment in ocfs2_file_aio_read()
 	 */
-	ret = ocfs2_inode_lock(inode, NULL, 0);
+	ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto bail;
 	}
-	ocfs2_inode_unlock(inode, 0);
+	ocfs2_inode_unlock(inode, lock_level);
 
 	ret = generic_file_splice_read(in, ppos, pipe, len, flags);
 
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 10e1fa87396a..4dc8890ba316 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -215,6 +215,8 @@ bail:
 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 {
 	struct ocfs2_find_inode_args *args = opaque;
+	static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
+				     ocfs2_file_ip_alloc_sem_key;
 
 	mlog_entry("inode = %p, opaque = %p\n", inode, opaque);
 
@@ -223,6 +225,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
 	if (args->fi_sysfile_type != 0)
 		lockdep_set_class(&inode->i_mutex,
 			&ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
+	if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
+	    args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
+	    args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
+	    args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE)
+		lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
+				  &ocfs2_quota_ip_alloc_sem_key);
+	else
+		lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
+				  &ocfs2_file_ip_alloc_sem_key);
 
 	mlog_exit(0);
 	return 0;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4a3b9e6b31ad..f033760ecbea 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1880,13 +1880,20 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
 
 	os = &osb->osb_orphan_scan;
 
-	status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX);
+	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+		goto out;
+
+	status = ocfs2_orphan_scan_lock(osb, &seqno);
 	if (status < 0) {
 		if (status != -EAGAIN)
 			mlog_errno(status);
 		goto out;
 	}
 
+	/* Do no queue the tasks if the volume is being umounted */
+	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+		goto unlock;
+
 	if (os->os_seqno != seqno) {
 		os->os_seqno = seqno;
 		goto unlock;
@@ -1903,7 +1910,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
 	os->os_count++;
 	os->os_scantime = CURRENT_TIME;
 unlock:
-	ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX);
+	ocfs2_orphan_scan_unlock(osb, seqno);
 out:
 	return;
 }
@@ -1920,8 +1927,9 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
 
 	mutex_lock(&os->os_lock);
 	ocfs2_queue_orphan_scan(osb);
-	schedule_delayed_work(&os->os_orphan_scan_work,
-			      ocfs2_orphan_scan_timeout());
+	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
+		schedule_delayed_work(&os->os_orphan_scan_work,
+				      ocfs2_orphan_scan_timeout());
 	mutex_unlock(&os->os_lock);
 }
 
@@ -1930,26 +1938,33 @@ void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
 	struct ocfs2_orphan_scan *os;
 
 	os = &osb->osb_orphan_scan;
-	mutex_lock(&os->os_lock);
-	cancel_delayed_work(&os->os_orphan_scan_work);
-	mutex_unlock(&os->os_lock);
+	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
+		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
+		mutex_lock(&os->os_lock);
+		cancel_delayed_work(&os->os_orphan_scan_work);
+		mutex_unlock(&os->os_lock);
+	}
 }
 
-int ocfs2_orphan_scan_init(struct ocfs2_super *osb)
+void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
 {
 	struct ocfs2_orphan_scan *os;
 
 	os = &osb->osb_orphan_scan;
 	os->os_osb = osb;
 	os->os_count = 0;
+	os->os_seqno = 0;
 	os->os_scantime = CURRENT_TIME;
 	mutex_init(&os->os_lock);
-
-	INIT_DELAYED_WORK(&os->os_orphan_scan_work,
-			  ocfs2_orphan_scan_work);
-	schedule_delayed_work(&os->os_orphan_scan_work,
-			      ocfs2_orphan_scan_timeout());
-	return 0;
+	INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
+
+	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
+		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
+	else {
+		atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
+		schedule_delayed_work(&os->os_orphan_scan_work,
+				      ocfs2_orphan_scan_timeout());
+	}
 }
 
 struct ocfs2_orphan_filldir_priv {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 61045eeb3f6e..5432c7f79cc6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -144,7 +144,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
 }
 
 /* Exported only for the journal struct init code in super.c. Do not call. */
-int ocfs2_orphan_scan_init(struct ocfs2_super *osb);
+void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
 void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 33464c6b60a2..8601f934010b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -118,7 +118,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
 	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
 
-	status = ocfs2_inode_lock(dir, NULL, 0);
+	status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -636,7 +636,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
-	err = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
+	err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
 	if (err < 0) {
 		if (err != -ENOENT)
 			mlog_errno(err);
@@ -800,7 +800,8 @@ static int ocfs2_unlink(struct inode *dir,
 		return -EPERM;
 	}
 
-	status = ocfs2_inode_lock(dir, &parent_node_bh, 1);
+	status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1,
+					 OI_LS_PARENT);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -978,7 +979,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 			inode1 = tmpinode;
 		}
 		/* lock id2 */
-		status = ocfs2_inode_lock(inode2, bh2, 1);
+		status = ocfs2_inode_lock_nested(inode2, bh2, 1,
+						 OI_LS_RENAME1);
 		if (status < 0) {
 			if (status != -ENOENT)
 				mlog_errno(status);
@@ -987,7 +989,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 	}
 
 	/* lock id1 */
-	status = ocfs2_inode_lock(inode1, bh1, 1);
+	status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
 	if (status < 0) {
 		/*
 		 * An error return must mean that no cluster locks
@@ -1103,7 +1105,8 @@ static int ocfs2_rename(struct inode *old_dir,
 	 * won't have to concurrently downconvert the inode and the
 	 * dentry locks.
 	 */
-	status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1);
+	status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1,
+					 OI_LS_PARENT);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 18c1d9ec1c93..c9345ebb8493 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -34,6 +34,7 @@
 #include <linux/workqueue.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/lockdep.h>
 #ifndef CONFIG_OCFS2_COMPAT_JBD
 # include <linux/jbd2.h>
 #else
@@ -152,6 +153,14 @@ struct ocfs2_lock_res {
 	unsigned int		 l_lock_max_exmode; 	   /* Max wait for EX */
 	unsigned int		 l_lock_refresh;	   /* Disk refreshes */
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	 l_lockdep_map;
+#endif
+};
+
+enum ocfs2_orphan_scan_state {
+	ORPHAN_SCAN_ACTIVE,
+	ORPHAN_SCAN_INACTIVE
 };
 
 struct ocfs2_orphan_scan {
@@ -162,6 +171,7 @@ struct ocfs2_orphan_scan {
 	struct timespec		os_scantime;  /* time this node ran the scan */
 	u32			os_count;      /* tracks node specific scans */
 	u32  			os_seqno;       /* tracks cluster wide scans */
+	atomic_t		os_state;              /* ACTIVE or INACTIVE */
 };
 
 struct ocfs2_dlm_debug {
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index fcd120f1493a..3f661376a2de 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -236,6 +236,16 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
 	return dlm_status_to_errno(lksb->lksb_o2dlm.status);
 }
 
+/*
+ * o2dlm aways has a "valid" LVB. If the dlm loses track of the LVB
+ * contents, it will zero out the LVB.  Thus the caller can always trust
+ * the contents.
+ */
+static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+	return 1;
+}
+
 static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
 	return (void *)(lksb->lksb_o2dlm.lvb);
@@ -354,6 +364,7 @@ static struct ocfs2_stack_operations o2cb_stack_ops = {
 	.dlm_lock	= o2cb_dlm_lock,
 	.dlm_unlock	= o2cb_dlm_unlock,
 	.lock_status	= o2cb_dlm_lock_status,
+	.lvb_valid	= o2cb_dlm_lvb_valid,
 	.lock_lvb	= o2cb_dlm_lvb,
 	.dump_lksb	= o2cb_dump_lksb,
 };
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 9b76d41a8ac6..ff4c798a5635 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -738,6 +738,13 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
 	return lksb->lksb_fsdlm.sb_status;
 }
 
+static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+	int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
+
+	return !invalid;
+}
+
 static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
 	if (!lksb->lksb_fsdlm.sb_lvbptr)
@@ -873,6 +880,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
 	.dlm_lock	= user_dlm_lock,
 	.dlm_unlock	= user_dlm_unlock,
 	.lock_status	= user_dlm_lock_status,
+	.lvb_valid	= user_dlm_lvb_valid,
 	.lock_lvb	= user_dlm_lvb,
 	.plock		= user_plock,
 	.dump_lksb	= user_dlm_dump_lksb,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 68b668b0e60a..3f2f1c45b7b6 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -6,7 +6,7 @@
  * Code which implements an OCFS2 specific interface to underlying
  * cluster stacks.
  *
- * Copyright (C) 2007 Oracle.  All rights reserved.
+ * Copyright (C) 2007, 2009 Oracle.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -271,11 +271,12 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
 }
 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
 
-/*
- * Why don't we cast to ocfs2_meta_lvb?  The "clean" answer is that we
- * don't cast at the glue level.  The real answer is that the header
- * ordering is nigh impossible.
- */
+int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
+{
+	return active_stack->sp_ops->lvb_valid(lksb);
+}
+EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid);
+
 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
 {
 	return active_stack->sp_ops->lock_lvb(lksb);
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index c571af375ef8..03a44d60eac9 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -186,6 +186,11 @@ struct ocfs2_stack_operations {
 	int (*lock_status)(union ocfs2_dlm_lksb *lksb);
 
 	/*
+	 * Return non-zero if the LVB is valid.
+	 */
+	int (*lvb_valid)(union ocfs2_dlm_lksb *lksb);
+
+	/*
 	 * Pull the lvb pointer off of the stack-specific lksb.
 	 */
 	void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb);
@@ -252,6 +257,7 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
 		     struct ocfs2_lock_res *astarg);
 
 int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
+int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb);
 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
 void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
 
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 8439f6b324b9..73a16d4666dc 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -923,14 +923,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 					 int nr)
 {
 	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+	int ret;
 
 	if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
 		return 0;
-	if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data)
+
+	if (!buffer_jbd(bg_bh))
 		return 1;
 
+	jbd_lock_bh_state(bg_bh);
 	bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
-	return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+	if (bg)
+		ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+	else
+		ret = 1;
+	jbd_unlock_bh_state(bg_bh);
+
+	return ret;
 }
 
 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
@@ -1885,6 +1894,7 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 	unsigned int tmp;
 	int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
 	struct ocfs2_group_desc *undo_bg = NULL;
+	int cluster_bitmap = 0;
 
 	mlog_entry_void();
 
@@ -1905,18 +1915,28 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 	}
 
 	if (ocfs2_is_cluster_bitmap(alloc_inode))
-		undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data;
+		cluster_bitmap = 1;
+
+	if (cluster_bitmap) {
+		jbd_lock_bh_state(group_bh);
+		undo_bg = (struct ocfs2_group_desc *)
+					bh2jh(group_bh)->b_committed_data;
+		BUG_ON(!undo_bg);
+	}
 
 	tmp = num_bits;
 	while(tmp--) {
 		ocfs2_clear_bit((bit_off + tmp),
 				(unsigned long *) bg->bg_bitmap);
-		if (ocfs2_is_cluster_bitmap(alloc_inode))
+		if (cluster_bitmap)
 			ocfs2_set_bit(bit_off + tmp,
 				      (unsigned long *) undo_bg->bg_bitmap);
 	}
 	le16_add_cpu(&bg->bg_free_bits_count, num_bits);
 
+	if (cluster_bitmap)
+		jbd_unlock_bh_state(group_bh);
+
 	status = ocfs2_journal_dirty(handle, group_bh);
 	if (status < 0)
 		mlog_errno(status);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0d3ed7407a04..7efb349fb9bd 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -205,11 +205,10 @@ static const match_table_t tokens = {
 #ifdef CONFIG_DEBUG_FS
 static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 {
-	int out = 0;
-	int i;
 	struct ocfs2_cluster_connection *cconn = osb->cconn;
 	struct ocfs2_recovery_map *rm = osb->recovery_map;
-	struct ocfs2_orphan_scan *os;
+	struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan;
+	int i, out = 0;
 
 	out += snprintf(buf + out, len - out,
 			"%10s => Id: %-s  Uuid: %-s  Gen: 0x%X  Label: %-s\n",
@@ -234,20 +233,24 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 			"%10s => Opts: 0x%lX  AtimeQuanta: %u\n", "Mount",
 			osb->s_mount_opt, osb->s_atime_quantum);
 
-	out += snprintf(buf + out, len - out,
-			"%10s => Stack: %s  Name: %*s  Version: %d.%d\n",
-			"Cluster",
-			(*osb->osb_cluster_stack == '\0' ?
-			 "o2cb" : osb->osb_cluster_stack),
-			cconn->cc_namelen, cconn->cc_name,
-			cconn->cc_version.pv_major, cconn->cc_version.pv_minor);
+	if (cconn) {
+		out += snprintf(buf + out, len - out,
+				"%10s => Stack: %s  Name: %*s  "
+				"Version: %d.%d\n", "Cluster",
+				(*osb->osb_cluster_stack == '\0' ?
+				 "o2cb" : osb->osb_cluster_stack),
+				cconn->cc_namelen, cconn->cc_name,
+				cconn->cc_version.pv_major,
+				cconn->cc_version.pv_minor);
+	}
 
 	spin_lock(&osb->dc_task_lock);
 	out += snprintf(buf + out, len - out,
 			"%10s => Pid: %d  Count: %lu  WakeSeq: %lu  "
 			"WorkSeq: %lu\n", "DownCnvt",
-			task_pid_nr(osb->dc_task), osb->blocked_lock_count,
-			osb->dc_wake_sequence, osb->dc_work_sequence);
+			(osb->dc_task ?  task_pid_nr(osb->dc_task) : -1),
+			osb->blocked_lock_count, osb->dc_wake_sequence,
+			osb->dc_work_sequence);
 	spin_unlock(&osb->dc_task_lock);
 
 	spin_lock(&osb->osb_lock);
@@ -267,14 +270,15 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 
 	out += snprintf(buf + out, len - out,
 			"%10s => Pid: %d  Interval: %lu  Needs: %d\n", "Commit",
-			task_pid_nr(osb->commit_task), osb->osb_commit_interval,
+			(osb->commit_task ? task_pid_nr(osb->commit_task) : -1),
+			osb->osb_commit_interval,
 			atomic_read(&osb->needs_checkpoint));
 
 	out += snprintf(buf + out, len - out,
-			"%10s => State: %d  NumTxns: %d  TxnId: %lu\n",
+			"%10s => State: %d  TxnId: %lu  NumTxns: %d\n",
 			"Journal", osb->journal->j_state,
-			atomic_read(&osb->journal->j_num_trans),
-			osb->journal->j_trans_id);
+			osb->journal->j_trans_id,
+			atomic_read(&osb->journal->j_num_trans));
 
 	out += snprintf(buf + out, len - out,
 			"%10s => GlobalAllocs: %d  LocalAllocs: %d  "
@@ -300,9 +304,18 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 			atomic_read(&osb->s_num_inodes_stolen));
 	spin_unlock(&osb->osb_lock);
 
+	out += snprintf(buf + out, len - out, "OrphanScan => ");
+	out += snprintf(buf + out, len - out, "Local: %u  Global: %u ",
+			os->os_count, os->os_seqno);
+	out += snprintf(buf + out, len - out, " Last Scan: ");
+	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
+		out += snprintf(buf + out, len - out, "Disabled\n");
+	else
+		out += snprintf(buf + out, len - out, "%lu seconds ago\n",
+				(get_seconds() - os->os_scantime.tv_sec));
+
 	out += snprintf(buf + out, len - out, "%10s => %3s  %10s\n",
 			"Slots", "Num", "RecoGen");
-
 	for (i = 0; i < osb->max_slots; ++i) {
 		out += snprintf(buf + out, len - out,
 				"%10s  %c %3d  %10d\n",
@@ -311,13 +324,6 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
 				i, osb->slot_recovery_generations[i]);
 	}
 
-	os = &osb->osb_orphan_scan;
-	out += snprintf(buf + out, len - out, "Orphan Scan=> ");
-	out += snprintf(buf + out, len - out, "Local: %u  Global: %u ",
-			os->os_count, os->os_seqno);
-	out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n",
-			(get_seconds() - os->os_scantime.tv_sec));
-
 	return out;
 }
 
@@ -1175,6 +1181,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
 	wake_up(&osb->osb_mount_event);
 
+	/* Start this when the mount is almost sure of being successful */
+	ocfs2_orphan_scan_init(osb);
+
 	mlog_exit(status);
 	return status;
 
@@ -1810,14 +1819,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
 	debugfs_remove(osb->osb_ctxt);
 
+	/* Orphan scan should be stopped as early as possible */
+	ocfs2_orphan_scan_stop(osb);
+
 	ocfs2_disable_quotas(osb);
 
 	ocfs2_shutdown_local_alloc(osb);
 
 	ocfs2_truncate_log_shutdown(osb);
 
-	ocfs2_orphan_scan_stop(osb);
-
 	/* This will disable recovery and flush any recovery work. */
 	ocfs2_recovery_exit(osb);
 
@@ -1978,13 +1988,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
 		goto bail;
 	}
 
-	status = ocfs2_orphan_scan_init(osb);
-	if (status) {
-		mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n");
-		mlog_errno(status);
-		goto bail;
-	}
-
 	init_waitqueue_head(&osb->checkpoint_event);
 	atomic_set(&osb->needs_checkpoint, 0);
 
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index ab713ebdd546..40e53702948c 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -50,6 +50,10 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb,
 					   int type,
 					   u32 slot);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES];
+#endif
+
 static inline int is_global_system_inode(int type)
 {
 	return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE &&
@@ -118,6 +122,21 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
 		inode = NULL;
 		goto bail;
 	}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	if (type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
+	    type == LOCAL_GROUP_QUOTA_SYSTEM_INODE ||
+	    type == JOURNAL_SYSTEM_INODE) {
+		/* Ignore inode lock on these inodes as the lock does not
+		 * really belong to any process and lockdep cannot handle
+		 * that */
+		OCFS2_I(inode)->ip_inode_lockres.l_lockdep_map.key = NULL;
+	} else {
+		lockdep_init_map(&OCFS2_I(inode)->ip_inode_lockres.
+								l_lockdep_map,
+				 ocfs2_system_inodes[type].si_name,
+				 &ocfs2_sysfile_cluster_lock_key[type], 0);
+	}
+#endif
 bail:
 
 	return inode;
diff --git a/fs/open.c b/fs/open.c
index 7200e23d9258..dd98e8076024 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -378,63 +378,63 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
 #endif
 #endif /* BITS_PER_LONG == 32 */
 
-SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
+
+int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
-	struct file *file;
-	struct inode *inode;
-	long ret = -EINVAL;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	long ret;
 
 	if (offset < 0 || len <= 0)
-		goto out;
+		return -EINVAL;
 
 	/* Return error if mode is not supported */
-	ret = -EOPNOTSUPP;
 	if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
-		goto out;
+		return -EOPNOTSUPP;
 
-	ret = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
 	if (!(file->f_mode & FMODE_WRITE))
-		goto out_fput;
+		return -EBADF;
 	/*
 	 * Revalidate the write permissions, in case security policy has
 	 * changed since the files were opened.
 	 */
 	ret = security_file_permission(file, MAY_WRITE);
 	if (ret)
-		goto out_fput;
+		return ret;
 
-	inode = file->f_path.dentry->d_inode;
-
-	ret = -ESPIPE;
 	if (S_ISFIFO(inode->i_mode))
-		goto out_fput;
+		return -ESPIPE;
 
-	ret = -ENODEV;
 	/*
 	 * Let individual file system decide if it supports preallocation
 	 * for directories or not.
 	 */
 	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
-		goto out_fput;
+		return -ENODEV;
 
-	ret = -EFBIG;
 	/* Check for wrap through zero too */
 	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
-		goto out_fput;
+		return -EFBIG;
 
-	if (inode->i_op->fallocate)
-		ret = inode->i_op->fallocate(inode, mode, offset, len);
-	else
-		ret = -EOPNOTSUPP;
+	if (!inode->i_op->fallocate)
+		return -EOPNOTSUPP;
 
-out_fput:
-	fput(file);
-out:
-	return ret;
+	return inode->i_op->fallocate(inode, mode, offset, len);
 }
+
+SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
+{
+	struct file *file;
+	int error = -EBADF;
+
+	file = fget(fd);
+	if (file) {
+		error = do_fallocate(file, mode, offset, len);
+		fput(file);
+	}
+
+	return error;
+}
+
 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
 {
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 607c579e5eca..70f36c043d62 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2042,8 +2042,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		 * changes */
 		invalidate_bdev(sb->s_bdev);
 	}
-	mutex_lock(&inode->i_mutex);
 	mutex_lock(&dqopt->dqonoff_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	if (sb_has_quota_loaded(sb, type)) {
 		error = -EBUSY;
 		goto out_lock;
@@ -2094,7 +2094,6 @@ out_file_init:
 	dqopt->files[type] = NULL;
 	iput(inode);
 out_lock:
-	mutex_unlock(&dqopt->dqonoff_mutex);
 	if (oldflags != -1) {
 		down_write(&dqopt->dqptr_sem);
 		/* Set the flags back (in the case of accidental quotaon()
@@ -2104,6 +2103,7 @@ out_lock:
 		up_write(&dqopt->dqptr_sem);
 	}
 	mutex_unlock(&inode->i_mutex);
+	mutex_unlock(&dqopt->dqonoff_mutex);
 out_fmt:
 	put_quota_format(fmt);
 
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6fd0f47e45db..a14d6cd9eeda 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1131,8 +1131,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
 	REISERFS_I(inode)->i_trans_id = 0;
 	REISERFS_I(inode)->i_jl = NULL;
 	mutex_init(&(REISERFS_I(inode)->i_mmap));
-	reiserfs_init_acl_access(inode);
-	reiserfs_init_acl_default(inode);
 	reiserfs_init_xattr_rwsem(inode);
 
 	if (stat_data_v1(ih)) {
@@ -1834,8 +1832,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
 	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
 	mutex_init(&(REISERFS_I(inode)->i_mmap));
-	reiserfs_init_acl_access(inode);
-	reiserfs_init_acl_default(inode);
 	reiserfs_init_xattr_rwsem(inode);
 
 	/* key to search for correct place for new stat data */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 77f5bb746bf0..90622200b39c 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s)
 	DEFINE_WAIT(wait);
 	struct reiserfs_journal *j = SB_JOURNAL(s);
 	if (atomic_read(&j->j_async_throttle))
-		congestion_wait(WRITE, HZ / 10);
+		congestion_wait(BLK_RW_ASYNC, HZ / 10);
 	return 0;
 }
 
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 238e9d9b31e0..18b315d3d104 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -82,7 +82,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 		if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) {
 			printk
 			    ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
-			unlock_super(s);
 			return -ENOMEM;
 		}
 		/* the new journal bitmaps are zero filled, now we copy in the bitmap
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 2969773cfc22..7adea74d6a8a 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -24,7 +24,6 @@
 #include <linux/exportfs.h>
 #include <linux/quotaops.h>
 #include <linux/vfs.h>
-#include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/crc32.h>
@@ -529,10 +528,6 @@ static void init_once(void *foo)
 
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	inode_init_once(&ei->vfs_inode);
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	ei->i_acl_access = NULL;
-	ei->i_acl_default = NULL;
-#endif
 }
 
 static int init_inodecache(void)
@@ -580,25 +575,6 @@ static void reiserfs_dirty_inode(struct inode *inode)
 	reiserfs_write_unlock(inode->i_sb);
 }
 
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-static void reiserfs_clear_inode(struct inode *inode)
-{
-	struct posix_acl *acl;
-
-	acl = REISERFS_I(inode)->i_acl_access;
-	if (acl && !IS_ERR(acl))
-		posix_acl_release(acl);
-	REISERFS_I(inode)->i_acl_access = NULL;
-
-	acl = REISERFS_I(inode)->i_acl_default;
-	if (acl && !IS_ERR(acl))
-		posix_acl_release(acl);
-	REISERFS_I(inode)->i_acl_default = NULL;
-}
-#else
-#define reiserfs_clear_inode NULL
-#endif
-
 #ifdef CONFIG_QUOTA
 static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
 				    size_t, loff_t);
@@ -612,7 +588,6 @@ static const struct super_operations reiserfs_sops = {
 	.write_inode = reiserfs_write_inode,
 	.dirty_inode = reiserfs_dirty_inode,
 	.delete_inode = reiserfs_delete_inode,
-	.clear_inode = reiserfs_clear_inode,
 	.put_super = reiserfs_put_super,
 	.write_super = reiserfs_write_super,
 	.sync_fs = reiserfs_sync_fs,
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index c303c426fe2b..35d6e672a279 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -188,29 +188,6 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
 	return ERR_PTR(-EINVAL);
 }
 
-static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
-			    struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ERR_PTR(-ENODATA))
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
-static inline struct posix_acl *iget_acl(struct inode *inode,
-					 struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ERR_PTR(-ENODATA);
-
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ERR_PTR(-ENODATA))
-		acl = posix_acl_dup(*i_acl);
-	spin_unlock(&inode->i_lock);
-
-	return acl;
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -220,34 +197,29 @@ static inline struct posix_acl *iget_acl(struct inode *inode,
 struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 {
 	char *name, *value;
-	struct posix_acl *acl, **p_acl;
+	struct posix_acl *acl;
 	int size;
 	int retval;
-	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
+
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &reiserfs_i->i_acl_access;
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &reiserfs_i->i_acl_default;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 
-	acl = iget_acl(inode, p_acl);
-	if (acl && !IS_ERR(acl))
-		return acl;
-	else if (PTR_ERR(acl) == -ENODATA)
-		return NULL;
-
 	size = reiserfs_xattr_get(inode, name, NULL, 0);
 	if (size < 0) {
 		if (size == -ENODATA || size == -ENOSYS) {
-			*p_acl = ERR_PTR(-ENODATA);
+			set_cached_acl(inode, type, NULL);
 			return NULL;
 		}
 		return ERR_PTR(size);
@@ -262,14 +234,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 		/* This shouldn't actually happen as it should have
 		   been caught above.. but just in case */
 		acl = NULL;
-		*p_acl = ERR_PTR(-ENODATA);
 	} else if (retval < 0) {
 		acl = ERR_PTR(retval);
 	} else {
 		acl = posix_acl_from_disk(value, retval);
-		if (!IS_ERR(acl))
-			iset_acl(inode, p_acl, acl);
 	}
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
 	kfree(value);
 	return acl;
@@ -287,10 +258,8 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 {
 	char *name;
 	void *value = NULL;
-	struct posix_acl **p_acl;
 	size_t size = 0;
 	int error;
-	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
 
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
@@ -298,7 +267,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &reiserfs_i->i_acl_access;
 		if (acl) {
 			mode_t mode = inode->i_mode;
 			error = posix_acl_equiv_mode(acl, &mode);
@@ -313,7 +281,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &reiserfs_i->i_acl_default;
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EACCES : 0;
 		break;
@@ -346,7 +313,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	kfree(value);
 
 	if (!error)
-		iset_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 
 	return error;
 }
@@ -379,11 +346,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
 	}
 
 	acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT);
-	if (IS_ERR(acl)) {
-		if (PTR_ERR(acl) == -ENODATA)
-			goto apply_umask;
+	if (IS_ERR(acl))
 		return PTR_ERR(acl);
-	}
 
 	if (acl) {
 		struct posix_acl *acl_copy;
diff --git a/fs/super.c b/fs/super.c
index d40d53a22fb5..2761d3e22ed9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -608,6 +608,7 @@ void emergency_remount(void)
 
 static DEFINE_IDA(unnamed_dev_ida);
 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
+static int unnamed_dev_start = 0; /* don't bother trying below it */
 
 int set_anon_super(struct super_block *s, void *data)
 {
@@ -618,7 +619,9 @@ int set_anon_super(struct super_block *s, void *data)
 	if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
 		return -ENOMEM;
 	spin_lock(&unnamed_dev_lock);
-	error = ida_get_new(&unnamed_dev_ida, &dev);
+	error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
+	if (!error)
+		unnamed_dev_start = dev + 1;
 	spin_unlock(&unnamed_dev_lock);
 	if (error == -EAGAIN)
 		/* We raced and lost with another CPU. */
@@ -629,6 +632,8 @@ int set_anon_super(struct super_block *s, void *data)
 	if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
 		spin_lock(&unnamed_dev_lock);
 		ida_remove(&unnamed_dev_ida, dev);
+		if (unnamed_dev_start > dev)
+			unnamed_dev_start = dev;
 		spin_unlock(&unnamed_dev_lock);
 		return -EMFILE;
 	}
@@ -645,6 +650,8 @@ void kill_anon_super(struct super_block *sb)
 	generic_shutdown_super(sb);
 	spin_lock(&unnamed_dev_lock);
 	ida_remove(&unnamed_dev_ida, slot);
+	if (slot < unnamed_dev_start)
+		unnamed_dev_start = slot;
 	spin_unlock(&unnamed_dev_lock);
 }
 
diff --git a/fs/sync.c b/fs/sync.c
index dd200025af85..3422ba61d86d 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -112,8 +112,13 @@ restart:
 	mutex_unlock(&mutex);
 }
 
+/*
+ * sync everything.  Start out by waking pdflush, because that writes back
+ * all queues in parallel.
+ */
 SYSCALL_DEFINE0(sync)
 {
+	wakeup_pdflush(0);
 	sync_filesystems(0);
 	sync_filesystems(1);
 	if (unlikely(laptop_mode))
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 9345806c8853..2524714bece1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -171,6 +171,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
 	if (count > 0)
 		*off = offs + count;
 
+	kfree(temp);
 	return count;
 }
 
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index cfd31e229c89..adafcf556531 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -55,9 +55,9 @@
  * ACL support is not implemented.
  */
 
+#include "ubifs.h"
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
-#include "ubifs.h"
 
 /*
  * Limit the number of extended attributes per inode so that the total size
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index e48e9a3af763..1e068535b58b 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -238,7 +238,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
 
 	mutex_lock(&sbi->s_alloc_mutex);
 	part_len = sbi->s_partmaps[partition].s_partition_len;
-	if (first_block < 0 || first_block >= part_len)
+	if (first_block >= part_len)
 		goto out;
 
 	if (first_block + block_count > part_len)
@@ -297,7 +297,7 @@ static int udf_bitmap_new_block(struct super_block *sb,
 	mutex_lock(&sbi->s_alloc_mutex);
 
 repeat:
-	if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len)
+	if (goal >= sbi->s_partmaps[partition].s_partition_len)
 		goal = 0;
 
 	nr_groups = bitmap->s_nr_groups;
@@ -666,8 +666,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
 	int8_t etype = -1;
 	struct udf_inode_info *iinfo;
 
-	if (first_block < 0 ||
-		first_block >= sbi->s_partmaps[partition].s_partition_len)
+	if (first_block >= sbi->s_partmaps[partition].s_partition_len)
 		return 0;
 
 	iinfo = UDF_I(table);
@@ -743,7 +742,7 @@ static int udf_table_new_block(struct super_block *sb,
 		return newblock;
 
 	mutex_lock(&sbi->s_alloc_mutex);
-	if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len)
+	if (goal >= sbi->s_partmaps[partition].s_partition_len)
 		goal = 0;
 
 	/* We search for the closest matching block to goal. If we find
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 703843f30ffd..1b88fd5df05d 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -56,7 +56,12 @@ unsigned long udf_get_last_block(struct super_block *sb)
 	struct block_device *bdev = sb->s_bdev;
 	unsigned long lblock = 0;
 
-	if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock))
+	/*
+	 * ioctl failed or returned obviously bogus value?
+	 * Try using the device size...
+	 */
+	if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) ||
+	    lblock == 0)
 		lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
 
 	if (lblock)
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 1cd3b55ee3d2..2d3f90afe5f1 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 			printk(KERN_ERR "XFS: possible memory allocation "
 					"deadlock in %s (mode:0x%x)\n",
 					__func__, lflags);
-		congestion_wait(WRITE, HZ/50);
+		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
 }
 
@@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 			printk(KERN_ERR "XFS: possible memory allocation "
 					"deadlock in %s (mode:0x%x)\n",
 					__func__, lflags);
-		congestion_wait(WRITE, HZ/50);
+		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 1e9d1246eebc..b23a54506446 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -25,14 +25,10 @@
 #include <linux/posix_acl_xattr.h>
 
 
-#define XFS_ACL_NOT_CACHED	((void *)-1)
-
 /*
  * Locking scheme:
  *  - all ACL updates are protected by inode->i_mutex, which is taken before
  *    calling into this file.
- *  - access and updates to the ip->i_acl and ip->i_default_acl pointers are
- *    protected by inode->i_lock.
  */
 
 STATIC struct posix_acl *
@@ -102,59 +98,35 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
 	}
 }
 
-/*
- * Update the cached ACL pointer in the inode.
- *
- * Because we don't hold any locks while reading/writing the attribute
- * from/to disk another thread could have raced and updated the cached
- * ACL value before us. In that case we release the previous cached value
- * and update it with our new value.
- */
-STATIC void
-xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
-		struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
-		posix_acl_release(*p_acl);
-	*p_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 struct posix_acl *
 xfs_get_acl(struct inode *inode, int type)
 {
 	struct xfs_inode *ip = XFS_I(inode);
-	struct posix_acl *acl = NULL, **p_acl;
+	struct posix_acl *acl;
 	struct xfs_acl *xfs_acl;
 	int len = sizeof(struct xfs_acl);
 	char *ea_name;
 	int error;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		ea_name = SGI_ACL_FILE;
-		p_acl = &ip->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		ea_name = SGI_ACL_DEFAULT;
-		p_acl = &ip->i_default_acl;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 
-	spin_lock(&inode->i_lock);
-	if (*p_acl != XFS_ACL_NOT_CACHED)
-		acl = posix_acl_dup(*p_acl);
-	spin_unlock(&inode->i_lock);
-
 	/*
 	 * If we have a cached ACLs value just return it, not need to
 	 * go out to the disk.
 	 */
-	if (acl)
-		return acl;
 
 	xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
 	if (!xfs_acl)
@@ -165,7 +137,7 @@ xfs_get_acl(struct inode *inode, int type)
 		/*
 		 * If the attribute doesn't exist make sure we have a negative
 		 * cache entry, for any other error assume it is transient and
-		 * leave the cache entry as XFS_ACL_NOT_CACHED.
+		 * leave the cache entry as ACL_NOT_CACHED.
 		 */
 		if (error == -ENOATTR) {
 			acl = NULL;
@@ -179,7 +151,7 @@ xfs_get_acl(struct inode *inode, int type)
 		goto out;
 
  out_update_cache:
-	xfs_update_cached_acl(inode, p_acl, acl);
+	set_cached_acl(inode, type, acl);
  out:
 	kfree(xfs_acl);
 	return acl;
@@ -189,7 +161,6 @@ STATIC int
 xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
 	struct xfs_inode *ip = XFS_I(inode);
-	struct posix_acl **p_acl;
 	char *ea_name;
 	int error;
 
@@ -199,13 +170,11 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		ea_name = SGI_ACL_FILE;
-		p_acl = &ip->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EACCES : 0;
 		ea_name = SGI_ACL_DEFAULT;
-		p_acl = &ip->i_default_acl;
 		break;
 	default:
 		return -EINVAL;
@@ -242,7 +211,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	}
 
 	if (!error)
-		xfs_update_cached_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 	return error;
 }
 
@@ -384,30 +353,6 @@ xfs_acl_chmod(struct inode *inode)
 	return error;
 }
 
-void
-xfs_inode_init_acls(struct xfs_inode *ip)
-{
-	/*
-	 * No need for locking, inode is not live yet.
-	 */
-	ip->i_acl = XFS_ACL_NOT_CACHED;
-	ip->i_default_acl = XFS_ACL_NOT_CACHED;
-}
-
-void
-xfs_inode_clear_acls(struct xfs_inode *ip)
-{
-	/*
-	 * No need for locking here, the inode is not live anymore
-	 * and just about to be freed.
-	 */
-	if (ip->i_acl != XFS_ACL_NOT_CACHED)
-		posix_acl_release(ip->i_acl);
-	if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
-		posix_acl_release(ip->i_default_acl);
-}
-
-
 /*
  * System xattr handlers.
  *
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 1418b916fc27..0c93c7ef3d18 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -412,7 +412,7 @@ _xfs_buf_lookup_pages(
 
 			XFS_STATS_INC(xb_page_retries);
 			xfsbufd_wakeup(0, gfp_mask);
-			congestion_wait(WRITE, HZ/50);
+			congestion_wait(BLK_RW_ASYNC, HZ/50);
 			goto retry;
 		}
 
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 63dc1f2efad5..947b150df8ed 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,8 +46,6 @@ extern int xfs_check_acl(struct inode *inode, int mask);
 extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
 extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
 extern int xfs_acl_chmod(struct inode *inode);
-extern void xfs_inode_init_acls(struct xfs_inode *ip);
-extern void xfs_inode_clear_acls(struct xfs_inode *ip);
 extern int posix_acl_access_exists(struct inode *inode);
 extern int posix_acl_default_exists(struct inode *inode);
 
@@ -57,8 +55,6 @@ extern struct xattr_handler xfs_xattr_system_handler;
 # define xfs_get_acl(inode, type)			NULL
 # define xfs_inherit_acl(inode, default_acl)		0
 # define xfs_acl_chmod(inode)				0
-# define xfs_inode_init_acls(ip)
-# define xfs_inode_clear_acls(ip)
 # define posix_acl_access_exists(inode)			0
 # define posix_acl_default_exists(inode)		0
 #endif /* CONFIG_XFS_POSIX_ACL */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 76c540f719e4..5fcec6f020a7 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -83,7 +83,6 @@ xfs_inode_alloc(
 	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
 	ip->i_size = 0;
 	ip->i_new_size = 0;
-	xfs_inode_init_acls(ip);
 
 	/*
 	 * Initialize inode's trace buffers.
@@ -560,7 +559,6 @@ xfs_ireclaim(
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
 	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(completion_done(&ip->i_flush));
-	xfs_inode_clear_acls(ip);
 	kmem_zone_free(xfs_inode_zone, ip);
 }
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 77016702938b..1804f866a71d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -273,11 +273,6 @@ typedef struct xfs_inode {
 	/* VFS inode */
 	struct inode		i_vnode;	/* embedded VFS inode */
 
-#ifdef CONFIG_XFS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
-
 	/* Trace buffers per inode. */
 #ifdef XFS_INODE_TRACE
 	struct ktrace		*i_trace;	/* general inode trace */