7 files changed, 69 insertions, 102 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 0a926e2ba3ab..6a34a0f4d37c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -50,8 +50,8 @@ prototypes:
 	int (*rename2) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *, unsigned int);
 	int (*readlink) (struct dentry *, char __user *,int);
-	void * (*follow_link) (struct dentry *, struct nameidata *);
-	void (*put_link) (struct dentry *, struct nameidata *, void *);
+	const char *(*follow_link) (struct dentry *, void **);
+	void (*put_link) (struct inode *, void *);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int, unsigned int);
 	int (*get_acl)(struct inode *, int);
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt
index 7cac200e2a85..7eb762eb3136 100644
--- a/Documentation/filesystems/automount-support.txt
+++ b/Documentation/filesystems/automount-support.txt
@@ -1,41 +1,15 @@
-Support is available for filesystems that wish to do automounting support (such
-as kAFS which can be found in fs/afs/). This facility includes allowing
-in-kernel mounts to be performed and mountpoint degradation to be
-requested. The latter can also be requested by userspace.
+Support is available for filesystems that wish to do automounting
+support (such as kAFS which can be found in fs/afs/ and NFS in
+fs/nfs/). This facility includes allowing in-kernel mounts to be
+performed and mountpoint degradation to be requested. The latter can
+also be requested by userspace.
 
 
 ======================
 IN-KERNEL AUTOMOUNTING
 ======================
 
-A filesystem can now mount another filesystem on one of its directories by the
-following procedure:
-
- (1) Give the directory a follow_link() operation.
-
-     When the directory is accessed, the follow_link op will be called, and
-     it will be provided with the location of the mountpoint in the nameidata
-     structure (vfsmount and dentry).
-
- (2) Have the follow_link() op do the following steps:
-
-     (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a
-         superblock and gain a vfsmount structure representing it.
-
-     (b) Copy the nameidata provided as an argument and substitute the dentry
-	 argument into it the copy.
-
-     (c) Call do_add_mount() to install the new vfsmount into the namespace's
-	 mountpoint tree, thus making it accessible to userspace. Use the
-	 nameidata set up in (b) as the destination.
-
-	 If the mountpoint will be automatically expired, then do_add_mount()
-	 should also be given the location of an expiration list (see further
-	 down).
-
-     (d) Release the path in the nameidata argument and substitute in the new
-	 vfsmount and its root dentry. The ref counts on these will need
-	 incrementing.
+See section "Mount Traps" of  Documentation/filesystems/autofs4.txt
 
 Then from userspace, you can just do something like:
 
@@ -61,17 +35,18 @@ AUTOMATIC MOUNTPOINT EXPIRY
 ===========================
 
 Automatic expiration of mountpoints is easy, provided you've mounted the
-mountpoint to be expired in the automounting procedure outlined above.
+mountpoint to be expired in the automounting procedure outlined separately.
 
 To do expiration, you need to follow these steps:
 
- (3) Create at least one list off which the vfsmounts to be expired can be
-     hung. Access to this list will be governed by the vfsmount_lock.
+ (1) Create at least one list off which the vfsmounts to be expired can be
+     hung.
 
- (4) In step (2c) above, the call to do_add_mount() should be provided with a
-     pointer to this list. It will hang the vfsmount off of it if it succeeds.
+ (2) When a new mountpoint is created in the ->d_automount method, add
+     the mnt to the list using mnt_set_expiry()
+             mnt_set_expiry(newmnt, &afs_vfsmounts);
 
- (5) When you want mountpoints to be expired, call mark_mounts_for_expiry()
+ (3) When you want mountpoints to be expired, call mark_mounts_for_expiry()
      with a pointer to this list. This will process the list, marking every
      vfsmount thereon for potential expiry on the next call.
 
diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt
index 64ced5149d37..1a5d82180b84 100644
--- a/Documentation/filesystems/nfs/knfsd-stats.txt
+++ b/Documentation/filesystems/nfs/knfsd-stats.txt
@@ -68,16 +68,10 @@ sockets-enqueued
 	rate of change for this counter is zero; significantly non-zero
 	values may indicate a performance limitation.
 
-	This can happen either because there are too few nfsd threads in the
-	thread pool for the NFS workload (the workload is thread-limited),
-	or because the NFS workload needs more CPU time than is available in
-	the thread pool (the workload is CPU-limited).  In the former case,
-	configuring more nfsd threads will probably improve the performance
-	of the NFS workload.  In the latter case, the sunrpc server layer is
-	already choosing not to wake idle nfsd threads because there are too
-	many nfsd threads which want to run but cannot, so configuring more
-	nfsd threads will make no difference whatsoever.  The overloads-avoided
-	statistic (see below) can be used to distinguish these cases.
+	This can happen because there are too few nfsd threads in the thread
+	pool for the NFS workload (the workload is thread-limited), in which
+	case configuring more nfsd threads will probably improve the
+	performance of the NFS workload.
 
 threads-woken
 	Counts how many times an idle nfsd thread is woken to try to
@@ -88,36 +82,6 @@ threads-woken
 	thing.  The ideal rate of change for this counter will be close
 	to but less than the rate of change of the packets-arrived counter.
 
-overloads-avoided
-	Counts how many times the sunrpc server layer chose not to wake an
-	nfsd thread, despite the presence of idle nfsd threads, because
-	too many nfsd threads had been recently woken but could not get
-	enough CPU time to actually run.
-
-	This statistic counts a circumstance where the sunrpc layer
-	heuristically avoids overloading the CPU scheduler with too many
-	runnable nfsd threads.  The ideal rate of change for this counter
-	is zero.  Significant non-zero values indicate that the workload
-	is CPU limited.  Usually this is associated with heavy CPU usage
-	on all the CPUs in the nfsd thread pool.
-
-	If a sustained large overloads-avoided rate is detected on a pool,
-	the top(1) utility should be used to check for the following
-	pattern of CPU usage on all the CPUs associated with the given
-	nfsd thread pool.
-
-	 - %us ~= 0 (as you're *NOT* running applications on your NFS server)
-
-	 - %wa ~= 0
-
-	 - %id ~= 0
-
-	 - %sy + %hi + %si ~= 100
-
-	If this pattern is seen, configuring more nfsd threads will *not*
-	improve the performance of the workload.  If this patten is not
-	seen, then something more subtle is wrong.
-
 threads-timedout
 	Counts how many times an nfsd thread triggered an idle timeout,
 	i.e. was not woken to handle any incoming network packets for
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index e69274de8d0c..68f1c9106573 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -379,10 +379,10 @@ may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be
 returned if the filesystem cannot handle rcu-walk. See
 Documentation/filesystems/vfs.txt for more details.
 
-	permission and check_acl are inode permission checks that are called
-on many or all directory inodes on the way down a path walk (to check for
-exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU).
-See Documentation/filesystems/vfs.txt for more details.
+	permission is an inode permission check that is called on many or all
+directory inodes on the way down a path walk (to check for exec permission). It
+must now be rcu-walk aware (mask & MAY_NOT_BLOCK).  See
+Documentation/filesystems/vfs.txt for more details.
  
 --
 [mandatory]
@@ -483,3 +483,20 @@ in your dentry operations instead.
 --
 [mandatory]
 	->aio_read/->aio_write are gone.  Use ->read_iter/->write_iter.
+---
+[recommended]
+	for embedded ("fast") symlinks just set inode->i_link to wherever the
+	symlink body is and use simple_follow_link() as ->follow_link().
+--
+[mandatory]
+	calling conventions for ->follow_link() have changed.  Instead of returning
+	cookie and using nd_set_link() to store the body to traverse, we return
+	the body to traverse and store the cookie using explicit void ** argument.
+	nameidata isn't passed at all - nd_jump_link() doesn't need it and
+	nd_[gs]et_link() is gone.
+--
+[mandatory]
+	calling conventions for ->put_link() have changed.  It gets inode instead of
+	dentry,  it does not get nameidata at all and it gets called only when cookie
+	is non-NULL.  Note that link body isn't available anymore, so if you need it,
+	store it as cookie.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index c3b6b301d8b0..6f7fafde0884 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -205,7 +205,7 @@ asynchronous manner and the value may not be very precise. To see a precise
 snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
 It's slow but very precise.
 
-Table 1-2: Contents of the status files (as of 3.20.0)
+Table 1-2: Contents of the status files (as of 4.1)
 ..............................................................................
  Field                       Content
  Name                        filename of the executable
@@ -235,6 +235,7 @@ Table 1-2: Contents of the status files (as of 3.20.0)
  VmExe                       size of text segment
  VmLib                       size of shared library code
  VmPTE                       size of page table entries
+ VmPMD                       size of second level page tables
  VmSwap                      size of swap usage (the number of referred swapents)
  Threads                     number of threads
  SigQ                        number of signals queued/max. number for queue
diff --git a/Documentation/filesystems/quota.txt b/Documentation/filesystems/quota.txt
index 5e8de25bf0f1..29fc01552646 100644
--- a/Documentation/filesystems/quota.txt
+++ b/Documentation/filesystems/quota.txt
@@ -32,7 +32,10 @@ The interface uses generic netlink framework (see
 http://lwn.net/Articles/208755/ and http://people.suug.ch/~tgr/libnl/ for more
 details about this layer). The name of the quota generic netlink interface
 is "VFS_DQUOT". Definitions of constants below are in <linux/quota.h>.
-  Currently, the interface supports only one message type QUOTA_NL_C_WARNING.
+Since the quota netlink protocol is not namespace aware, quota netlink messages
+are sent only in initial network namespace.
+
+Currently, the interface supports only one message type QUOTA_NL_C_WARNING.
 This command is used to send a notification about any of the above mentioned
 events. Each message has six attributes. These are (type of the argument is
 in parentheses):
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 5d833b32bbcd..5eb8456fc41e 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -350,8 +350,8 @@ struct inode_operations {
 	int (*rename2) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *, unsigned int);
 	int (*readlink) (struct dentry *, char __user *,int);
-        void * (*follow_link) (struct dentry *, struct nameidata *);
-        void (*put_link) (struct dentry *, struct nameidata *, void *);
+	const char *(*follow_link) (struct dentry *, void **);
+	void (*put_link) (struct inode *, void *);
 	int (*permission) (struct inode *, int);
 	int (*get_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
@@ -436,16 +436,18 @@ otherwise noted.
 
   follow_link: called by the VFS to follow a symbolic link to the
 	inode it points to.  Only required if you want to support
-	symbolic links.  This method returns a void pointer cookie
-	that is passed to put_link().
+	symbolic links.  This method returns the symlink body
+	to traverse (and possibly resets the current position with
+	nd_jump_link()).  If the body won't go away until the inode
+	is gone, nothing else is needed; if it needs to be otherwise
+	pinned, the data needed to release whatever we'd grabbed
+	is to be stored in void * variable passed by address to
+	follow_link() instance.
 
   put_link: called by the VFS to release resources allocated by
-  	follow_link().  The cookie returned by follow_link() is passed
-  	to this method as the last parameter.  It is used by
-  	filesystems such as NFS where page cache is not stable
-  	(i.e. page that was installed when the symbolic link walk
-  	started might not be in the page cache at the end of the
-  	walk).
+	follow_link().  The cookie stored by follow_link() is passed
+	to this method as the last parameter; only called when
+	cookie isn't NULL.
 
   permission: called by the VFS to check for access rights on a POSIX-like
   	filesystem.
@@ -797,7 +799,7 @@ struct file_operations
 ----------------------
 
 This describes how the VFS can manipulate an open file. As of kernel
-3.12, the following members are defined:
+4.1, the following members are defined:
 
 struct file_operations {
 	struct module *owner;
@@ -811,8 +813,9 @@ struct file_operations {
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
+	int (*mremap)(struct file *, struct vm_area_struct *);
 	int (*open) (struct inode *, struct file *);
-	int (*flush) (struct file *);
+	int (*flush) (struct file *, fl_owner_t id);
 	int (*release) (struct inode *, struct file *);
 	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
 	int (*aio_fsync) (struct kiocb *, int datasync);
@@ -822,11 +825,15 @@ struct file_operations {
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
 	int (*flock) (struct file *, int, struct file_lock *);
-	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
-	ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
-	int (*setlease)(struct file *, long arg, struct file_lock **, void **);
-	long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
+	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
+	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+	int (*setlease)(struct file *, long, struct file_lock **, void **);
+	long (*fallocate)(struct file *file, int mode, loff_t offset,
+			  loff_t len);
 	void (*show_fdinfo)(struct seq_file *m, struct file *f);
+#ifndef CONFIG_MMU
+	unsigned (*mmap_capabilities)(struct file *);
+#endif
 };
 
 Again, all methods are called without any locks being held, unless