diff options
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/Locking | 4 | ||||
-rw-r--r-- | Documentation/filesystems/automount-support.txt | 51 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/knfsd-stats.txt | 44 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 25 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 3 | ||||
-rw-r--r-- | Documentation/filesystems/quota.txt | 5 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 39 |
7 files changed, 69 insertions, 102 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 0a926e2ba3ab..6a34a0f4d37c 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -50,8 +50,8 @@ prototypes: int (*rename2) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); + const char *(*follow_link) (struct dentry *, void **); + void (*put_link) (struct inode *, void *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, unsigned int); int (*get_acl)(struct inode *, int); diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt index 7cac200e2a85..7eb762eb3136 100644 --- a/Documentation/filesystems/automount-support.txt +++ b/Documentation/filesystems/automount-support.txt @@ -1,41 +1,15 @@ -Support is available for filesystems that wish to do automounting support (such -as kAFS which can be found in fs/afs/). This facility includes allowing -in-kernel mounts to be performed and mountpoint degradation to be -requested. The latter can also be requested by userspace. +Support is available for filesystems that wish to do automounting +support (such as kAFS which can be found in fs/afs/ and NFS in +fs/nfs/). This facility includes allowing in-kernel mounts to be +performed and mountpoint degradation to be requested. The latter can +also be requested by userspace. ====================== IN-KERNEL AUTOMOUNTING ====================== -A filesystem can now mount another filesystem on one of its directories by the -following procedure: - - (1) Give the directory a follow_link() operation. - - When the directory is accessed, the follow_link op will be called, and - it will be provided with the location of the mountpoint in the nameidata - structure (vfsmount and dentry). - - (2) Have the follow_link() op do the following steps: - - (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a - superblock and gain a vfsmount structure representing it. - - (b) Copy the nameidata provided as an argument and substitute the dentry - argument into it the copy. - - (c) Call do_add_mount() to install the new vfsmount into the namespace's - mountpoint tree, thus making it accessible to userspace. Use the - nameidata set up in (b) as the destination. - - If the mountpoint will be automatically expired, then do_add_mount() - should also be given the location of an expiration list (see further - down). - - (d) Release the path in the nameidata argument and substitute in the new - vfsmount and its root dentry. The ref counts on these will need - incrementing. +See section "Mount Traps" of Documentation/filesystems/autofs4.txt Then from userspace, you can just do something like: @@ -61,17 +35,18 @@ AUTOMATIC MOUNTPOINT EXPIRY =========================== Automatic expiration of mountpoints is easy, provided you've mounted the -mountpoint to be expired in the automounting procedure outlined above. +mountpoint to be expired in the automounting procedure outlined separately. To do expiration, you need to follow these steps: - (3) Create at least one list off which the vfsmounts to be expired can be - hung. Access to this list will be governed by the vfsmount_lock. + (1) Create at least one list off which the vfsmounts to be expired can be + hung. - (4) In step (2c) above, the call to do_add_mount() should be provided with a - pointer to this list. It will hang the vfsmount off of it if it succeeds. + (2) When a new mountpoint is created in the ->d_automount method, add + the mnt to the list using mnt_set_expiry() + mnt_set_expiry(newmnt, &afs_vfsmounts); - (5) When you want mountpoints to be expired, call mark_mounts_for_expiry() + (3) When you want mountpoints to be expired, call mark_mounts_for_expiry() with a pointer to this list. This will process the list, marking every vfsmount thereon for potential expiry on the next call. diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt index 64ced5149d37..1a5d82180b84 100644 --- a/Documentation/filesystems/nfs/knfsd-stats.txt +++ b/Documentation/filesystems/nfs/knfsd-stats.txt @@ -68,16 +68,10 @@ sockets-enqueued rate of change for this counter is zero; significantly non-zero values may indicate a performance limitation. - This can happen either because there are too few nfsd threads in the - thread pool for the NFS workload (the workload is thread-limited), - or because the NFS workload needs more CPU time than is available in - the thread pool (the workload is CPU-limited). In the former case, - configuring more nfsd threads will probably improve the performance - of the NFS workload. In the latter case, the sunrpc server layer is - already choosing not to wake idle nfsd threads because there are too - many nfsd threads which want to run but cannot, so configuring more - nfsd threads will make no difference whatsoever. The overloads-avoided - statistic (see below) can be used to distinguish these cases. + This can happen because there are too few nfsd threads in the thread + pool for the NFS workload (the workload is thread-limited), in which + case configuring more nfsd threads will probably improve the + performance of the NFS workload. threads-woken Counts how many times an idle nfsd thread is woken to try to @@ -88,36 +82,6 @@ threads-woken thing. The ideal rate of change for this counter will be close to but less than the rate of change of the packets-arrived counter. -overloads-avoided - Counts how many times the sunrpc server layer chose not to wake an - nfsd thread, despite the presence of idle nfsd threads, because - too many nfsd threads had been recently woken but could not get - enough CPU time to actually run. - - This statistic counts a circumstance where the sunrpc layer - heuristically avoids overloading the CPU scheduler with too many - runnable nfsd threads. The ideal rate of change for this counter - is zero. Significant non-zero values indicate that the workload - is CPU limited. Usually this is associated with heavy CPU usage - on all the CPUs in the nfsd thread pool. - - If a sustained large overloads-avoided rate is detected on a pool, - the top(1) utility should be used to check for the following - pattern of CPU usage on all the CPUs associated with the given - nfsd thread pool. - - - %us ~= 0 (as you're *NOT* running applications on your NFS server) - - - %wa ~= 0 - - - %id ~= 0 - - - %sy + %hi + %si ~= 100 - - If this pattern is seen, configuring more nfsd threads will *not* - improve the performance of the workload. If this patten is not - seen, then something more subtle is wrong. - threads-timedout Counts how many times an nfsd thread triggered an idle timeout, i.e. was not woken to handle any incoming network packets for diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index e69274de8d0c..68f1c9106573 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -379,10 +379,10 @@ may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be returned if the filesystem cannot handle rcu-walk. See Documentation/filesystems/vfs.txt for more details. - permission and check_acl are inode permission checks that are called -on many or all directory inodes on the way down a path walk (to check for -exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU). -See Documentation/filesystems/vfs.txt for more details. + permission is an inode permission check that is called on many or all +directory inodes on the way down a path walk (to check for exec permission). It +must now be rcu-walk aware (mask & MAY_NOT_BLOCK). See +Documentation/filesystems/vfs.txt for more details. -- [mandatory] @@ -483,3 +483,20 @@ in your dentry operations instead. -- [mandatory] ->aio_read/->aio_write are gone. Use ->read_iter/->write_iter. +--- +[recommended] + for embedded ("fast") symlinks just set inode->i_link to wherever the + symlink body is and use simple_follow_link() as ->follow_link(). +-- +[mandatory] + calling conventions for ->follow_link() have changed. Instead of returning + cookie and using nd_set_link() to store the body to traverse, we return + the body to traverse and store the cookie using explicit void ** argument. + nameidata isn't passed at all - nd_jump_link() doesn't need it and + nd_[gs]et_link() is gone. +-- +[mandatory] + calling conventions for ->put_link() have changed. It gets inode instead of + dentry, it does not get nameidata at all and it gets called only when cookie + is non-NULL. Note that link body isn't available anymore, so if you need it, + store it as cookie. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index c3b6b301d8b0..6f7fafde0884 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -205,7 +205,7 @@ asynchronous manner and the value may not be very precise. To see a precise snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table. It's slow but very precise. -Table 1-2: Contents of the status files (as of 3.20.0) +Table 1-2: Contents of the status files (as of 4.1) .............................................................................. Field Content Name filename of the executable @@ -235,6 +235,7 @@ Table 1-2: Contents of the status files (as of 3.20.0) VmExe size of text segment VmLib size of shared library code VmPTE size of page table entries + VmPMD size of second level page tables VmSwap size of swap usage (the number of referred swapents) Threads number of threads SigQ number of signals queued/max. number for queue diff --git a/Documentation/filesystems/quota.txt b/Documentation/filesystems/quota.txt index 5e8de25bf0f1..29fc01552646 100644 --- a/Documentation/filesystems/quota.txt +++ b/Documentation/filesystems/quota.txt @@ -32,7 +32,10 @@ The interface uses generic netlink framework (see http://lwn.net/Articles/208755/ and http://people.suug.ch/~tgr/libnl/ for more details about this layer). The name of the quota generic netlink interface is "VFS_DQUOT". Definitions of constants below are in <linux/quota.h>. - Currently, the interface supports only one message type QUOTA_NL_C_WARNING. +Since the quota netlink protocol is not namespace aware, quota netlink messages +are sent only in initial network namespace. + +Currently, the interface supports only one message type QUOTA_NL_C_WARNING. This command is used to send a notification about any of the above mentioned events. Each message has six attributes. These are (type of the argument is in parentheses): diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 5d833b32bbcd..5eb8456fc41e 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -350,8 +350,8 @@ struct inode_operations { int (*rename2) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); + const char *(*follow_link) (struct dentry *, void **); + void (*put_link) (struct inode *, void *); int (*permission) (struct inode *, int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); @@ -436,16 +436,18 @@ otherwise noted. follow_link: called by the VFS to follow a symbolic link to the inode it points to. Only required if you want to support - symbolic links. This method returns a void pointer cookie - that is passed to put_link(). + symbolic links. This method returns the symlink body + to traverse (and possibly resets the current position with + nd_jump_link()). If the body won't go away until the inode + is gone, nothing else is needed; if it needs to be otherwise + pinned, the data needed to release whatever we'd grabbed + is to be stored in void * variable passed by address to + follow_link() instance. put_link: called by the VFS to release resources allocated by - follow_link(). The cookie returned by follow_link() is passed - to this method as the last parameter. It is used by - filesystems such as NFS where page cache is not stable - (i.e. page that was installed when the symbolic link walk - started might not be in the page cache at the end of the - walk). + follow_link(). The cookie stored by follow_link() is passed + to this method as the last parameter; only called when + cookie isn't NULL. permission: called by the VFS to check for access rights on a POSIX-like filesystem. @@ -797,7 +799,7 @@ struct file_operations ---------------------- This describes how the VFS can manipulate an open file. As of kernel -3.12, the following members are defined: +4.1, the following members are defined: struct file_operations { struct module *owner; @@ -811,8 +813,9 @@ struct file_operations { long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); + int (*mremap)(struct file *, struct vm_area_struct *); int (*open) (struct inode *, struct file *); - int (*flush) (struct file *); + int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t, loff_t, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); @@ -822,11 +825,15 @@ struct file_operations { unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); int (*flock) (struct file *, int, struct file_lock *); - ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int); - ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int); - int (*setlease)(struct file *, long arg, struct file_lock **, void **); - long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len); + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); + ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); + int (*setlease)(struct file *, long, struct file_lock **, void **); + long (*fallocate)(struct file *file, int mode, loff_t offset, + loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); +#ifndef CONFIG_MMU + unsigned (*mmap_capabilities)(struct file *); +#endif }; Again, all methods are called without any locks being held, unless |