diff options
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/Locking | 18 | ||||
-rw-r--r-- | Documentation/filesystems/automount-support.txt | 51 | ||||
-rw-r--r-- | Documentation/filesystems/dlmfs.txt | 4 | ||||
-rw-r--r-- | Documentation/filesystems/f2fs.txt | 6 | ||||
-rw-r--r-- | Documentation/filesystems/nfs/nfs-rdma.txt | 9 | ||||
-rw-r--r-- | Documentation/filesystems/ocfs2.txt | 4 | ||||
-rw-r--r-- | Documentation/filesystems/overlayfs.txt | 28 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 29 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 21 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 30 | ||||
-rw-r--r-- | Documentation/filesystems/xfs.txt | 29 |
11 files changed, 134 insertions, 95 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 2ca3d17eee56..6a34a0f4d37c 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -50,8 +50,8 @@ prototypes: int (*rename2) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); + const char *(*follow_link) (struct dentry *, void **); + void (*put_link) (struct inode *, void *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, unsigned int); int (*get_acl)(struct inode *, int); @@ -164,8 +164,6 @@ the block device inode. See there for more details. --------------------------- file_system_type --------------------------- prototypes: - int (*get_sb) (struct file_system_type *, int, - const char *, void *, struct vfsmount *); struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); void (*kill_sb) (struct super_block *); @@ -198,7 +196,7 @@ prototypes: void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); + int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); int (*migratepage)(struct address_space *, struct page *, struct page *); int (*launder_page)(struct page *); int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); @@ -431,8 +429,6 @@ prototypes: loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); - ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned 
long, loff_t); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); @@ -527,6 +523,7 @@ prototypes: void (*close)(struct vm_area_struct*); int (*fault)(struct vm_area_struct*, struct vm_fault *); int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); + int (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *); int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); locking rules: @@ -536,6 +533,7 @@ close: yes fault: yes can return with page locked map_pages: yes page_mkwrite: yes can return with page locked +pfn_mkwrite: yes access: yes ->fault() is called when a previously not present pte is about @@ -562,6 +560,12 @@ the page has been truncated, the filesystem should not look up a new page like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which will cause the VM to retry the fault. + ->pfn_mkwrite() is the same as page_mkwrite but when the pte is +VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is +VM_FAULT_NOPAGE. Or one of the VM_FAULT_ERROR types. The default behavior +after this call is to make the pte read-write, unless pfn_mkwrite returns +an error. + ->access() is called when get_user_pages() fails in access_process_vm(), typically used to debug a process through /proc/pid/mem or ptrace. This function is needed only for diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt index 7cac200e2a85..7eb762eb3136 100644 --- a/Documentation/filesystems/automount-support.txt +++ b/Documentation/filesystems/automount-support.txt @@ -1,41 +1,15 @@ -Support is available for filesystems that wish to do automounting support (such -as kAFS which can be found in fs/afs/). This facility includes allowing -in-kernel mounts to be performed and mountpoint degradation to be -requested. The latter can also be requested by userspace. 
+Support is available for filesystems that wish to do automounting +support (such as kAFS which can be found in fs/afs/ and NFS in +fs/nfs/). This facility includes allowing in-kernel mounts to be +performed and mountpoint degradation to be requested. The latter can +also be requested by userspace. ====================== IN-KERNEL AUTOMOUNTING ====================== -A filesystem can now mount another filesystem on one of its directories by the -following procedure: - - (1) Give the directory a follow_link() operation. - - When the directory is accessed, the follow_link op will be called, and - it will be provided with the location of the mountpoint in the nameidata - structure (vfsmount and dentry). - - (2) Have the follow_link() op do the following steps: - - (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a - superblock and gain a vfsmount structure representing it. - - (b) Copy the nameidata provided as an argument and substitute the dentry - argument into it the copy. - - (c) Call do_add_mount() to install the new vfsmount into the namespace's - mountpoint tree, thus making it accessible to userspace. Use the - nameidata set up in (b) as the destination. - - If the mountpoint will be automatically expired, then do_add_mount() - should also be given the location of an expiration list (see further - down). - - (d) Release the path in the nameidata argument and substitute in the new - vfsmount and its root dentry. The ref counts on these will need - incrementing. +See section "Mount Traps" of Documentation/filesystems/autofs4.txt Then from userspace, you can just do something like: @@ -61,17 +35,18 @@ AUTOMATIC MOUNTPOINT EXPIRY =========================== Automatic expiration of mountpoints is easy, provided you've mounted the -mountpoint to be expired in the automounting procedure outlined above. +mountpoint to be expired in the automounting procedure outlined separately. 
To do expiration, you need to follow these steps: - (3) Create at least one list off which the vfsmounts to be expired can be - hung. Access to this list will be governed by the vfsmount_lock. + (1) Create at least one list off which the vfsmounts to be expired can be + hung. - (4) In step (2c) above, the call to do_add_mount() should be provided with a - pointer to this list. It will hang the vfsmount off of it if it succeeds. + (2) When a new mountpoint is created in the ->d_automount method, add + the mnt to the list using mnt_set_expiry() + mnt_set_expiry(newmnt, &afs_vfsmounts); - (5) When you want mountpoints to be expired, call mark_mounts_for_expiry() + (3) When you want mountpoints to be expired, call mark_mounts_for_expiry() with a pointer to this list. This will process the list, marking every vfsmount thereon for potential expiry on the next call. diff --git a/Documentation/filesystems/dlmfs.txt b/Documentation/filesystems/dlmfs.txt index 1b528b2ad809..fcf4d509d118 100644 --- a/Documentation/filesystems/dlmfs.txt +++ b/Documentation/filesystems/dlmfs.txt @@ -5,8 +5,8 @@ system. dlmfs is built with OCFS2 as it requires most of its infrastructure. -Project web page: http://oss.oracle.com/projects/ocfs2 -Tools web page: http://oss.oracle.com/projects/ocfs2-tools +Project web page: http://ocfs2.wiki.kernel.org +Tools web page: https://github.com/markfasheh/ocfs2-tools OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ All code copyright 2005 Oracle except when otherwise noted. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index dac11d7fef27..e9e750e59efc 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -140,6 +140,12 @@ nobarrier This option can be used if underlying storage guarantees fastboot This option is used when a system wants to reduce mount time as much as possible, even though normal performance can be sacrificed. 
+extent_cache Enable an extent cache based on rb-tree; it can cache + as many extents as possible, each mapping between contiguous logical + address and physical address per inode, resulting in + an increased cache hit ratio. +noinline_data Disable the inline data feature; the inline data feature is + enabled by default. ================================================================================ DEBUGFS ENTRIES diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt index 724043858b08..95c13aa575ff 100644 --- a/Documentation/filesystems/nfs/nfs-rdma.txt +++ b/Documentation/filesystems/nfs/nfs-rdma.txt @@ -187,8 +187,10 @@ Check RDMA and NFS Setup To further test the InfiniBand software stack, use IPoIB (this assumes you have two IB hosts named host1 and host2): - host1$ ifconfig ib0 a.b.c.x - host2$ ifconfig ib0 a.b.c.y + host1$ ip link set dev ib0 up + host1$ ip address add dev ib0 a.b.c.x + host2$ ip link set dev ib0 up + host2$ ip address add dev ib0 a.b.c.y host1$ ping a.b.c.y host2$ ping a.b.c.x @@ -229,7 +231,8 @@ NFS/RDMA Setup $ modprobe ib_mthca $ modprobe ib_ipoib - $ ifconfig ib0 a.b.c.d + $ ip li set dev ib0 up + $ ip addr add dev ib0 a.b.c.d NOTE: use unique addresses for the client and server diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 28f8c08201e2..4c49e5410595 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -8,8 +8,8 @@ also make it attractive for non-clustered use. You'll want to install the ocfs2-tools package in order to at least get "mount.ocfs2" and "ocfs2_hb_ctl". -Project web page: http://oss.oracle.com/projects/ocfs2 -Tools web page: http://oss.oracle.com/projects/ocfs2-tools +Project web page: http://ocfs2.wiki.kernel.org +Tools git tree: https://github.com/markfasheh/ocfs2-tools OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ All code copyright 2005 Oracle except when otherwise noted. 
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index a27c950ece61..6db0e5d1da07 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -159,6 +159,22 @@ overlay filesystem (though an operation on the name of the file such as rename or unlink will of course be noticed and handled). +Multiple lower layers +--------------------- + +Multiple lower layers can now be given using the colon (":") as a +separator character between the directory names. For example: + + mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged + +As the example shows, "upperdir=" and "workdir=" may be omitted. In +that case the overlay will be read-only. + +The specified lower directories will be stacked beginning from the +rightmost one and going left. In the above example lower1 will be the +top, lower2 the middle and lower3 the bottom layer. + + Non-standard behavior --------------------- @@ -196,3 +212,15 @@ Changes to the underlying filesystems while part of a mounted overlay filesystem are not allowed. If the underlying filesystem is changed, the behavior of the overlay is undefined, though it will not result in a crash or deadlock. + +Testsuite +--------- + +There's a testsuite developed by David Howells at: + + git://git.infradead.org/users/dhowells/unionmount-testsuite.git + +Run as root: + + # cd unionmount-testsuite + # ./run --ov diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index fa2db081505e..3eae250254d5 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -471,3 +471,32 @@ in your dentry operations instead. [mandatory] f_dentry is gone; use f_path.dentry, or, better yet, see if you can avoid it entirely. 
+-- +[mandatory] + never call ->read() and ->write() directly; use __vfs_{read,write} or + wrappers; instead of checking for ->write or ->read being NULL, look for + FMODE_CAN_{WRITE,READ} in file->f_mode. +-- +[mandatory] + do _not_ use new_sync_{read,write} for ->read/->write; leave it NULL + instead. +-- +[mandatory] + ->aio_read/->aio_write are gone. Use ->read_iter/->write_iter. +--- +[recommended] + for embedded ("fast") symlinks just set inode->i_link to wherever the + symlink body is and use simple_follow_link() as ->follow_link(). +-- +[mandatory] + calling conventions for ->follow_link() have changed. Instead of returning + cookie and using nd_set_link() to store the body to traverse, we return + the body to traverse and store the cookie using explicit void ** argument. + nameidata isn't passed at all - nd_jump_link() doesn't need it and + nd_[gs]et_link() is gone. +-- +[mandatory] + calling conventions for ->put_link() have changed. It gets inode instead of + dentry, it does not get nameidata at all and it gets called only when cookie + is non-NULL. Note that link body isn't available anymore, so if you need it, + store it as cookie. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a07ba61662ed..c3b6b301d8b0 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -200,12 +200,12 @@ contains details information about the process itself. Its fields are explained in Table 1-4. (for SMP CONFIG users) -For making accounting scalable, RSS related information are handled in -asynchronous manner and the vaule may not be very precise. To see a precise +For making accounting scalable, RSS related information are handled in an +asynchronous manner and the value may not be very precise. To see a precise snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table. It's slow but very precise. 
-Table 1-2: Contents of the status files (as of 2.6.30-rc7) +Table 1-2: Contents of the status files (as of 3.20.0) .............................................................................. Field Content Name filename of the executable @@ -213,6 +213,7 @@ Table 1-2: Contents of the status files (as of 2.6.30-rc7) in an uninterruptible wait, Z is zombie, T is traced or stopped) Tgid thread group ID + Ngid NUMA group ID (0 if none) Pid process id PPid process id of the parent process TracerPid PID of process tracing this process (0 if not) @@ -220,6 +221,10 @@ Table 1-2: Contents of the status files (as of 2.6.30-rc7) Gid Real, effective, saved set, and file system GIDs FDSize number of file descriptor slots currently allocated Groups supplementary group list + NStgid descendant namespace thread group ID hierarchy + NSpid descendant namespace process ID hierarchy + NSpgid descendant namespace process group ID hierarchy + NSsid descendant namespace session ID hierarchy VmPeak peak virtual memory size VmSize total program size VmLck locked memory size @@ -1255,9 +1260,9 @@ Various pieces of information about kernel activity are available in the since the system first booted. For a quick look, simply cat the file: > cat /proc/stat - cpu 2255 34 2290 22625563 6290 127 456 0 0 - cpu0 1132 34 1441 11311718 3675 127 438 0 0 - cpu1 1123 0 849 11313845 2614 0 18 0 0 + cpu 2255 34 2290 22625563 6290 127 456 0 0 0 + cpu0 1132 34 1441 11311718 3675 127 438 0 0 0 + cpu1 1123 0 849 11313845 2614 0 18 0 0 0 intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...] ctxt 1990473 btime 1062191376 @@ -1704,6 +1709,10 @@ A typical output is flags: 0100002 mnt_id: 19 +All locks associated with a file descriptor are shown in its fdinfo too. + +lock: 1: FLOCK ADVISORY WRITE 359 00:13:11691 0 EOF + The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags pair provide additional information particular to the objects they represent. 
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 966b22829f3b..b403b29ef710 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -350,8 +350,8 @@ struct inode_operations { int (*rename2) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); + const char *(*follow_link) (struct dentry *, void **); + void (*put_link) (struct inode *, void *); int (*permission) (struct inode *, int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); @@ -436,16 +436,18 @@ otherwise noted. follow_link: called by the VFS to follow a symbolic link to the inode it points to. Only required if you want to support - symbolic links. This method returns a void pointer cookie - that is passed to put_link(). + symbolic links. This method returns the symlink body + to traverse (and possibly resets the current position with + nd_jump_link()). If the body won't go away until the inode + is gone, nothing else is needed; if it needs to be otherwise + pinned, the data needed to release whatever we'd grabbed + is to be stored in void * variable passed by address to + follow_link() instance. put_link: called by the VFS to release resources allocated by - follow_link(). The cookie returned by follow_link() is passed - to this method as the last parameter. It is used by - filesystems such as NFS where page cache is not stable - (i.e. page that was installed when the symbolic link walk - started might not be in the page cache at the end of the - walk). + follow_link(). The cookie stored by follow_link() is passed + to this method as the last parameter; only called when + cookie isn't NULL. permission: called by the VFS to check for access rights on a POSIX-like filesystem. 
@@ -590,7 +592,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); int (*launder_page) (struct page *); @@ -804,8 +806,6 @@ struct file_operations { loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); - ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); @@ -838,14 +838,10 @@ otherwise noted. read: called by read(2) and related system calls - aio_read: vectored, possibly asynchronous read - read_iter: possibly asynchronous read with iov_iter as destination write: called by write(2) and related system calls - aio_write: vectored, possibly asynchronous write - write_iter: possibly asynchronous write with iov_iter as source iterate: called when the VFS needs to read the directory contents diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 0bfafe108357..5a5a05582b58 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -228,30 +228,19 @@ default behaviour. Deprecated Mount Options ======================== - delaylog/nodelaylog - Delayed logging is the only logging method that XFS supports - now, so these mount options are now ignored. - - Due for removal in 3.12. 
- - ihashsize=value - In memory inode hashes have been removed, so this option has - no function as of August 2007. Option is deprecated. - - Due for removal in 3.12. +None at present. - irixsgid - This behaviour is now controlled by a sysctl, so the mount - option is ignored. - Due for removal in 3.12. +Removed Mount Options +===================== - osyncisdsync - osyncisosync - O_SYNC and O_DSYNC are fully supported, so there is no need - for these options any more. + Name Removed + ---- ------- + delaylog/nodelaylog v3.20 + ihashsize v3.20 + irixsgid v3.20 + osyncisdsync/osyncisosync v3.20 - Due for removal in 3.12. sysctls ======= |