summaryrefslogtreecommitdiff
path: root/Documentation/filesystems
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/Locking18
-rw-r--r--Documentation/filesystems/automount-support.txt51
-rw-r--r--Documentation/filesystems/dlmfs.txt4
-rw-r--r--Documentation/filesystems/f2fs.txt6
-rw-r--r--Documentation/filesystems/nfs/nfs-rdma.txt9
-rw-r--r--Documentation/filesystems/ocfs2.txt4
-rw-r--r--Documentation/filesystems/overlayfs.txt28
-rw-r--r--Documentation/filesystems/porting29
-rw-r--r--Documentation/filesystems/proc.txt21
-rw-r--r--Documentation/filesystems/vfs.txt30
-rw-r--r--Documentation/filesystems/xfs.txt29
11 files changed, 134 insertions, 95 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 2ca3d17eee56..6a34a0f4d37c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -50,8 +50,8 @@ prototypes:
int (*rename2) (struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
- void * (*follow_link) (struct dentry *, struct nameidata *);
- void (*put_link) (struct dentry *, struct nameidata *, void *);
+ const char *(*follow_link) (struct dentry *, void **);
+ void (*put_link) (struct inode *, void *);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int, unsigned int);
int (*get_acl)(struct inode *, int);
@@ -164,8 +164,6 @@ the block device inode. See there for more details.
--------------------------- file_system_type ---------------------------
prototypes:
- int (*get_sb) (struct file_system_type *, int,
- const char *, void *, struct vfsmount *);
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *);
void (*kill_sb) (struct super_block *);
@@ -198,7 +196,7 @@ prototypes:
void (*invalidatepage) (struct page *, unsigned int, unsigned int);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
- int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
+ int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
int (*migratepage)(struct address_space *, struct page *, struct page *);
int (*launder_page)(struct page *);
int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
@@ -431,8 +429,6 @@ prototypes:
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
- ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
@@ -527,6 +523,7 @@ prototypes:
void (*close)(struct vm_area_struct*);
int (*fault)(struct vm_area_struct*, struct vm_fault *);
int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
+ int (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
locking rules:
@@ -536,6 +533,7 @@ close: yes
fault: yes can return with page locked
map_pages: yes
page_mkwrite: yes can return with page locked
+pfn_mkwrite: yes
access: yes
->fault() is called when a previously not present pte is about
@@ -562,6 +560,12 @@ the page has been truncated, the filesystem should not look up a new page
like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
will cause the VM to retry the fault.
+ ->pfn_mkwrite() is the same as page_mkwrite but when the pte is
+VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is
+VM_FAULT_NOPAGE or one of the VM_FAULT_ERROR types. The default behavior
+after this call is to make the pte read-write, unless pfn_mkwrite returns
+an error.
+
->access() is called when get_user_pages() fails in
access_process_vm(), typically used to debug a process through
/proc/pid/mem or ptrace. This function is needed only for
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt
index 7cac200e2a85..7eb762eb3136 100644
--- a/Documentation/filesystems/automount-support.txt
+++ b/Documentation/filesystems/automount-support.txt
@@ -1,41 +1,15 @@
-Support is available for filesystems that wish to do automounting support (such
-as kAFS which can be found in fs/afs/). This facility includes allowing
-in-kernel mounts to be performed and mountpoint degradation to be
-requested. The latter can also be requested by userspace.
+Support is available for filesystems that wish to do automounting
+support (such as kAFS which can be found in fs/afs/ and NFS in
+fs/nfs/). This facility includes allowing in-kernel mounts to be
+performed and mountpoint degradation to be requested. The latter can
+also be requested by userspace.
======================
IN-KERNEL AUTOMOUNTING
======================
-A filesystem can now mount another filesystem on one of its directories by the
-following procedure:
-
- (1) Give the directory a follow_link() operation.
-
- When the directory is accessed, the follow_link op will be called, and
- it will be provided with the location of the mountpoint in the nameidata
- structure (vfsmount and dentry).
-
- (2) Have the follow_link() op do the following steps:
-
- (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a
- superblock and gain a vfsmount structure representing it.
-
- (b) Copy the nameidata provided as an argument and substitute the dentry
- argument into it the copy.
-
- (c) Call do_add_mount() to install the new vfsmount into the namespace's
- mountpoint tree, thus making it accessible to userspace. Use the
- nameidata set up in (b) as the destination.
-
- If the mountpoint will be automatically expired, then do_add_mount()
- should also be given the location of an expiration list (see further
- down).
-
- (d) Release the path in the nameidata argument and substitute in the new
- vfsmount and its root dentry. The ref counts on these will need
- incrementing.
+See section "Mount Traps" of Documentation/filesystems/autofs4.txt
Then from userspace, you can just do something like:
@@ -61,17 +35,18 @@ AUTOMATIC MOUNTPOINT EXPIRY
===========================
Automatic expiration of mountpoints is easy, provided you've mounted the
-mountpoint to be expired in the automounting procedure outlined above.
+mountpoint to be expired in the automounting procedure outlined separately.
To do expiration, you need to follow these steps:
- (3) Create at least one list off which the vfsmounts to be expired can be
- hung. Access to this list will be governed by the vfsmount_lock.
+ (1) Create at least one list off which the vfsmounts to be expired can be
+ hung.
- (4) In step (2c) above, the call to do_add_mount() should be provided with a
- pointer to this list. It will hang the vfsmount off of it if it succeeds.
+ (2) When a new mountpoint is created in the ->d_automount method, add
+ the mnt to the list using mnt_set_expiry()
+ mnt_set_expiry(newmnt, &afs_vfsmounts);
- (5) When you want mountpoints to be expired, call mark_mounts_for_expiry()
+ (3) When you want mountpoints to be expired, call mark_mounts_for_expiry()
with a pointer to this list. This will process the list, marking every
vfsmount thereon for potential expiry on the next call.
diff --git a/Documentation/filesystems/dlmfs.txt b/Documentation/filesystems/dlmfs.txt
index 1b528b2ad809..fcf4d509d118 100644
--- a/Documentation/filesystems/dlmfs.txt
+++ b/Documentation/filesystems/dlmfs.txt
@@ -5,8 +5,8 @@ system.
dlmfs is built with OCFS2 as it requires most of its infrastructure.
-Project web page: http://oss.oracle.com/projects/ocfs2
-Tools web page: http://oss.oracle.com/projects/ocfs2-tools
+Project web page: http://ocfs2.wiki.kernel.org
+Tools web page: https://github.com/markfasheh/ocfs2-tools
OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
All code copyright 2005 Oracle except when otherwise noted.
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index dac11d7fef27..e9e750e59efc 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -140,6 +140,12 @@ nobarrier This option can be used if underlying storage guarantees
fastboot This option is used when a system wants to reduce mount
time as much as possible, even though normal performance
can be sacrificed.
+extent_cache Enable an extent cache based on rb-tree. It can cache
+ many extents, each mapping between contiguous logical
+ and physical addresses, per inode, which increases the
+ cache hit ratio.
+noinline_data Disable the inline data feature; the inline data feature
+ is enabled by default.
================================================================================
DEBUGFS ENTRIES
diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
index 724043858b08..95c13aa575ff 100644
--- a/Documentation/filesystems/nfs/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs/nfs-rdma.txt
@@ -187,8 +187,10 @@ Check RDMA and NFS Setup
To further test the InfiniBand software stack, use IPoIB (this
assumes you have two IB hosts named host1 and host2):
- host1$ ifconfig ib0 a.b.c.x
- host2$ ifconfig ib0 a.b.c.y
+ host1$ ip link set dev ib0 up
+ host1$ ip address add dev ib0 a.b.c.x
+ host2$ ip link set dev ib0 up
+ host2$ ip address add dev ib0 a.b.c.y
host1$ ping a.b.c.y
host2$ ping a.b.c.x
@@ -229,7 +231,8 @@ NFS/RDMA Setup
$ modprobe ib_mthca
$ modprobe ib_ipoib
- $ ifconfig ib0 a.b.c.d
+ $ ip link set dev ib0 up
+ $ ip address add dev ib0 a.b.c.d
NOTE: use unique addresses for the client and server
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
index 28f8c08201e2..4c49e5410595 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -8,8 +8,8 @@ also make it attractive for non-clustered use.
You'll want to install the ocfs2-tools package in order to at least
get "mount.ocfs2" and "ocfs2_hb_ctl".
-Project web page: http://oss.oracle.com/projects/ocfs2
-Tools web page: http://oss.oracle.com/projects/ocfs2-tools
+Project web page: http://ocfs2.wiki.kernel.org
+Tools git tree: https://github.com/markfasheh/ocfs2-tools
OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
All code copyright 2005 Oracle except when otherwise noted.
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index a27c950ece61..6db0e5d1da07 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -159,6 +159,22 @@ overlay filesystem (though an operation on the name of the file such as
rename or unlink will of course be noticed and handled).
+Multiple lower layers
+---------------------
+
+Multiple lower layers can now be given using the colon (":") as a
+separator character between the directory names. For example:
+
+ mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged
+
+As the example shows, "upperdir=" and "workdir=" may be omitted. In
+that case the overlay will be read-only.
+
+The specified lower directories will be stacked beginning from the
+rightmost one and going left. In the above example lower1 will be the
+top, lower2 the middle and lower3 the bottom layer.
+
+
Non-standard behavior
---------------------
@@ -196,3 +212,15 @@ Changes to the underlying filesystems while part of a mounted overlay
filesystem are not allowed. If the underlying filesystem is changed,
the behavior of the overlay is undefined, though it will not result in
a crash or deadlock.
+
+Testsuite
+---------
+
+There's a testsuite developed by David Howells at:
+
+ git://git.infradead.org/users/dhowells/unionmount-testsuite.git
+
+Run as root:
+
+ # cd unionmount-testsuite
+ # ./run --ov
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index fa2db081505e..3eae250254d5 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -471,3 +471,32 @@ in your dentry operations instead.
[mandatory]
f_dentry is gone; use f_path.dentry, or, better yet, see if you can avoid
it entirely.
+--
+[mandatory]
+ never call ->read() and ->write() directly; use __vfs_{read,write} or
+ wrappers; instead of checking for ->write or ->read being NULL, look for
+ FMODE_CAN_{WRITE,READ} in file->f_mode.
+--
+[mandatory]
+ do _not_ use new_sync_{read,write} for ->read/->write; leave it NULL
+ instead.
+--
+[mandatory]
+ ->aio_read/->aio_write are gone. Use ->read_iter/->write_iter.
+---
+[recommended]
+ for embedded ("fast") symlinks just set inode->i_link to wherever the
+ symlink body is and use simple_follow_link() as ->follow_link().
+--
+[mandatory]
+ calling conventions for ->follow_link() have changed. Instead of returning
+ cookie and using nd_set_link() to store the body to traverse, we return
+ the body to traverse and store the cookie using explicit void ** argument.
+ nameidata isn't passed at all - nd_jump_link() doesn't need it and
+ nd_[gs]et_link() is gone.
+--
+[mandatory]
+ calling conventions for ->put_link() have changed. It gets inode instead of
+ dentry, it does not get nameidata at all and it gets called only when cookie
+ is non-NULL. Note that link body isn't available anymore, so if you need it,
+ store it as cookie.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index a07ba61662ed..c3b6b301d8b0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -200,12 +200,12 @@ contains details information about the process itself. Its fields are
explained in Table 1-4.
(for SMP CONFIG users)
-For making accounting scalable, RSS related information are handled in
-asynchronous manner and the vaule may not be very precise. To see a precise
+For making accounting scalable, RSS related information is handled in an
+asynchronous manner and the value may not be very precise. To see a precise
snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
It's slow but very precise.
-Table 1-2: Contents of the status files (as of 2.6.30-rc7)
+Table 1-2: Contents of the status files (as of 3.20.0)
..............................................................................
Field Content
Name filename of the executable
@@ -213,6 +213,7 @@ Table 1-2: Contents of the status files (as of 2.6.30-rc7)
in an uninterruptible wait, Z is zombie,
T is traced or stopped)
Tgid thread group ID
+ Ngid NUMA group ID (0 if none)
Pid process id
PPid process id of the parent process
TracerPid PID of process tracing this process (0 if not)
@@ -220,6 +221,10 @@ Table 1-2: Contents of the status files (as of 2.6.30-rc7)
Gid Real, effective, saved set, and file system GIDs
FDSize number of file descriptor slots currently allocated
Groups supplementary group list
+ NStgid descendant namespace thread group ID hierarchy
+ NSpid descendant namespace process ID hierarchy
+ NSpgid descendant namespace process group ID hierarchy
+ NSsid descendant namespace session ID hierarchy
VmPeak peak virtual memory size
VmSize total program size
VmLck locked memory size
@@ -1255,9 +1260,9 @@ Various pieces of information about kernel activity are available in the
since the system first booted. For a quick look, simply cat the file:
> cat /proc/stat
- cpu 2255 34 2290 22625563 6290 127 456 0 0
- cpu0 1132 34 1441 11311718 3675 127 438 0 0
- cpu1 1123 0 849 11313845 2614 0 18 0 0
+ cpu 2255 34 2290 22625563 6290 127 456 0 0 0
+ cpu0 1132 34 1441 11311718 3675 127 438 0 0 0
+ cpu1 1123 0 849 11313845 2614 0 18 0 0 0
intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...]
ctxt 1990473
btime 1062191376
@@ -1704,6 +1709,10 @@ A typical output is
flags: 0100002
mnt_id: 19
+All locks associated with a file descriptor are shown in its fdinfo too.
+
+lock: 1: FLOCK ADVISORY WRITE 359 00:13:11691 0 EOF
+
The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags
pair provide additional information particular to the objects they represent.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 966b22829f3b..b403b29ef710 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -350,8 +350,8 @@ struct inode_operations {
int (*rename2) (struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
- void * (*follow_link) (struct dentry *, struct nameidata *);
- void (*put_link) (struct dentry *, struct nameidata *, void *);
+ const char *(*follow_link) (struct dentry *, void **);
+ void (*put_link) (struct inode *, void *);
int (*permission) (struct inode *, int);
int (*get_acl)(struct inode *, int);
int (*setattr) (struct dentry *, struct iattr *);
@@ -436,16 +436,18 @@ otherwise noted.
follow_link: called by the VFS to follow a symbolic link to the
inode it points to. Only required if you want to support
- symbolic links. This method returns a void pointer cookie
- that is passed to put_link().
+ symbolic links. This method returns the symlink body
+ to traverse (and possibly resets the current position with
+ nd_jump_link()). If the body won't go away until the inode
+ is gone, nothing else is needed; if it needs to be otherwise
+ pinned, the data needed to release whatever we'd grabbed
+ is to be stored in void * variable passed by address to
+ follow_link() instance.
put_link: called by the VFS to release resources allocated by
- follow_link(). The cookie returned by follow_link() is passed
- to this method as the last parameter. It is used by
- filesystems such as NFS where page cache is not stable
- (i.e. page that was installed when the symbolic link walk
- started might not be in the page cache at the end of the
- walk).
+ follow_link(). The cookie stored by follow_link() is passed
+ to this method as the last parameter; only called when
+ cookie isn't NULL.
permission: called by the VFS to check for access rights on a POSIX-like
filesystem.
@@ -590,7 +592,7 @@ struct address_space_operations {
void (*invalidatepage) (struct page *, unsigned int, unsigned int);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
- ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
+ ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
int (*launder_page) (struct page *);
@@ -804,8 +806,6 @@ struct file_operations {
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
- ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
@@ -838,14 +838,10 @@ otherwise noted.
read: called by read(2) and related system calls
- aio_read: vectored, possibly asynchronous read
-
read_iter: possibly asynchronous read with iov_iter as destination
write: called by write(2) and related system calls
- aio_write: vectored, possibly asynchronous write
-
write_iter: possibly asynchronous write with iov_iter as source
iterate: called when the VFS needs to read the directory contents
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 0bfafe108357..5a5a05582b58 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -228,30 +228,19 @@ default behaviour.
Deprecated Mount Options
========================
- delaylog/nodelaylog
- Delayed logging is the only logging method that XFS supports
- now, so these mount options are now ignored.
-
- Due for removal in 3.12.
-
- ihashsize=value
- In memory inode hashes have been removed, so this option has
- no function as of August 2007. Option is deprecated.
-
- Due for removal in 3.12.
+None at present.
- irixsgid
- This behaviour is now controlled by a sysctl, so the mount
- option is ignored.
- Due for removal in 3.12.
+Removed Mount Options
+=====================
- osyncisdsync
- osyncisosync
- O_SYNC and O_DSYNC are fully supported, so there is no need
- for these options any more.
+ Name Removed
+ ---- -------
+ delaylog/nodelaylog v3.20
+ ihashsize v3.20
+ irixsgid v3.20
+ osyncisdsync/osyncisosync v3.20
- Due for removal in 3.12.
sysctls
=======