From 0191f8697bbdfefcd36e7b8dc3eeddfe82893e4b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 24 May 2010 19:15:57 +0200 Subject: pipe: F_SETPIPE_SZ should return -EPERM for non-root If the passed in size is larger than what has been set as the system wide limit and the user is not root, we want to return permission denied (not invalid value). Signed-off-by: Jens Axboe --- fs/pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index d79872eba09a..01ca9fab95fa 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1170,7 +1170,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) switch (cmd) { case F_SETPIPE_SZ: if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) - return -EINVAL; + return -EPERM; /* * The pipe needs to be at least 2 pages large to * guarantee POSIX behaviour. -- cgit v1.2.3 From b9598db3401282bb27b4aef77e3eee12015f7f29 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 24 May 2010 19:34:43 +0200 Subject: pipe: make F_{GET,SET}PIPE_SZ deal with byte sizes Instead of requiring an exact number of pages as the argument and return value, change the API to deal with number of bytes instead. This also relaxes the requirement that the passed in size must result in a power-of-2 page array size. Round up to the nearest power-of-2 automatically and return the resulting size of the pipe on success. Signed-off-by: Jens Axboe --- fs/pipe.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index 01ca9fab95fa..bdd3f96054b9 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1112,26 +1112,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) * Allocate a new array of pipe buffers and copy the info over. Returns the * pipe size if successful, or return -ERROR on error. */ -static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) +static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) { struct pipe_buffer *bufs; - /* - * Must be a power-of-2 currently - */ - if (!is_power_of_2(arg)) - return -EINVAL; - /* * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't * expect a lot of shrink+grow operations, just free and allocate * again like we would do for growing. If the pipe currently * contains more buffers than arg, then return busy. */ - if (arg < pipe->nrbufs) + if (nr_pages < pipe->nrbufs) return -EBUSY; - bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); + bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL); if (unlikely(!bufs)) return -ENOMEM; @@ -1152,8 +1146,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) pipe->curbuf = 0; kfree(pipe->bufs); pipe->bufs = bufs; - pipe->buffers = arg; - return arg; + pipe->buffers = nr_pages; + return nr_pages * PAGE_SIZE; } long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) @@ -1168,19 +1162,30 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) mutex_lock(&pipe->inode->i_mutex); switch (cmd) { - case F_SETPIPE_SZ: - if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) + case F_SETPIPE_SZ: { + unsigned long nr_pages; + + /* + * Currently the array must be a power-of-2 size, so adjust + * upwards if needed. + */ + nr_pages = (arg + PAGE_SIZE - 1) >> PAGE_SHIFT; + nr_pages = roundup_pow_of_two(nr_pages); + + if (!capable(CAP_SYS_ADMIN) && nr_pages > pipe_max_pages) return -EPERM; + /* * The pipe needs to be at least 2 pages large to * guarantee POSIX behaviour. */ - if (arg < 2) + if (nr_pages < 2) return -EINVAL; - ret = pipe_set_size(pipe, arg); + ret = pipe_set_size(pipe, nr_pages); break; + } case F_GETPIPE_SZ: - ret = pipe->buffers; + ret = pipe->buffers * PAGE_SIZE; break; default: ret = -EINVAL; -- cgit v1.2.3 From 51921cb746f56983db5a373ca68deb2b0d3ddf01 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 26 May 2010 08:44:22 +0200 Subject: mm: export generic_pipe_buf_*() to modules This is needed by fuse device code which wants to create pipe buffers. Signed-off-by: Miklos Szeredi --- fs/pipe.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index d79872eba09a..8912fa8f1666 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -230,6 +230,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe, return kmap(buf->page); } +EXPORT_SYMBOL(generic_pipe_buf_map); /** * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer @@ -249,6 +250,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, } else kunmap(buf->page); } +EXPORT_SYMBOL(generic_pipe_buf_unmap); /** * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer @@ -279,6 +281,7 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe, return 1; } +EXPORT_SYMBOL(generic_pipe_buf_steal); /** * generic_pipe_buf_get - get a reference to a &struct pipe_buffer @@ -294,6 +297,7 @@ void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { page_cache_get(buf->page); } +EXPORT_SYMBOL(generic_pipe_buf_get); /** * generic_pipe_buf_confirm - verify contents of the pipe buffer @@ -309,6 +313,7 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *info, { return 0; } +EXPORT_SYMBOL(generic_pipe_buf_confirm); /** * generic_pipe_buf_release - put a reference to a &struct pipe_buffer @@ -323,6 +328,7 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe, { page_cache_release(buf->page); } +EXPORT_SYMBOL(generic_pipe_buf_release); static const struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, -- cgit v1.2.3 From cc967be54710d97c05229b2e5ba2d00df84ddd64 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 26 May 2010 17:54:39 +0200 Subject: fs: Add missing mutex_unlock Add a mutex_unlock missing on the error path. At other exists from the function that return an error flag, the mutex is unlocked, so do the same here. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression E1; @@ * mutex_lock(E1,...); <+... when != E1 if (...) { ... when != E1 * return ...; } ...+> * mutex_unlock(E1,...); // Signed-off-by: Julia Lawall Signed-off-by: Al Viro --- fs/pipe.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index d79872eba09a..60da077400f1 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1169,14 +1169,18 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) switch (cmd) { case F_SETPIPE_SZ: - if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) - return -EINVAL; + if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) { + ret = -EINVAL; + goto out; + } /* * The pipe needs to be at least 2 pages large to * guarantee POSIX behaviour. */ - if (arg < 2) - return -EINVAL; + if (arg < 2) { + ret = -EINVAL; + goto out; + } ret = pipe_set_size(pipe, arg); break; case F_GETPIPE_SZ: @@ -1187,6 +1191,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) break; } +out: mutex_unlock(&pipe->inode->i_mutex); return ret; } -- cgit v1.2.3 From 6a6ca57de92fcae34603551ac944aa74758c30d4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Jun 2010 12:44:30 +0200 Subject: pipe: adjust minimum pipe size to 1 page We don't need to pages to guarantee the POSIX requirement that upto a page size write must be atomic to an empty pipe. Signed-off-by: Jens Axboe --- fs/pipe.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index 541d6626f9d9..369a0245aab6 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1181,13 +1181,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN) && nr_pages > pipe_max_pages) { ret = -EPERM; goto out; - } - - /* - * The pipe needs to be at least 2 pages large to - * guarantee POSIX behaviour. - */ - if (arg < 2) { + } else if (nr_pages < 1) { ret = -EINVAL; goto out; } -- cgit v1.2.3 From 419f8367ea37e5adc5d95479e8fd5554b92b49fe Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Jun 2010 12:45:28 +0200 Subject: pipe: change the privilege required for growing a pipe beyond system max Change it to CAP_SYS_RESOURCE, as that more accurately models what we want to control. Suggested-by: Michael Kerrisk Signed-off-by: Jens Axboe --- fs/pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index 369a0245aab6..f98fae3e36b0 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1178,7 +1178,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) nr_pages = (arg + PAGE_SIZE - 1) >> PAGE_SHIFT; nr_pages = roundup_pow_of_two(nr_pages); - if (!capable(CAP_SYS_ADMIN) && nr_pages > pipe_max_pages) { + if (!capable(CAP_SYS_RESOURCE) && nr_pages > pipe_max_pages) { ret = -EPERM; goto out; } else if (nr_pages < 1) { -- cgit v1.2.3 From ff9da691c0498ff81fdd014e7a0731dab2337dac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Jun 2010 14:54:39 +0200 Subject: pipe: change /proc/sys/fs/pipe-max-pages to byte sized interface This changes the interface to be based on bytes instead. The API matches that of F_SETPIPE_SZ in that it rounds up the passed in size so that the resulting page array is a power-of-2 in size. The proc file is renamed to /proc/sys/fs/pipe-max-size to reflect this change. Signed-off-by: Jens Axboe --- fs/pipe.c | 54 ++++++++++++++++++++++++++++++++++++----------- include/linux/pipe_fs_i.h | 4 +++- kernel/sysctl.c | 8 +++---- 3 files changed, 49 insertions(+), 17 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index f98fae3e36b0..69c4c7c13ea9 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -26,9 +26,14 @@ /* * The max size that a non-root user is allowed to grow the pipe. Can - * be set by root in /proc/sys/fs/pipe-max-pages + * be set by root in /proc/sys/fs/pipe-max-size */ -unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; +unsigned int pipe_max_size = 1048576; + +/* + * Minimum pipe size, as required by POSIX + */ +unsigned int pipe_min_size = PAGE_SIZE; /* * We use a start+len construction, which provides full use of the @@ -1156,6 +1161,35 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) return nr_pages * PAGE_SIZE; } +/* + * Currently we rely on the pipe array holding a power-of-2 number + * of pages. + */ +static inline unsigned int round_pipe_size(unsigned int size) +{ + unsigned long nr_pages; + + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; +} + +/* + * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax + * will return an error. + */ +int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, + size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); + if (ret < 0 || !write) + return ret; + + pipe_max_size = round_pipe_size(pipe_max_size); + return ret; +} + long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe; @@ -1169,23 +1203,19 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) switch (cmd) { case F_SETPIPE_SZ: { - unsigned long nr_pages; + unsigned int size, nr_pages; - /* - * Currently the array must be a power-of-2 size, so adjust - * upwards if needed. - */ - nr_pages = (arg + PAGE_SIZE - 1) >> PAGE_SHIFT; - nr_pages = roundup_pow_of_two(nr_pages); + size = round_pipe_size(arg); + nr_pages = size >> PAGE_SHIFT; - if (!capable(CAP_SYS_RESOURCE) && nr_pages > pipe_max_pages) { + if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { ret = -EPERM; goto out; - } else if (nr_pages < 1) { + } else if (nr_pages < PAGE_SIZE) { ret = -EINVAL; goto out; } - ret = pipe_set_size(pipe, arg); + ret = pipe_set_size(pipe, nr_pages); break; } case F_GETPIPE_SZ: diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 16de3933c45e..445796945ac9 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -139,7 +139,9 @@ void pipe_lock(struct pipe_inode_info *); void pipe_unlock(struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); -extern unsigned int pipe_max_pages; +extern unsigned int pipe_max_size, pipe_min_size; +int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 997080f00e0b..d24f761f4876 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1471,12 +1471,12 @@ static struct ctl_table fs_table[] = { }, #endif { - .procname = "pipe-max-pages", - .data = &pipe_max_pages, + .procname = "pipe-max-size", + .data = &pipe_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .extra1 = &two, + .proc_handler = &pipe_proc_fn, + .extra1 = &pipe_min_size, }, /* * NOTE: do not add new entries to this table unless you have read -- cgit v1.2.3 From 1d862f41222b7f385bada9f85a67ca5592ffd33e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 8 Jun 2010 16:28:45 +0200 Subject: pipe: fix pipe buffer resizing pipe_set_size() needs to copy pipe bufs from the old circular buffer to the new. The current code gets this wrong in multiple ways, resulting in oops. Test program is available here: http://www.kernel.org/pub/linux/kernel/people/mszeredi/piperesize/ Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/pipe.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index 69c4c7c13ea9..f31e2d472984 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1145,13 +1145,20 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) * and adjust the indexes. */ if (pipe->nrbufs) { - const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1); - const unsigned int head = pipe->nrbufs - tail; + unsigned int tail; + unsigned int head; + tail = pipe->curbuf + pipe->nrbufs; + if (tail < pipe->buffers) + tail = 0; + else + tail &= (pipe->buffers - 1); + + head = pipe->nrbufs - tail; if (head) memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer)); if (tail) - memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer)); + memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer)); } pipe->curbuf = 0; -- cgit v1.2.3 From 6db40cf047a8723095caf79f5569d21b388d7b31 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 9 Jun 2010 09:27:57 +0200 Subject: pipe: fix check in "set size" fcntl As it stands this check compares the number of pages to the page size. This makes no sense and makes the fcntl fail in almost any sane case. Fix it by checking if nr_pages is not zero (it can become zero only if arg is too big and round_pipe_size() overflows). Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/pipe.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index f31e2d472984..279eef96c51c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1215,12 +1215,13 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) size = round_pipe_size(arg); nr_pages = size >> PAGE_SHIFT; + ret = -EINVAL; + if (!nr_pages) + goto out; + if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { ret = -EPERM; goto out; - } else if (nr_pages < PAGE_SIZE) { - ret = -EINVAL; - goto out; } ret = pipe_set_size(pipe, nr_pages); break; -- cgit v1.2.3 From e5953cbdff26f7cbae7eff30cd9b18c4e19b7594 Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Thu, 21 Oct 2010 14:56:00 +0200 Subject: pipe: fix failure to return error code on ->confirm() The arguments were transposed, we want to assign the error code to 'ret', which is being returned. Signed-off-by: Nicolas Kaiser Cc: stable@kernel.org Signed-off-by: Jens Axboe --- fs/pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index 279eef96c51c..37eb1ebeaa90 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -382,7 +382,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, error = ops->confirm(pipe, buf); if (error) { if (!ret) - error = ret; + ret = error; break; } -- cgit v1.2.3 From 85fe4025c616a7c0ed07bc2fc8c5371b07f3888c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 11:19:54 -0400 Subject: fs: do not assign default i_ino in new_inode Instead of always assigning an increasing inode number in new_inode move the call to assign it into those callers that actually need it. For now callers that need it is estimated conservatively, that is the call is added to all filesystems that do not assign an i_ino by themselves. For a few more filesystems we can avoid assigning any inode number given that they aren't user visible, and for others it could be done lazily when an inode number is actually needed, but that's left for later patches. Signed-off-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- drivers/infiniband/hw/ipath/ipath_fs.c | 1 + drivers/infiniband/hw/qib/qib_fs.c | 1 + drivers/misc/ibmasm/ibmasmfs.c | 1 + drivers/oprofile/oprofilefs.c | 1 + drivers/usb/core/inode.c | 1 + drivers/usb/gadget/f_fs.c | 1 + drivers/usb/gadget/inode.c | 1 + fs/anon_inodes.c | 1 + fs/autofs4/inode.c | 1 + fs/binfmt_misc.c | 1 + fs/configfs/inode.c | 1 + fs/debugfs/inode.c | 1 + fs/ext4/mballoc.c | 1 + fs/freevxfs/vxfs_inode.c | 1 + fs/fuse/control.c | 1 + fs/hugetlbfs/inode.c | 1 + fs/inode.c | 4 ++-- fs/ocfs2/dlmfs/dlmfs.c | 2 ++ fs/pipe.c | 2 ++ fs/proc/base.c | 2 ++ fs/proc/proc_sysctl.c | 2 ++ fs/ramfs/inode.c | 1 + fs/xfs/linux-2.6/xfs_buf.c | 1 + include/linux/fs.h | 1 + ipc/mqueue.c | 1 + kernel/cgroup.c | 1 + mm/shmem.c | 1 + net/socket.c | 1 + net/sunrpc/rpc_pipe.c | 1 + security/inode.c | 1 + security/selinux/selinuxfs.c | 1 + 31 files changed, 36 insertions(+), 2 deletions(-) (limited to 'fs/pipe.c') diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index d13e72685dcf..12d5bf76302c 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -57,6 +57,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index a0e6613e8be6..7e433d75c775 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -58,6 +58,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index af2497ae5fe3..0a53500636c9 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -146,6 +146,7 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode) struct inode *ret = new_inode(sb); if (ret) { + ret->i_ino = get_next_ino(); ret->i_mode = mode; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 95f711b251ad..449de59bf35b 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -28,6 +28,7 @@ static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode) struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 095fa5366690..e2f63c0ea09d 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -276,6 +276,7 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index e4f595055208..e093fd8d04d3 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -980,6 +980,7 @@ ffs_sb_make_inode(struct super_block *sb, void *data, if (likely(inode)) { struct timespec current_time = CURRENT_TIME; + inode->i_ino = usbfs_get_inode(); inode->i_mode = perms->mode; inode->i_uid = perms->uid; inode->i_gid = perms->gid; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index d1d72d946b04..ba145e7fbe03 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -1991,6 +1991,7 @@ gadgetfs_make_inode (struct super_block *sb, struct inode *inode = new_inode (sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = default_uid; inode->i_gid = default_gid; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 9c8e87b0361f..5365527ca43f 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -193,6 +193,7 @@ static struct inode *anon_inode_mkinode(void) if (!inode) return ERR_PTR(-ENOMEM); + inode->i_ino = get_next_ino(); inode->i_fop = &anon_inode_fops; inode->i_mapping->a_ops = &anon_aops; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 821b2b955dac..ac87e49fa706 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -398,6 +398,7 @@ struct inode *autofs4_get_inode(struct super_block *sb, inode->i_gid = sb->s_root->d_inode->i_gid; } inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_ino = get_next_ino(); if (S_ISDIR(inf->mode)) { inode->i_nlink = 2; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 139fc8083f53..29990f0eee0c 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -495,6 +495,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) struct inode * inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index cf78d44a8d6a..253476d78ed8 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -135,6 +135,7 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) { struct inode * inode = new_inode(configfs_sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mapping->a_ops = &configfs_aops; inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; inode->i_op = &configfs_inode_operations; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 30a87b3dbcac..a4ed8380e98a 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -40,6 +40,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 19aa0d44d822..42f77b1dc72d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2373,6 +2373,7 @@ static int ext4_mb_init_backend(struct super_block *sb) printk(KERN_ERR "EXT4-fs: can't get new inode\n"); goto err_freesgi; } + sbi->s_buddy_cache->i_ino = get_next_ino(); EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 79d1b4ea13e7..8c04eac5079d 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -260,6 +260,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip) struct inode *ip = NULL; if ((ip = new_inode(sbp))) { + ip->i_ino = get_next_ino(); vxfs_iinit(ip, vip); ip->i_mapping->a_ops = &vxfs_aops; } diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 7367e177186f..4eba07661e5c 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -222,6 +222,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, if (!inode) return NULL; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = fc->user_id; inode->i_gid = fc->group_id; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 113eba3d3c38..8d0607b37266 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -455,6 +455,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode = new_inode(sb); if (inode) { struct hugetlbfs_inode_info *info; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = uid; inode->i_gid = gid; diff --git a/fs/inode.c b/fs/inode.c index 46a3e120b196..2cd2e48f7a20 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -735,7 +735,7 @@ repeat: #define LAST_INO_BATCH 1024 static DEFINE_PER_CPU(unsigned int, last_ino); -static unsigned int get_next_ino(void) +unsigned int get_next_ino(void) { unsigned int *p = &get_cpu_var(last_ino); unsigned int res = *p; @@ -753,6 +753,7 @@ static unsigned int get_next_ino(void) put_cpu_var(last_ino); return res; } +EXPORT_SYMBOL(get_next_ino); /** * new_inode - obtain an inode @@ -776,7 +777,6 @@ struct inode *new_inode(struct super_block *sb) if (inode) { spin_lock(&inode_lock); __inode_sb_list_add(inode); - inode->i_ino = get_next_ino(); inode->i_state = 0; spin_unlock(&inode_lock); } diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index a7ebd9d42dc8..75e115f1bd73 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -400,6 +400,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) if (inode) { ip = DLMFS_I(inode); + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); @@ -425,6 +426,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent, if (!inode) return NULL; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/fs/pipe.c b/fs/pipe.c index 37eb1ebeaa90..d2d7566ce68e 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -954,6 +954,8 @@ static struct inode * get_pipe_inode(void) if (!inode) goto fail_inode; + inode->i_ino = get_next_ino(); + pipe = alloc_pipe_info(inode); if (!pipe) goto fail_iput; diff --git a/fs/proc/base.c b/fs/proc/base.c index fb2a5abd4e4f..9883f1e18332 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1603,6 +1603,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st /* Common stuff */ ei = PROC_I(inode); + inode->i_ino = get_next_ino(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_op = &proc_def_inode_operations; @@ -2549,6 +2550,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, /* Initialize the inode */ ei = PROC_I(inode); + inode->i_ino = get_next_ino(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; /* diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 2fc52552271d..b652cb00906b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -23,6 +23,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, if (!inode) goto out; + inode->i_ino = get_next_ino(); + sysctl_head_get(head); ei = PROC_I(inode); ei->sysctl = head; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a5ebae70dc6d..67fadb1ad2c1 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -58,6 +58,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, struct inode * inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); inode->i_mapping->a_ops = &ramfs_aops; inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ba5312802aa9..63fd2c07cb57 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1580,6 +1580,7 @@ xfs_mapping_buftarg( XFS_BUFTARG_NAME(btp)); return ENOMEM; } + inode->i_ino = get_next_ino(); inode->i_mode = S_IFBLK; inode->i_bdev = bdev; inode->i_rdev = bdev->bd_dev; diff --git a/include/linux/fs.h b/include/linux/fs.h index bd6ae6c71fc8..4a573cf13f51 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2191,6 +2191,7 @@ extern struct inode * iget_locked(struct super_block *, unsigned long); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); extern void unlock_new_inode(struct inode *); +extern unsigned int get_next_ino(void); extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 80b35ffca25d..3a61ffefe884 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -116,6 +116,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7b69b8d0313d..9270d532ec3c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -777,6 +777,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/mm/shmem.c b/mm/shmem.c index d4e2852526e6..f6d350e8adc5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1586,6 +1586,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; diff --git a/net/socket.c b/net/socket.c index d223725f99e5..5cac1c707755 100644 --- a/net/socket.c +++ b/net/socket.c @@ -480,6 +480,7 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); kmemcheck_annotate_bitfield(sock, type); + inode->i_ino = get_next_ino(); inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 52f252432144..7df92d237cb8 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -445,6 +445,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode) struct inode *inode = new_inode(sb); if (!inode) return NULL; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch(mode & S_IFMT) { diff --git a/security/inode.c b/security/inode.c index 88839866cbcd..cb8f47c66a58 100644 --- a/security/inode.c +++ b/security/inode.c @@ -61,6 +61,7 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev) struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 87e0556bae70..55a755c1a1bd 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -978,6 +978,7 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode) struct inode *ret = new_inode(sb); if (ret) { + ret->i_ino = get_next_ino(); ret->i_mode = mode; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } -- cgit v1.2.3 From 51139adac92f7160ad3ca1cab2de1b4b8d19dc96 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 23:47:46 +0400 Subject: convert get_sb_pseudo() users Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 9 ++++----- drivers/mtd/mtdchar.c | 10 ++++------ fs/anon_inodes.c | 10 ++++------ fs/block_dev.c | 8 ++++---- fs/libfs.c | 14 ++++++-------- fs/pipe.c | 9 ++++----- include/linux/fs.h | 5 ++--- net/socket.c | 10 ++++------ 8 files changed, 32 insertions(+), 43 deletions(-) (limited to 'fs/pipe.c') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 6b1852f7f972..39e534f5a3b0 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -618,16 +618,15 @@ pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, } -static int -pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry * +pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); + return mount_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC); } static struct file_system_type pfm_fs_type = { .name = "pfmfs", - .get_sb = pfmfs_get_sb, + .mount = pfmfs_mount, .kill_sb = kill_anon_super, }; diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 5ef45487b65f..a34a0fe14884 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1030,17 +1030,15 @@ static const struct file_operations mtd_fops = { #endif }; -static int mtd_inodefs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *mtd_inodefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "mtd_inode:", NULL, MTD_INODE_FS_MAGIC, - mnt); + return mount_pseudo(fs_type, "mtd_inode:", NULL, MTD_INODE_FS_MAGIC); } static struct file_system_type mtd_inodefs_type = { .name = "mtd_inodefs", - .get_sb = mtd_inodefs_get_sb, + .mount = mtd_inodefs_mount, .kill_sb = kill_anon_super, }; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 5365527ca43f..57ce55b2564c 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -26,12 +26,10 @@ static struct vfsmount *anon_inode_mnt __read_mostly; static struct inode *anon_inode_inode; static const struct file_operations anon_inode_fops; -static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC, - mnt); + return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC); } /* @@ -45,7 +43,7 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen) static struct file_system_type anon_inode_fs_type = { .name = "anon_inodefs", - .get_sb = anon_inodefs_get_sb, + .mount = anon_inodefs_mount, .kill_sb = kill_anon_super, }; static const struct dentry_operations anon_inodefs_dentry_operations = { diff --git a/fs/block_dev.c b/fs/block_dev.c index dea3b628a6ce..06e8ff12b97c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -464,15 +464,15 @@ static const struct super_operations bdev_sops = { .evict_inode = bdev_evict_inode, }; -static int bd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *bd_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); + return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576); } static struct file_system_type bd_type = { .name = "bdev", - .get_sb = bd_get_sb, + .mount = bd_mount, .kill_sb = kill_anon_super, }; diff --git a/fs/libfs.c b/fs/libfs.c index 304a5132ca27..a3accdf528ad 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -201,9 +201,8 @@ static const struct super_operations simple_super_operations = { * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that * will never be mountable) */ -int get_sb_pseudo(struct file_system_type *fs_type, char *name, - const struct super_operations *ops, unsigned long magic, - struct vfsmount *mnt) +struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name, + const struct super_operations *ops, unsigned long magic) { struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); struct dentry *dentry; @@ -211,7 +210,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, struct qstr d_name = {.name = name, .len = strlen(name)}; if (IS_ERR(s)) - return PTR_ERR(s); + return ERR_CAST(s); s->s_flags = MS_NOUSER; s->s_maxbytes = MAX_LFS_FILESIZE; @@ -241,12 +240,11 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, d_instantiate(dentry, root); s->s_root = dentry; s->s_flags |= MS_ACTIVE; - simple_set_mnt(mnt, s); - return 0; + return dget(s->s_root); Enomem: deactivate_locked_super(s); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -951,7 +949,7 @@ EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); EXPORT_SYMBOL(dcache_readdir); EXPORT_SYMBOL(generic_read_dir); -EXPORT_SYMBOL(get_sb_pseudo); +EXPORT_SYMBOL(mount_pseudo); EXPORT_SYMBOL(simple_write_begin); EXPORT_SYMBOL(simple_write_end); EXPORT_SYMBOL(simple_dir_inode_operations); diff --git a/fs/pipe.c b/fs/pipe.c index d2d7566ce68e..a8012a955720 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1247,16 +1247,15 @@ out: * any operations on the root directory. However, we need a non-trivial * d_name - pipe: will go nicely and kill the special-casing in procfs. */ -static int pipefs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *pipefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); + return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); } static struct file_system_type pipe_fs_type = { .name = "pipefs", - .get_sb = pipefs_get_sb, + .mount = pipefs_mount, .kill_sb = kill_anon_super, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4c3a29ddcacb..43e6cfb5cbb3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1824,9 +1824,8 @@ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), void *data); -extern int get_sb_pseudo(struct file_system_type *, char *, - const struct super_operations *ops, unsigned long, - struct vfsmount *mnt); +extern struct dentry *mount_pseudo(struct file_system_type *, char *, + const struct super_operations *ops, unsigned long); extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); static inline void sb_mark_dirty(struct super_block *sb) diff --git a/net/socket.c b/net/socket.c index ee3cd280c76e..5247ae10f374 100644 --- a/net/socket.c +++ b/net/socket.c @@ -305,19 +305,17 @@ static const struct super_operations sockfs_ops = { .statfs = simple_statfs, }; -static int sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *sockfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, - mnt); + return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); } static struct vfsmount *sock_mnt __read_mostly; static struct file_system_type sock_fs_type = { .name = "sockfs", - .get_sb = sockfs_get_sb, + .mount = sockfs_mount, .kill_sb = kill_anon_super, }; -- cgit v1.2.3 From c66fb347946ebdd5b10908866ecc9fa05ee2cf3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Nov 2010 14:09:57 -0800 Subject: Export 'get_pipe_info()' to other users And in particular, use it in 'pipe_fcntl()'. The other pipe functions do not need to use the 'careful' version, since they are only ever called for things that are already known to be pipes. The normal read/write/ioctl functions are called through the file operations structures, so if a file isn't a pipe, they'd never get called. But pipe_fcntl() is special, and called directly from the generic fcntl code, and needs to use the same careful function that the splice code is using. Cc: Jens Axboe Cc: Andrew Morton Cc: Al Viro Cc: Dave Jones Signed-off-by: Linus Torvalds --- fs/pipe.c | 2 +- fs/splice.c | 11 ----------- include/linux/pipe_fs_i.h | 12 ++++++++++++ 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index a8012a955720..b8997f8c6324 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1204,7 +1204,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) struct pipe_inode_info *pipe; long ret; - pipe = file->f_path.dentry->d_inode->i_pipe; + pipe = get_pipe_info(file); if (!pipe) return -EBADF; diff --git a/fs/splice.c b/fs/splice.c index 0d92dabcc576..ce2f02579e35 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1311,17 +1311,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags); -/* - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same - * location, so checking ->i_pipe is not enough to verify that this is a - * pipe. - */ -static inline struct pipe_inode_info *get_pipe_info(struct file *file) -{ - struct inode *i = file->f_path.dentry->d_inode; - - return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; -} /* * Determine where to splice to/from. diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 445796945ac9..3c5ac3147428 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -161,4 +161,16 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ long pipe_fcntl(struct file *, unsigned int, unsigned long arg); +/* + * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same + * location, so checking ->i_pipe is not enough to verify that this is a + * pipe. + */ +static inline struct pipe_inode_info *get_pipe_info(struct file *file) +{ + struct inode *i = file->f_path.dentry->d_inode; + + return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; +} + #endif -- cgit v1.2.3 From 72083646528d4887b920deb71b37e09bc7d227bb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Nov 2010 16:27:19 -0800 Subject: Un-inline get_pipe_info() helper function This avoids some include-file hell, and the function isn't really important enough to be inlined anyway. Reported-by: Ingo Molnar Signed-off-by: Linus Torvalds --- fs/pipe.c | 12 ++++++++++++ include/linux/pipe_fs_i.h | 13 +------------ 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index b8997f8c6324..04629f36e397 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1199,6 +1199,18 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, return ret; } +/* + * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same + * location, so checking ->i_pipe is not enough to verify that this is a + * pipe. + */ +struct pipe_inode_info *get_pipe_info(struct file *file) +{ + struct inode *i = file->f_path.dentry->d_inode; + + return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; +} + long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 3c5ac3147428..bb27d7ec2fb9 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -160,17 +160,6 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ long pipe_fcntl(struct file *, unsigned int, unsigned long arg); - -/* - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same - * location, so checking ->i_pipe is not enough to verify that this is a - * pipe. - */ -static inline struct pipe_inode_info *get_pipe_info(struct file *file) -{ - struct inode *i = file->f_path.dentry->d_inode; - - return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; -} +struct pipe_inode_info *get_pipe_info(struct file *file); #endif -- cgit v1.2.3 From ff0c7d15f9787b7e8c601533c015295cc68329f8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:50 +1100 Subject: fs: avoid inode RCU freeing for pseudo fs Pseudo filesystems that don't put inode on RCU list or reachable by rcu-walk dentries do not need to RCU free their inodes. Signed-off-by: Nick Piggin --- fs/inode.c | 6 ++++++ fs/pipe.c | 6 +++++- include/linux/fs.h | 1 + include/linux/net.h | 1 + net/socket.c | 17 +++++++++-------- 5 files changed, 22 insertions(+), 9 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/inode.c b/fs/inode.c index 6751dfe8cc06..da85e56378f3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -257,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb) return inode; } +void free_inode_nonrcu(struct inode *inode) +{ + kmem_cache_free(inode_cachep, inode); +} +EXPORT_SYMBOL(free_inode_nonrcu); + void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); diff --git a/fs/pipe.c b/fs/pipe.c index 04629f36e397..ae3592dcc88c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1253,6 +1253,10 @@ out: return ret; } +static const struct super_operations pipefs_ops = { + .destroy_inode = free_inode_nonrcu, +}; + /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need @@ -1262,7 +1266,7 @@ out: static struct dentry *pipefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); + return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC); } static struct file_system_type pipe_fs_type = { diff --git a/include/linux/fs.h b/include/linux/fs.h index 1ff4d0a33b25..ea202fff44f8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2233,6 +2233,7 @@ extern void iget_failed(struct inode *); extern void end_writeback(struct inode *); extern void __destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); +extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); diff --git a/include/linux/net.h b/include/linux/net.h index 06bde4908473..16faa130088c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -120,6 +120,7 @@ enum sock_shutdown_cmd { struct socket_wq { wait_queue_head_t wait; struct fasync_struct *fasync_list; + struct rcu_head rcu; } ____cacheline_aligned_in_smp; /** diff --git a/net/socket.c b/net/socket.c index 97fff3a4e72f..817dc92e9ef8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -262,20 +262,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb) } -static void sock_free_rcu(struct rcu_head *head) + +static void wq_free_rcu(struct rcu_head *head) { - struct inode *inode = container_of(head, struct inode, i_rcu); - struct socket_alloc *ei = container_of(inode, struct socket_alloc, - vfs_inode); + struct socket_wq *wq = container_of(head, struct socket_wq, rcu); - kfree(ei->socket.wq); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(sock_inode_cachep, ei); + kfree(wq); } static void sock_destroy_inode(struct inode *inode) { - call_rcu(&inode->i_rcu, sock_free_rcu); + struct socket_alloc *ei; + + ei = container_of(inode, struct socket_alloc, vfs_inode); + call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + kmem_cache_free(sock_inode_cachep, ei); } static void init_once(void *foo) -- cgit v1.2.3 From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:55 +1100 Subject: fs: dcache reduce branches in lookup path Reduce some branches and memory accesses in dcache lookup by adding dentry flags to indicate common d_ops are set, rather than having to check them. This saves a pointer memory access (dentry->d_op) in common path lookup situations, and saves another pointer load and branch in cases where we have d_op but not the particular operation. Patched with: git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i Signed-off-by: Nick Piggin --- arch/ia64/kernel/perfmon.c | 2 +- drivers/staging/autofs/root.c | 2 +- drivers/staging/smbfs/dir.c | 8 ++++---- fs/9p/vfs_inode.c | 26 +++++++++++++------------- fs/adfs/dir.c | 2 +- fs/adfs/super.c | 2 +- fs/affs/namei.c | 2 +- fs/affs/super.c | 2 +- fs/afs/dir.c | 2 +- fs/anon_inodes.c | 2 +- fs/autofs4/inode.c | 2 +- fs/autofs4/root.c | 10 +++++----- fs/btrfs/export.c | 4 ++-- fs/btrfs/inode.c | 2 +- fs/ceph/dir.c | 6 +++--- fs/cifs/dir.c | 16 ++++++++-------- fs/cifs/inode.c | 8 ++++---- fs/cifs/link.c | 4 ++-- fs/cifs/readdir.c | 4 ++-- fs/coda/dir.c | 2 +- fs/configfs/dir.c | 8 ++++---- fs/dcache.c | 30 ++++++++++++++++++++++++++---- fs/ecryptfs/inode.c | 2 +- fs/ecryptfs/main.c | 4 ++-- fs/fat/inode.c | 4 ++-- fs/fat/namei_msdos.c | 6 +++--- fs/fat/namei_vfat.c | 8 ++++---- fs/fuse/dir.c | 2 +- fs/fuse/inode.c | 4 ++-- fs/gfs2/export.c | 4 ++-- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/ops_inode.c | 2 +- fs/hfs/dir.c | 2 +- fs/hfs/super.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/hfsplus/super.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hpfs/dentry.c | 2 +- fs/isofs/inode.c | 2 +- fs/isofs/namei.c | 2 +- fs/jfs/namei.c | 4 ++-- fs/jfs/super.c | 2 +- fs/libfs.c | 2 +- fs/minix/namei.c | 2 +- fs/namei.c | 31 ++++++++++++++++++++----------- fs/ncpfs/dir.c | 4 ++-- fs/ncpfs/inode.c | 2 +- fs/nfs/dir.c | 6 +++--- fs/nfs/getroot.c | 4 ++-- fs/ocfs2/export.c | 4 ++-- fs/ocfs2/namei.c | 10 +++++----- fs/pipe.c | 2 +- fs/proc/base.c | 12 ++++++------ fs/proc/generic.c | 2 +- fs/proc/proc_sysctl.c | 4 ++-- fs/reiserfs/xattr.c | 2 +- fs/sysfs/dir.c | 2 +- fs/sysv/namei.c | 2 +- fs/sysv/super.c | 2 +- include/linux/dcache.h | 6 ++++++ kernel/cgroup.c | 2 +- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 63 files changed, 174 insertions(+), 137 deletions(-) (limited to 'fs/pipe.c') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index d39d8a53b579..5a24f40bb48e 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2233,7 +2233,7 @@ pfm_alloc_file(pfm_context_t *ctx) } path.mnt = mntget(pfmfs_mnt); - path.dentry->d_op = &pfmfs_dentry_operations; + d_set_d_op(path.dentry, &pfmfs_dentry_operations); d_add(path.dentry, inode); file = alloc_file(&path, FMODE_READ, &pfm_file_ops); diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c index 0fdec4befd84..b09adb57971f 100644 --- a/drivers/staging/autofs/root.c +++ b/drivers/staging/autofs/root.c @@ -237,7 +237,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr * * We need to do this before we release the directory semaphore. */ - dentry->d_op = &autofs_dentry_operations; + d_set_d_op(dentry, &autofs_dentry_operations); dentry->d_flags |= DCACHE_AUTOFS_PENDING; d_add(dentry, NULL); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 5f79799d5d4a..78f09412740c 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -398,9 +398,9 @@ smb_new_dentry(struct dentry *dentry) struct smb_sb_info *server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); dentry->d_time = jiffies; } @@ -462,9 +462,9 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) add_entry: server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); d_add(dentry, inode); smb_renew_times(dentry); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f6f9081e6d2c..df8bbb358d54 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -635,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, } if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); @@ -749,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -767,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -956,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -973,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -1041,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, inst_out: if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_add(dentry, inode); return NULL; @@ -1709,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1722,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } @@ -1856,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, ihold(old_dentry->d_inode); } - dentry->d_op = old_dentry->d_op; + d_set_d_op(dentry, old_dentry->d_op); d_instantiate(dentry, old_dentry->d_inode); return err; @@ -1980,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1996,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index a11e5e102716..bf7693c384f9 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -276,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct object_info obj; int error; - dentry->d_op = &adfs_dentry_operations; + d_set_d_op(dentry, &adfs_dentry_operations); lock_kernel(); error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); if (error == 0) { diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 47dffc513a26..a4041b52fbca 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -484,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) adfs_error(sb, "get root inode failed\n"); goto error; } else - sb->s_root->d_op = &adfs_dentry_operations; + d_set_d_op(sb->s_root, &adfs_dentry_operations); unlock_kernel(); return 0; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 5aca08c21100..944a4042fb65 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -240,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (IS_ERR(inode)) return ERR_CAST(inode); } - dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; + d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/affs/super.c b/fs/affs/super.c index 4c18fcfb0a19..d39081bbe7ce 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -482,7 +482,7 @@ got_root: printk(KERN_ERR "AFFS: Get root inode failed\n"); goto out_error; } - sb->s_root->d_op = &affs_dentry_operations; + d_set_d_op(sb->s_root, &affs_dentry_operations); pr_debug("AFFS: s_flags=%lX\n",sb->s_flags); return 0; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2c18cde27000..b8bb7e7148d0 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -581,7 +581,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, } success: - dentry->d_op = &afs_fs_dentry_operations; + d_set_d_op(dentry, &afs_fs_dentry_operations); d_add(dentry, inode); _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 57ce55b2564c..aca8806fa206 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name, */ ihold(anon_inode_inode); - path.dentry->d_op = &anon_inodefs_dentry_operations; + d_set_d_op(path.dentry, &anon_inodefs_dentry_operations); d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index ac87e49fa706..a7bdb9dcac84 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_iput; pipe = NULL; - root->d_op = &autofs4_sb_dentry_operations; + d_set_d_op(root, &autofs4_sb_dentry_operations); root->d_fsdata = ino; /* Can this call block? */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 10ca68a96dc7..bfe3f2eb684d 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -571,7 +571,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * we check for the hashed dentry and return the newly * hashed dentry. */ - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); /* * And we need to ensure that the same dentry is used for @@ -710,9 +710,9 @@ static int autofs4_dir_symlink(struct inode *dir, d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); @@ -845,9 +845,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 659f532d26a0..0ccf9a8afcdf 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: srcu_read_unlock(&fs_info->subvol_srcu, index); @@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) key.offset = 0; dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: btrfs_free_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9d2994a42a2..63e4546b478a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) int index; int ret; - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 58abc3da6111..cc01cf826769 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -42,11 +42,11 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) - dentry->d_op = &ceph_dentry_ops; + d_set_d_op(dentry, &ceph_dentry_ops); else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) - dentry->d_op = &ceph_snapdir_dentry_ops; + d_set_d_op(dentry, &ceph_snapdir_dentry_ops); else - dentry->d_op = &ceph_snap_dentry_ops; + d_set_d_op(dentry, &ceph_snap_dentry_ops); di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); if (!di) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 88bfe686ac00..e3b10ca6d453 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon, struct inode *newinode) { if (tcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } @@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); if (rc == 0) d_instantiate(direntry, newinode); @@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if ((rc == 0) && (newInode != NULL)) { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, newInode); if (posix_open) { filp = lookup_instantiate_filp(nd, direntry, @@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, rc = 0; direntry->d_time = jiffies; if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, NULL); /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 99b9a2cc14b7..2a239d878e85 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1319,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) to set uid/gid */ inc_nlink(inode); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); cifs_fill_uniqueid(inode->i_sb, &fattr); @@ -1363,9 +1363,9 @@ mkdir_get_info: inode->i_sb, xid, NULL); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); /* setting nlink not necessary except in cases where we * failed to get it from the server or was set bogus */ diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 85cdbf831e7b..fe2f6a93c49e 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) rc); } else { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ee463aeca0b0..ec5b68e3b928 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, } if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) - dentry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(dentry, &cifs_ci_dentry_ops); else - dentry->d_op = &cifs_dentry_ops; + d_set_d_op(dentry, &cifs_dentry_ops); alias = d_materialise_unique(dentry, inode); if (alias != NULL) { diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 9e37e8bc9b89..aa40c811f8d2 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -125,7 +125,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc return ERR_PTR(error); exit: - entry->d_op = &coda_dentry_operations; + d_set_d_op(entry, &coda_dentry_operations); if (inode && (type & CODA_NOCACHE)) coda_flag_inode(inode, C_VATTR | C_PURGE); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index e9acea440ffc..36637a8c1ed3 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den return error; } - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_rehash(dentry); return 0; @@ -489,7 +489,7 @@ static struct dentry * configfs_lookup(struct inode *dir, */ if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); return NULL; } @@ -683,7 +683,7 @@ static int create_default_group(struct config_group *parent_group, ret = -ENOMEM; child = d_alloc(parent, &name); if (child) { - child->d_op = &configfs_dentry_ops; + d_set_d_op(child, &configfs_dentry_ops); d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, @@ -1681,7 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) err = -ENOMEM; dentry = d_alloc(configfs_sb->s_root, &name); if (dentry) { - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); err = configfs_attach_group(sd->s_element, &group->cg_item, diff --git a/fs/dcache.c b/fs/dcache.c index 1d5cf511e1c7..f9693da3efbd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -398,7 +398,7 @@ repeat: return; } - if (dentry->d_op && dentry->d_op->d_delete) { + if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) goto kill_it; } @@ -1301,6 +1301,28 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); +void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) +{ + BUG_ON(dentry->d_op); + BUG_ON(dentry->d_flags & (DCACHE_OP_HASH | + DCACHE_OP_COMPARE | + DCACHE_OP_REVALIDATE | + DCACHE_OP_DELETE )); + dentry->d_op = op; + if (!op) + return; + if (op->d_hash) + dentry->d_flags |= DCACHE_OP_HASH; + if (op->d_compare) + dentry->d_flags |= DCACHE_OP_COMPARE; + if (op->d_revalidate) + dentry->d_flags |= DCACHE_OP_REVALIDATE; + if (op->d_delete) + dentry->d_flags |= DCACHE_OP_DELETE; + +} +EXPORT_SYMBOL(d_set_d_op); + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); @@ -1731,7 +1753,7 @@ seqretry: */ if (read_seqcount_retry(&dentry->d_seq, *seq)) goto seqretry; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, *inode, dentry, i, tlen, tname, name)) @@ -1846,7 +1868,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) */ tlen = dentry->d_name.len; tname = dentry->d_name.name; - if (parent->d_op && parent->d_op->d_compare) { + if (parent->d_flags & DCACHE_OP_COMPARE) { if (parent->d_op->d_compare(parent, parent->d_inode, dentry, dentry->d_inode, tlen, tname, name)) @@ -1887,7 +1909,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) * routine may choose to leave the hash value unchanged. */ name->hash = full_name_hash(name->name, name->len); - if (dir->d_op && dir->d_op->d_hash) { + if (dir->d_flags & DCACHE_OP_HASH) { if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) goto out; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5e5c7ec1fc98..f91b35db4c6e 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, struct qstr lower_name; int rc = 0; - ecryptfs_dentry->d_op = &ecryptfs_dops; + d_set_d_op(ecryptfs_dentry, &ecryptfs_dops); if ((ecryptfs_dentry->d_name.len == 1 && !strcmp(ecryptfs_dentry->d_name.name, ".")) || (ecryptfs_dentry->d_name.len == 2 diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index a9dbd62518e6..351038675376 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, if (special_file(lower_inode->i_mode)) init_special_inode(inode, lower_inode->i_mode, lower_inode->i_rdev); - dentry->d_op = &ecryptfs_dops; + d_set_d_op(dentry, &ecryptfs_dops); fsstack_copy_attr_all(inode, lower_inode); /* This size will be overwritten for real files w/ headers and * other metadata */ @@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags deactivate_locked_super(s); goto out; } - s->s_root->d_op = &ecryptfs_dops; + d_set_d_op(s->s_root, &ecryptfs_dops); s->s_root->d_sb = s; s->s_root->d_parent = s->s_root; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8cccfebee180..206351af7c58 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -750,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, */ result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = sb->s_root->d_op; + d_set_d_op(result, sb->s_root->d_op); return result; } @@ -800,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent)) - parent->d_op = sb->s_root->d_op; + d_set_d_op(parent, sb->s_root->d_op); out: unlock_super(sb); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 3b3e072d8982..35ffe43afa4b 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -227,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, } out: unlock_super(sb); - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); dentry = d_splice_alias(inode, dentry); if (dentry) - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); return dentry; error: @@ -673,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) } sb->s_flags |= MS_NOATIME; - sb->s_root->d_op = &msdos_dentry_operations; + d_set_d_op(sb->s_root, &msdos_dentry_operations); unlock_super(sb); return 0; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 4fc06278db48..3be5ed7d859f 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -766,11 +766,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: unlock_super(sb); - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); if (dentry) { - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; @@ -1072,9 +1072,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) } if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_ci_dentry_ops; + d_set_d_op(sb->s_root, &vfat_ci_dentry_ops); else - sb->s_root->d_op = &vfat_dentry_ops; + d_set_d_op(sb->s_root, &vfat_dentry_ops); unlock_super(sb); return 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c9627c95482d..c9a8a426a395 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -347,7 +347,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } entry = newent ? newent : entry; - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); if (outarg_valid) fuse_change_entry_timeout(entry, &outarg); else diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 44e0a6c57e87..a8b31da19b93 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -626,7 +626,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, entry = d_obtain_alias(inode); if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); fuse_invalidate_entry_cache(entry); } @@ -728,7 +728,7 @@ static struct dentry *fuse_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { - parent->d_op = &fuse_dentry_operations; + d_set_d_op(parent, &fuse_dentry_operations); fuse_invalidate_entry_cache(parent); } diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 5ab3839dfcb9..97012ecff560 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } @@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, out_inode: dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3eb1393f7b81..2aeabd4218cc 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, iput(inode); return -ENOMEM; } - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); *dptr = dentry; return 0; } diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 12cbea7502c2..f28f89796f4d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, { struct inode *inode = NULL; - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode && IS_ERR(inode)) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 2b3b8611b41b..ea4aefe7c652 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; int res; - dentry->d_op = &hfs_dentry_operations; + d_set_d_op(dentry, &hfs_dentry_operations); hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index ef4ee5716abe..0bef62aa4f42 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -434,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) goto bail_iput; - sb->s_root->d_op = &hfs_dentry_operations; + d_set_d_op(sb->s_root, &hfs_dentry_operations); /* everything's okay */ return 0; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 9d59c0571f59..ccab87145f7a 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; - dentry->d_op = &hfsplus_dentry_operations; + d_set_d_op(dentry, &hfsplus_dentry_operations); dentry->d_fsdata = NULL; hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 182e83a9079e..ddf712e4700e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -419,7 +419,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; goto cleanup; } - sb->s_root->d_op = &hfsplus_dentry_operations; + d_set_d_op(sb->s_root, &hfsplus_dentry_operations); str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 861113fcfc88..0bc81cf256b8 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -612,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, goto out_put; d_add(dentry, inode); - dentry->d_op = &hostfs_dentry_ops; + d_set_d_op(dentry, &hostfs_dentry_ops); return NULL; out_put: diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 35526df1fd32..32c13a94e1e9 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -65,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = { void hpfs_set_dentry_operations(struct dentry *dentry) { - dentry->d_op = &hpfs_dentry_operations; + d_set_d_op(dentry, &hpfs_dentry_operations); } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d8f3a652243d..844a7903c72f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -949,7 +949,7 @@ root_found: table += 2; if (opt.check == 'r') table++; - s->s_root->d_op = &isofs_dentry_ops[table]; + d_set_d_op(s->s_root, &isofs_dentry_ops[table]); kfree(opt.iocharset); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 715f7d318046..679a849c3b27 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -172,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam struct inode *inode; struct page *page; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); page = alloc_page(GFP_USER); if (!page) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 57f90dad8919..a151cbdec626 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1466,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc jfs_info("jfs_lookup: name = %s", name); if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); if ((name[0] == '.') && (len == 1)) inum = dip->i_ino; @@ -1495,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc dentry = d_splice_alias(ip, dentry); if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); return dentry; } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index b715b0f7bdfd..3150d766e0d4 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -525,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) goto out_no_root; if (sbi->mntflag & JFS_OS2) - sb->s_root->d_op = &jfs_ci_dentry_operations; + d_set_d_op(sb->s_root, &jfs_ci_dentry_operations); /* logical blocks are represented by 40 bits in pxd_t, etc. */ sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; diff --git a/fs/libfs.c b/fs/libfs.c index 28b36663c44e..889311e3d06b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -59,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &simple_dentry_operations; + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/fs/minix/namei.c b/fs/minix/namei.c index c0d35a3accef..1b9e07728a9f 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/namei.c b/fs/namei.c index c731b50a6184..90bd2873e117 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -587,6 +587,17 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) return dentry; } +static inline int need_reval_dot(struct dentry *dentry) +{ + if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) + return 0; + + if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) + return 0; + + return 1; +} + /* * force_reval_path - force revalidation of a dentry * @@ -610,10 +621,9 @@ force_reval_path(struct path *path, struct nameidata *nd) /* * only check on filesystems where it's possible for the dentry to - * become stale. It's assumed that if this flag is set then the - * d_revalidate op will also be defined. + * become stale. */ - if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) + if (!need_reval_dot(dentry)) return 0; status = dentry->d_op->d_revalidate(dentry, nd); @@ -1003,7 +1013,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (parent->d_op && parent->d_op->d_hash) { + if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { int err = parent->d_op->d_hash(parent, nd->inode, name); if (err < 0) return err; @@ -1029,7 +1039,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return -ECHILD; nd->seq = seq; - if (dentry->d_op && dentry->d_op->d_revalidate) { + if (dentry->d_flags & DCACHE_OP_REVALIDATE) { /* We commonly drop rcu-walk here */ if (nameidata_dentry_drop_rcu(nd, dentry)) return -ECHILD; @@ -1043,7 +1053,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, if (!dentry) goto need_lookup; found: - if (dentry->d_op && dentry->d_op->d_revalidate) + if (dentry->d_flags & DCACHE_OP_REVALIDATE) goto need_revalidate; done: path->mnt = mnt; @@ -1281,8 +1291,7 @@ return_reval: * We bypassed the ordinary revalidation routines. * We may need to check the cached dentry for staleness. */ - if (nd->path.dentry && nd->path.dentry->d_sb && - (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { + if (need_reval_dot(nd->path.dentry)) { if (nameidata_drop_rcu_maybe(nd)) return -ECHILD; err = -ESTALE; @@ -1602,7 +1611,7 @@ static struct dentry *__lookup_hash(struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (base->d_op && base->d_op->d_hash) { + if (base->d_flags & DCACHE_OP_HASH) { err = base->d_op->d_hash(base, inode, name); dentry = ERR_PTR(err); if (err < 0) @@ -1616,7 +1625,7 @@ static struct dentry *__lookup_hash(struct qstr *name, */ dentry = d_lookup(base, name); - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) + if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) dentry = do_revalidate(dentry, nd); if (!dentry) @@ -2070,7 +2079,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, follow_dotdot(nd); dir = nd->path.dentry; case LAST_DOT: - if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { + if (need_reval_dot(dir)) { if (!dir->d_op->d_revalidate(dir, nd)) { error = -ESTALE; goto exit; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0ba3cdc95a44..4b9cbb28d7fa 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -633,7 +633,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, entry->ino = iunique(dir->i_sb, 2); inode = ncp_iget(dir->i_sb, entry); if (inode) { - newdent->d_op = &ncp_dentry_operations; + d_set_d_op(newdent, &ncp_dentry_operations); d_instantiate(newdent, inode); if (!hashed) d_rehash(newdent); @@ -889,7 +889,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc if (inode) { ncp_new_dentry(dentry); add_entry: - dentry->d_op = &ncp_dentry_operations; + d_set_d_op(dentry, &ncp_dentry_operations); d_add(dentry, inode); error = 0; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 60047dbeb38d..0c75a5f3cafd 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -717,7 +717,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &ncp_root_dentry_operations; + d_set_d_op(sb->s_root, &ncp_root_dentry_operations); return 0; out_no_root: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index eb77471b8823..37e0a8bb077e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -438,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) if (dentry == NULL) return; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); if (IS_ERR(inode)) goto out; @@ -1188,7 +1188,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (dentry->d_name.len > NFS_SERVER(dir)->namelen) goto out; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* * If we're doing an exclusive create, optimize away the lookup @@ -1333,7 +1333,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = ERR_PTR(-ENAMETOOLONG); goto out; } - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash * the dentry. */ diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b3e36c3430de..c3a5a1126833 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -121,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fsinfo.fattr); return ret; @@ -228,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fattr); diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 19ad145d2af3..6adafa576065 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -138,7 +138,7 @@ check_gen: result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = &ocfs2_dentry_ops; + d_set_d_op(result, &ocfs2_dentry_ops); else mlog_errno(PTR_ERR(result)); @@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); if (!IS_ERR(parent)) - parent->d_op = &ocfs2_dentry_ops; + d_set_d_op(parent, &ocfs2_dentry_ops); bail_unlock: ocfs2_inode_unlock(dir, 0); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index ff5744e1e36f..d14cad6e2e41 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, spin_unlock(&oi->ip_lock); bail_add: - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); ret = d_splice_alias(inode, dentry); if (inode) { @@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, OCFS2_I(inode)->ip_blkno, parent_fe_bh, @@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry, } ihold(inode); - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); out_commit: @@ -1794,7 +1794,7 @@ static int ocfs2_symlink(struct inode *dir, mlog_errno(status); goto bail; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, le64_to_cpu(fe->i_blkno), parent_fe_bh, @@ -2459,7 +2459,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, goto out_commit; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); status = 0; out_commit: diff --git a/fs/pipe.c b/fs/pipe.c index ae3592dcc88c..01a786567810 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1004,7 +1004,7 @@ struct file *create_write_pipe(int flags) goto err_inode; path.mnt = mntget(pipe_mnt); - path.dentry->d_op = &pipefs_dentry_operations; + d_set_d_op(path.dentry, &pipefs_dentry_operations); d_instantiate(path.dentry, inode); err = -ENFILE; diff --git a/fs/proc/base.c b/fs/proc/base.c index d932fdb6a245..85f0a80912aa 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1969,7 +1969,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2137,7 +2137,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, ei->fd = fd; inode->i_mode = S_IFREG | S_IRUSR; inode->i_fop = &proc_fdinfo_file_operations; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2196,7 +2196,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, NULL)) @@ -2615,7 +2615,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &proc_base_dentry_operations; + d_set_d_op(dentry, &proc_base_dentry_operations); d_add(dentry, inode); error = NULL; out: @@ -2926,7 +2926,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ @@ -3169,7 +3169,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 1d607be36d95..f766be29d2c7 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, out_unlock: if (inode) { - dentry->d_op = &proc_dentry_operations; + d_set_d_op(dentry, &proc_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 998e3a715bcc..35efd85a4d32 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -120,7 +120,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; err = NULL; - dentry->d_op = &proc_sys_dentry_operations; + d_set_d_op(dentry, &proc_sys_dentry_operations); d_add(dentry, inode); out: @@ -201,7 +201,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, dput(child); return -ENOMEM; } else { - child->d_op = &proc_sys_dentry_operations; + d_set_d_op(child, &proc_sys_dentry_operations); d_add(child, inode); } } else { diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5d04a7828e7a..e0f0d7ea10a1 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -990,7 +990,7 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - dentry->d_op = &xattr_lookup_poison_ops; + d_set_d_op(dentry, &xattr_lookup_poison_ops); if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 27e1102e303e..3e076caa8daf 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -701,7 +701,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, /* instantiate and hash dentry */ ret = d_find_alias(inode); if (!ret) { - dentry->d_op = &sysfs_dentry_ops; + d_set_d_op(dentry, &sysfs_dentry_ops); dentry->d_fsdata = sysfs_get(sd); d_add(dentry, inode); } else { diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 7507aeb4c90e..b5e68da2db32 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -48,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > SYSV_NAMELEN) return ERR_PTR(-ENAMETOOLONG); ino = sysv_inode_by_name(dentry); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 3d9c62be0c10..76712aefc4ab 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size) if (sbi->s_forced_ro) sb->s_flags |= MS_RDONLY; if (sbi->s_truncate) - sb->s_root->d_op = &sysv_dentry_operations; + d_set_d_op(sb->s_root, &sysv_dentry_operations); return 1; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e4414693065e..f4b40a751f09 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -183,6 +183,11 @@ struct dentry_operations { #define DCACHE_GENOCIDE 0x0200 #define DCACHE_MOUNTED 0x0400 /* is a mountpoint */ +#define DCACHE_OP_HASH 0x1000 +#define DCACHE_OP_COMPARE 0x2000 +#define DCACHE_OP_REVALIDATE 0x4000 +#define DCACHE_OP_DELETE 0x8000 + extern spinlock_t dcache_inode_lock; extern seqlock_t rename_lock; @@ -201,6 +206,7 @@ extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); +extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9f41470c3949..51cddc11cd85 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2222,7 +2222,7 @@ static struct dentry *cgroup_lookup(struct inode *dir, if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &cgroup_dentry_operations; + d_set_d_op(dentry, &cgroup_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/net/socket.c b/net/socket.c index 817dc92e9ef8..991e266bc7ae 100644 --- a/net/socket.c +++ b/net/socket.c @@ -368,7 +368,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) } path.mnt = mntget(sock_mnt); - path.dentry->d_op = &sockfs_dentry_operations; + d_set_d_op(path.dentry, &sockfs_dentry_operations); d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 2899fe27f880..09f01f41e55a 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -591,7 +591,7 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent, } } if (!dentry->d_inode) - dentry->d_op = &rpc_dentry_operations; + d_set_d_op(dentry, &rpc_dentry_operations); out_err: return dentry; } -- cgit v1.2.3 From 4b936885ab04dc6e0bb0ef35e0e23c1a7364d9e5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:07 +1100 Subject: fs: improve scalability of pseudo filesystems Regardless of how much we possibly try to scale dcache, there is likely always going to be some fundamental contention when adding or removing children under the same parent. Pseudo filesystems do not seem need to have connected dentries because by definition they are disconnected. Signed-off-by: Nick Piggin --- fs/anon_inodes.c | 2 +- fs/dcache.c | 12 ++++++++++++ fs/pipe.c | 2 +- include/linux/dcache.h | 1 + net/socket.c | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index aca8806fa206..9d92b33da8a0 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name, this.name = name; this.len = strlen(name); this.hash = 0; - path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); + path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); if (!path.dentry) goto err_module; diff --git a/fs/dcache.c b/fs/dcache.c index 09ec945f3c98..9e6e6db76869 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1330,6 +1330,18 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) +{ + struct dentry *dentry = d_alloc(NULL, name); + if (dentry) { + dentry->d_sb = sb; + dentry->d_parent = dentry; + dentry->d_flags |= DCACHE_DISCONNECTED; + } + return dentry; +} +EXPORT_SYMBOL(d_alloc_pseudo); + struct dentry *d_alloc_name(struct dentry *parent, const char *name) { struct qstr q; diff --git a/fs/pipe.c b/fs/pipe.c index 01a786567810..cfe3a7f2ee21 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -999,7 +999,7 @@ struct file *create_write_pipe(int flags) goto err; err = -ENOMEM; - path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); if (!path.dentry) goto err_inode; path.mnt = mntget(pipe_mnt); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d719e4de8046..c0a2ca97c72f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -211,6 +211,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); +extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_obtain_alias(struct inode *); diff --git a/net/socket.c b/net/socket.c index 991e266bc7ae..0ee74c325320 100644 --- a/net/socket.c +++ b/net/socket.c @@ -361,7 +361,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); if (unlikely(!path.dentry)) { put_unused_fd(fd); return -ENOMEM; -- cgit v1.2.3 From b3e19d924b6eaf2ca7d22cba99a517c5171007b6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:11 +1100 Subject: fs: scale mntget/mntput The problem that this patch aims to fix is vfsmount refcounting scalability. We need to take a reference on the vfsmount for every successful path lookup, which often go to the same mount point. The fundamental difficulty is that a "simple" reference count can never be made scalable, because any time a reference is dropped, we must check whether that was the last reference. To do that requires communication with all other CPUs that may have taken a reference count. We can make refcounts more scalable in a couple of ways, involving keeping distributed counters, and checking for the global-zero condition less frequently. - check the global sum once every interval (this will delay zero detection for some interval, so it's probably a showstopper for vfsmounts). - keep a local count and only taking the global sum when local reaches 0 (this is difficult for vfsmounts, because we can't hold preempt off for the life of a reference, so a counter would need to be per-thread or tied strongly to a particular CPU which requires more locking). - keep a local difference of increments and decrements, which allows us to sum the total difference and hence find the refcount when summing all CPUs. Then, keep a single integer "long" refcount for slow and long lasting references, and only take the global sum of local counters when the long refcount is 0. This last scheme is what I implemented here. Attached mounts and process root and working directory references are "long" references, and everything else is a short reference. This allows scalable vfsmount references during path walking over mounted subtrees and unattached (lazy umounted) mounts with processes still running in them. This results in one fewer atomic op in the fastpath: mntget is now just a per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock and non-atomic decrement in the common case. However code is otherwise bigger and heavier, so single threaded performance is basically a wash. Signed-off-by: Nick Piggin --- arch/ia64/kernel/perfmon.c | 2 +- drivers/mtd/mtdchar.c | 2 +- fs/anon_inodes.c | 2 +- fs/fs_struct.c | 26 +++-- fs/internal.h | 1 + fs/namei.c | 24 +++++ fs/namespace.c | 242 +++++++++++++++++++++++++++++++++++++-------- fs/pipe.c | 2 +- fs/pnode.c | 4 +- fs/super.c | 2 +- include/linux/mount.h | 53 ++++------ include/linux/path.h | 2 + net/socket.c | 19 +++- 13 files changed, 283 insertions(+), 98 deletions(-) (limited to 'fs/pipe.c') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 5a24f40bb48e..f099b82703d8 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -1542,7 +1542,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) * any operations on the root directory. However, we need a non-trivial * d_name - pfm: will go nicely and kill the special-casing in procfs. */ -static struct vfsmount *pfmfs_mnt; +static struct vfsmount *pfmfs_mnt __read_mostly; static int __init init_pfm_fs(void) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 4759d827e8c7..f511dd15fd31 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1201,7 +1201,7 @@ err_unregister_chdev: static void __exit cleanup_mtdchar(void) { unregister_mtd_user(&mtdchar_notifier); - mntput(mtd_inode_mnt); + mntput_long(mtd_inode_mnt); unregister_filesystem(&mtd_inodefs_type); __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 9d92b33da8a0..5fd38112a6ca 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -232,7 +232,7 @@ static int __init anon_inode_init(void) return 0; err_mntput: - mntput(anon_inode_mnt); + mntput_long(anon_inode_mnt); err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); err_exit: diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 60b8531f41c5..68ca487bedb1 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -17,11 +17,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; - path_get(path); + path_get_long(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) - path_put(&old_root); + path_put_long(&old_root); } /* @@ -36,12 +36,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; - path_get(path); + path_get_long(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_pwd.dentry) - path_put(&old_pwd); + path_put_long(&old_pwd); } void chroot_fs_refs(struct path *old_root, struct path *new_root) @@ -59,13 +59,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) write_seqcount_begin(&fs->seq); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { - path_get(new_root); + path_get_long(new_root); fs->root = *new_root; count++; } if (fs->pwd.dentry == old_root->dentry && fs->pwd.mnt == old_root->mnt) { - path_get(new_root); + path_get_long(new_root); fs->pwd = *new_root; count++; } @@ -76,13 +76,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) } while_each_thread(g, p); read_unlock(&tasklist_lock); while (count--) - path_put(old_root); + path_put_long(old_root); } void free_fs_struct(struct fs_struct *fs) { - path_put(&fs->root); - path_put(&fs->pwd); + path_put_long(&fs->root); + path_put_long(&fs->pwd); kmem_cache_free(fs_cachep, fs); } @@ -115,7 +115,13 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) spin_lock_init(&fs->lock); seqcount_init(&fs->seq); fs->umask = old->umask; - get_fs_root_and_pwd(old, &fs->root, &fs->pwd); + + spin_lock(&old->lock); + fs->root = old->root; + path_get_long(&fs->root); + fs->pwd = old->pwd; + path_get_long(&fs->pwd); + spin_unlock(&old->lock); } return fs; } diff --git a/fs/internal.h b/fs/internal.h index e43b9a4dbf4e..9687c2ee2735 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **); extern void free_vfsmnt(struct vfsmount *); extern struct vfsmount *alloc_vfsmnt(const char *); +extern unsigned int mnt_get_count(struct vfsmount *mnt); extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, struct vfsmount *); diff --git a/fs/namei.c b/fs/namei.c index 4e957bf744ae..19433cdba011 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -367,6 +367,18 @@ void path_get(struct path *path) } EXPORT_SYMBOL(path_get); +/** + * path_get_long - get a long reference to a path + * @path: path to get the reference to + * + * Given a path increment the reference count to the dentry and the vfsmount. + */ +void path_get_long(struct path *path) +{ + mntget_long(path->mnt); + dget(path->dentry); +} + /** * path_put - put a reference to a path * @path: path to put the reference to @@ -380,6 +392,18 @@ void path_put(struct path *path) } EXPORT_SYMBOL(path_put); +/** + * path_put_long - put a long reference to a path + * @path: path to put the reference to + * + * Given a path decrement the reference count to the dentry and the vfsmount. + */ +void path_put_long(struct path *path) +{ + dput(path->dentry); + mntput_long(path->mnt); +} + /** * nameidata_drop_rcu - drop this nameidata out of rcu-walk * @nd: nameidata pathwalk data to drop diff --git a/fs/namespace.c b/fs/namespace.c index 03b82350f020..3ddfd9046c44 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt) mnt->mnt_group_id = 0; } +/* + * vfsmount lock must be held for read + */ +static inline void mnt_add_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_add(mnt->mnt_pcp->mnt_count, n); +#else + preempt_disable(); + mnt->mnt_count += n; + preempt_enable(); +#endif +} + +static inline void mnt_set_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_write(mnt->mnt_pcp->mnt_count, n); +#else + mnt->mnt_count = n; +#endif +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_inc_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, 1); +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_dec_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, -1); +} + +/* + * vfsmount lock must be held for write + */ +unsigned int mnt_get_count(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + unsigned int count = atomic_read(&mnt->mnt_longrefs); + int cpu; + + for_each_possible_cpu(cpu) { + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; + } + + return count; +#else + return mnt->mnt_count; +#endif +} + struct vfsmount *alloc_vfsmnt(const char *name) { struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name) goto out_free_id; } - atomic_set(&mnt->mnt_count, 1); +#ifdef CONFIG_SMP + mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); + if (!mnt->mnt_pcp) + goto out_free_devname; + + atomic_set(&mnt->mnt_longrefs, 1); +#else + mnt->mnt_count = 1; + mnt->mnt_writers = 0; +#endif + INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); @@ -165,13 +233,6 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); -#endif -#ifdef CONFIG_SMP - mnt->mnt_writers = alloc_percpu(int); - if (!mnt->mnt_writers) - goto out_free_devname; -#else - mnt->mnt_writers = 0; #endif } return mnt; @@ -219,7 +280,7 @@ EXPORT_SYMBOL_GPL(__mnt_is_readonly); static inline void mnt_inc_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; + this_cpu_inc(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers++; #endif @@ -228,7 +289,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt) static inline void mnt_dec_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; + this_cpu_dec(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers--; #endif @@ -241,7 +302,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt) int cpu; for_each_possible_cpu(cpu) { - count += *per_cpu_ptr(mnt->mnt_writers, cpu); + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; } return count; @@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt) kfree(mnt->mnt_devname); mnt_free_id(mnt); #ifdef CONFIG_SMP - free_percpu(mnt->mnt_writers); + free_percpu(mnt->mnt_pcp); #endif kmem_cache_free(mnt_cache, mnt); } @@ -652,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, return NULL; } -static inline void __mntput(struct vfsmount *mnt) +static inline void mntfree(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; + /* * This probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this @@ -662,8 +724,8 @@ static inline void __mntput(struct vfsmount *mnt) * to make r/w->r/o transitions. */ /* - * atomic_dec_and_lock() used to deal with ->mnt_count decrements - * provides barriers, so mnt_get_writers() below is safe. AV + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); fsnotify_vfsmount_delete(mnt); @@ -672,28 +734,113 @@ static inline void __mntput(struct vfsmount *mnt) deactivate_super(sb); } -void mntput_no_expire(struct vfsmount *mnt) -{ -repeat: - if (atomic_add_unless(&mnt->mnt_count, -1, 1)) - return; +#ifdef CONFIG_SMP +static inline void __mntput(struct vfsmount *mnt, int longrefs) +{ + if (!longrefs) { +put_again: + br_read_lock(vfsmount_lock); + if (likely(atomic_read(&mnt->mnt_longrefs))) { + mnt_dec_count(mnt); + br_read_unlock(vfsmount_lock); + return; + } + br_read_unlock(vfsmount_lock); + } else { + BUG_ON(!atomic_read(&mnt->mnt_longrefs)); + if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) + return; + } + br_write_lock(vfsmount_lock); - if (!atomic_dec_and_test(&mnt->mnt_count)) { + if (!longrefs) + mnt_dec_count(mnt); + else + atomic_dec(&mnt->mnt_longrefs); + if (mnt_get_count(mnt)) { br_write_unlock(vfsmount_lock); return; } - if (likely(!mnt->mnt_pinned)) { + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - __mntput(mnt); + acct_auto_close_mnt(mnt); + goto put_again; + } + br_write_unlock(vfsmount_lock); + mntfree(mnt); +} +#else +static inline void __mntput(struct vfsmount *mnt, int longrefs) +{ +put_again: + mnt_dec_count(mnt); + if (likely(mnt_get_count(mnt))) return; + br_write_lock(vfsmount_lock); + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; + br_write_unlock(vfsmount_lock); + acct_auto_close_mnt(mnt); + goto put_again; } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - acct_auto_close_mnt(mnt); - goto repeat; + mntfree(mnt); +} +#endif + +static void mntput_no_expire(struct vfsmount *mnt) +{ + __mntput(mnt, 0); +} + +void mntput(struct vfsmount *mnt) +{ + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + __mntput(mnt, 0); + } +} +EXPORT_SYMBOL(mntput); + +struct vfsmount *mntget(struct vfsmount *mnt) +{ + if (mnt) + mnt_inc_count(mnt); + return mnt; +} +EXPORT_SYMBOL(mntget); + +void mntput_long(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + __mntput(mnt, 1); + } +#else + mntput(mnt); +#endif } -EXPORT_SYMBOL(mntput_no_expire); +EXPORT_SYMBOL(mntput_long); + +struct vfsmount *mntget_long(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (mnt) + atomic_inc(&mnt->mnt_longrefs); + return mnt; +#else + return mntget(mnt); +#endif +} +EXPORT_SYMBOL(mntget_long); void mnt_pin(struct vfsmount *mnt) { @@ -701,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt) mnt->mnt_pinned++; br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { - atomic_inc(&mnt->mnt_count); + mnt_inc_count(mnt); mnt->mnt_pinned--; } br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) @@ -1008,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - br_read_lock(vfsmount_lock); + /* write lock needed for mnt_get_count */ + br_write_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { - actual_refs += atomic_read(&p->mnt_count); + actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1040,10 +1186,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_read_lock(vfsmount_lock); + br_write_lock(vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1070,7 +1216,7 @@ void release_mounts(struct list_head *head) dput(dentry); mntput(m); } - mntput(mnt); + mntput_long(mnt); } } @@ -1125,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags) flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; - if (atomic_read(&mnt->mnt_count) != 2) + /* + * probably don't strictly need the lock here if we examined + * all race cases, but it's a slowpath. + */ + br_write_lock(vfsmount_lock); + if (mnt_get_count(mnt) != 2) { + br_write_lock(vfsmount_lock); return -EBUSY; + } + br_write_unlock(vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1815,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, unlock: up_write(&namespace_sem); - mntput(newmnt); + mntput_long(newmnt); return err; } @@ -2148,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, if (fs) { if (p == fs->root.mnt) { rootmnt = p; - fs->root.mnt = mntget(q); + fs->root.mnt = mntget_long(q); } if (p == fs->pwd.mnt) { pwdmnt = p; - fs->pwd.mnt = mntget(q); + fs->pwd.mnt = mntget_long(q); } } p = next_mnt(p, mnt_ns->root); @@ -2161,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, up_write(&namespace_sem); if (rootmnt) - mntput(rootmnt); + mntput_long(rootmnt); if (pwdmnt) - mntput(pwdmnt); + mntput_long(pwdmnt); return new_ns; } @@ -2350,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); + error = 0; path_put(&root_parent); path_put(&parent_path); @@ -2376,6 +2531,7 @@ static void __init init_mount_tree(void) mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) panic("Can't create rootfs"); + ns = create_mnt_ns(mnt); if (IS_ERR(ns)) panic("Can't allocate initial namespace"); diff --git a/fs/pipe.c b/fs/pipe.c index cfe3a7f2ee21..68f1f8e4e23b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void) static void __exit exit_pipe_fs(void) { unregister_filesystem(&pipe_fs_type); - mntput(pipe_mnt); + mntput_long(pipe_mnt); } fs_initcall(init_pipe_fs); diff --git a/fs/pnode.c b/fs/pnode.c index 8066b8dd748f..d42514e32380 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -288,7 +288,7 @@ out: */ static inline int do_refcount_check(struct vfsmount *mnt, int count) { - int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; + int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; return (mycount > count); } @@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. * - * vfsmount lock must be held for read or write + * vfsmount lock must be held for write */ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) { diff --git a/fs/super.c b/fs/super.c index 968ba013011a..823e061faa87 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1140,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) return mnt; err: - mntput(mnt); + mntput_long(mnt); return ERR_PTR(err); } diff --git a/include/linux/mount.h b/include/linux/mount.h index 5e7a59408dd4..1869ea24a739 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -13,6 +13,7 @@ #include #include #include +#include #include struct super_block; @@ -46,12 +47,24 @@ struct mnt_namespace; #define MNT_INTERNAL 0x4000 +struct mnt_pcp { + int mnt_count; + int mnt_writers; +}; + struct vfsmount { struct list_head mnt_hash; struct vfsmount *mnt_parent; /* fs we are mounted on */ struct dentry *mnt_mountpoint; /* dentry of mountpoint */ struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ +#ifdef CONFIG_SMP + struct mnt_pcp __percpu *mnt_pcp; + atomic_t mnt_longrefs; +#else + int mnt_count; + int mnt_writers; +#endif struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ int mnt_flags; @@ -70,57 +83,25 @@ struct vfsmount { struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ - /* - * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount - * to let these frequently modified fields in a separate cache line - * (so that reads of mnt_flags wont ping-pong on SMP machines) - */ - atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; -#ifdef CONFIG_SMP - int __percpu *mnt_writers; -#else - int mnt_writers; -#endif }; -static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - return mnt->mnt_writers; -#else - return &mnt->mnt_writers; -#endif -} - -static inline struct vfsmount *mntget(struct vfsmount *mnt) -{ - if (mnt) - atomic_inc(&mnt->mnt_count); - return mnt; -} - struct file; /* forward dec */ extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); -extern void mntput_no_expire(struct vfsmount *mnt); +extern void mntput(struct vfsmount *mnt); +extern struct vfsmount *mntget(struct vfsmount *mnt); +extern void mntput_long(struct vfsmount *mnt); +extern struct vfsmount *mntget_long(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); -static inline void mntput(struct vfsmount *mnt) -{ - if (mnt) { - mnt->mnt_expiry_mark = 0; - mntput_no_expire(mnt); - } -} - extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); diff --git a/include/linux/path.h b/include/linux/path.h index edc98dec6266..a581e8c06533 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -10,7 +10,9 @@ struct path { }; extern void path_get(struct path *); +extern void path_get_long(struct path *); extern void path_put(struct path *); +extern void path_put_long(struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { diff --git a/net/socket.c b/net/socket.c index 0ee74c325320..815bba3d2fe0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2390,6 +2390,8 @@ EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { + int err; + /* * Initialize sock SLAB cache. */ @@ -2406,8 +2408,15 @@ static int __init sock_init(void) */ init_inodecache(); - register_filesystem(&sock_fs_type); + + err = register_filesystem(&sock_fs_type); + if (err) + goto out_fs; sock_mnt = kern_mount(&sock_fs_type); + if (IS_ERR(sock_mnt)) { + err = PTR_ERR(sock_mnt); + goto out_mount; + } /* The real protocol initialization is performed in later initcalls. */ @@ -2420,7 +2429,13 @@ static int __init sock_init(void) skb_timestamping_init(); #endif - return 0; +out: + return err; + +out_mount: + unregister_filesystem(&sock_fs_type); +out_fs: + goto out; } core_initcall(sock_init); /* early initcall */ -- cgit v1.2.3 From c74a1cbb3cac348f276fabc381758f5b0b4713b2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Jan 2011 16:59:34 -0500 Subject: pass default dentry_operations to mount_pseudo() Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 6 ++++-- drivers/mtd/mtdchar.c | 2 +- fs/anon_inodes.c | 21 +++++++++++---------- fs/block_dev.c | 2 +- fs/libfs.c | 4 +++- fs/pipe.c | 4 ++-- include/linux/fs.h | 4 +++- net/socket.c | 30 +++++++++++++++--------------- 8 files changed, 40 insertions(+), 33 deletions(-) (limited to 'fs/pipe.c') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index d92d5b5161fc..ac76da099a6d 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -617,11 +617,14 @@ pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, return get_unmapped_area(file, addr, len, pgoff, flags); } +/* forward declaration */ +static static const struct dentry_operations pfmfs_dentry_operations; static struct dentry * pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC); + return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations, + PFMFS_MAGIC); } static struct file_system_type pfm_fs_type = { @@ -2232,7 +2235,6 @@ pfm_alloc_file(pfm_context_t *ctx) } path.mnt = mntget(pfmfs_mnt); - d_set_d_op(path.dentry, &pfmfs_dentry_operations); d_add(path.dentry, inode); file = alloc_file(&path, FMODE_READ, &pfm_file_ops); diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index f511dd15fd31..ee4bb3330bdf 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1134,7 +1134,7 @@ static const struct file_operations mtd_fops = { static struct dentry *mtd_inodefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "mtd_inode:", NULL, MTD_INODE_FS_MAGIC); + return mount_pseudo(fs_type, "mtd_inode:", NULL, NULL, MTD_INODE_FS_MAGIC); } static struct file_system_type mtd_inodefs_type = { diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 5fd38112a6ca..549a53cc0283 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -26,12 +26,6 @@ static struct vfsmount *anon_inode_mnt __read_mostly; static struct inode *anon_inode_inode; static const struct file_operations anon_inode_fops; -static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC); -} - /* * anon_inodefs_dname() is called from d_path(). */ @@ -41,14 +35,22 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen) dentry->d_name.name); } +static const struct dentry_operations anon_inodefs_dentry_operations = { + .d_dname = anon_inodefs_dname, +}; + +static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "anon_inode:", NULL, + &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); +} + static struct file_system_type anon_inode_fs_type = { .name = "anon_inodefs", .mount = anon_inodefs_mount, .kill_sb = kill_anon_super, }; -static const struct dentry_operations anon_inodefs_dentry_operations = { - .d_dname = anon_inodefs_dname, -}; /* * nop .set_page_dirty method so that people can use .page_mkwrite on @@ -113,7 +115,6 @@ struct file *anon_inode_getfile(const char *name, */ ihold(anon_inode_inode); - d_set_d_op(path.dentry, &anon_inodefs_dentry_operations); d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; diff --git a/fs/block_dev.c b/fs/block_dev.c index 771f23527010..88da70355aa3 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -473,7 +473,7 @@ static const struct super_operations bdev_sops = { static struct dentry *bd_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576); + return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576); } static struct file_system_type bd_type = { diff --git a/fs/libfs.c b/fs/libfs.c index 889311e3d06b..c88eab55aec9 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -217,7 +217,8 @@ static const struct super_operations simple_super_operations = { * will never be mountable) */ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name, - const struct super_operations *ops, unsigned long magic) + const struct super_operations *ops, + const struct dentry_operations *dops, unsigned long magic) { struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); struct dentry *dentry; @@ -254,6 +255,7 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name, dentry->d_parent = dentry; d_instantiate(dentry, root); s->s_root = dentry; + s->s_d_op = dops; s->s_flags |= MS_ACTIVE; return dget(s->s_root); diff --git a/fs/pipe.c b/fs/pipe.c index 68f1f8e4e23b..6b0255a74f36 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1004,7 +1004,6 @@ struct file *create_write_pipe(int flags) goto err_inode; path.mnt = mntget(pipe_mnt); - d_set_d_op(path.dentry, &pipefs_dentry_operations); d_instantiate(path.dentry, inode); err = -ENFILE; @@ -1266,7 +1265,8 @@ static const struct super_operations pipefs_ops = { static struct dentry *pipefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC); + return mount_pseudo(fs_type, "pipe:", &pipefs_ops, + &pipefs_dentry_operations, PIPEFS_MAGIC); } static struct file_system_type pipe_fs_type = { diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e4c27486e74..c0701288d204 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1835,7 +1835,9 @@ struct super_block *sget(struct file_system_type *type, int (*set)(struct super_block *,void *), void *data); extern struct dentry *mount_pseudo(struct file_system_type *, char *, - const struct super_operations *ops, unsigned long); + const struct super_operations *ops, + const struct dentry_operations *dops, + unsigned long); extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); static inline void sb_mark_dirty(struct super_block *sb) diff --git a/net/socket.c b/net/socket.c index ccc576a6a508..ac2219f90d5d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -306,20 +306,6 @@ static const struct super_operations sockfs_ops = { .statfs = simple_statfs, }; -static struct dentry *sockfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); -} - -static struct vfsmount *sock_mnt __read_mostly; - -static struct file_system_type sock_fs_type = { - .name = "sockfs", - .mount = sockfs_mount, - .kill_sb = kill_anon_super, -}; - /* * sockfs_dname() is called from d_path(). */ @@ -333,6 +319,21 @@ static const struct dentry_operations sockfs_dentry_operations = { .d_dname = sockfs_dname, }; +static struct dentry *sockfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "socket:", &sockfs_ops, + &sockfs_dentry_operations, SOCKFS_MAGIC); +} + +static struct vfsmount *sock_mnt __read_mostly; + +static struct file_system_type sock_fs_type = { + .name = "sockfs", + .mount = sockfs_mount, + .kill_sb = kill_anon_super, +}; + /* * Obtains the first available file descriptor and sets it up for use. * @@ -368,7 +369,6 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) } path.mnt = mntget(sock_mnt); - d_set_d_op(path.dentry, &sockfs_dentry_operations); d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; -- cgit v1.2.3 From e462c448fdc89252d631b26ff0ed4f7ad6fe8ed2 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Wed, 12 Jan 2011 17:00:25 -0800 Subject: pipe: use event aware wakeups Send the events the wakeup refers to, so that epoll, and even the new poll code in fs/select.c can avoid wakeups if the events do not match the requested set. Signed-off-by: Davide Libenzi Acked-by: David S. Miller Acked-by: Eric Dumazet Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index 68f1f8e4e23b..04151e2aee96 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -441,7 +441,7 @@ redo: break; } if (do_wakeup) { - wake_up_interruptible_sync(&pipe->wait); + wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } pipe_wait(pipe); @@ -450,7 +450,7 @@ redo: /* Signal writers asynchronously that there is more room. */ if (do_wakeup) { - wake_up_interruptible_sync(&pipe->wait); + wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } if (ret > 0) @@ -612,7 +612,7 @@ redo2: break; } if (do_wakeup) { - wake_up_interruptible_sync(&pipe->wait); + wake_up_interruptible_sync_poll(&pipe->wait, POLLIN); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); do_wakeup = 0; } @@ -623,7 +623,7 @@ redo2: out: mutex_unlock(&inode->i_mutex); if (do_wakeup) { - wake_up_interruptible_sync(&pipe->wait); + wake_up_interruptible_sync_poll(&pipe->wait, POLLIN); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } if (ret > 0) @@ -715,7 +715,7 @@ pipe_release(struct inode *inode, int decr, int decw) if (!pipe->readers && !pipe->writers) { free_pipe_info(inode); } else { - wake_up_interruptible_sync(&pipe->wait); + wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } -- cgit v1.2.3 From f03c65993b98eeb909a4012ce7833c5857d74755 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Jan 2011 22:30:21 -0500 Subject: sanitize vfsmount refcounting changes Instead of splitting refcount between (per-cpu) mnt_count and (SMP-only) mnt_longrefs, make all references contribute to mnt_count again and keep track of how many are longterm ones. Accounting rules for longterm count: * 1 for each fs_struct.root.mnt * 1 for each fs_struct.pwd.mnt * 1 for having non-NULL ->mnt_ns * decrement to 0 happens only under vfsmount lock exclusive That allows nice common case for mntput() - since we can't drop the final reference until after mnt_longterm has reached 0 due to the rules above, mntput() can grab vfsmount lock shared and check mnt_longterm. If it turns out to be non-zero (which is the common case), we know that this is not the final mntput() and can just blindly decrement percpu mnt_count. Otherwise we grab vfsmount lock exclusive and do usual decrement-and-check of percpu mnt_count. For fs_struct.c we have mnt_make_longterm() and mnt_make_shortterm(); namespace.c uses the latter in places where we don't already hold vfsmount lock exclusive and opencodes a few remaining spots where we need to manipulate mnt_longterm. Note that we mostly revert the code outside of fs/namespace.c back to what we used to have; in particular, normal code doesn't need to care about two kinds of references, etc. And we get to keep the optimization Nick's variant had bought us... Signed-off-by: Al Viro --- drivers/mtd/mtdchar.c | 2 +- fs/anon_inodes.c | 2 +- fs/fs_struct.c | 35 ++++++++++----- fs/internal.h | 3 ++ fs/namei.c | 24 ----------- fs/namespace.c | 116 +++++++++++++++++++------------------------------- fs/pipe.c | 2 +- fs/super.c | 2 +- include/linux/mount.h | 4 +- include/linux/path.h | 2 - 10 files changed, 75 insertions(+), 117 deletions(-) (limited to 'fs/pipe.c') diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index ee4bb3330bdf..98240575a18d 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1201,7 +1201,7 @@ err_unregister_chdev: static void __exit cleanup_mtdchar(void) { unregister_mtd_user(&mtdchar_notifier); - mntput_long(mtd_inode_mnt); + mntput(mtd_inode_mnt); unregister_filesystem(&mtd_inodefs_type); __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index cbe57f3c4d89..c5567cb78432 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -233,7 +233,7 @@ static int __init anon_inode_init(void) return 0; err_mntput: - mntput_long(anon_inode_mnt); + mntput(anon_inode_mnt); err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); err_exit: diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 68ca487bedb1..78b519c13536 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -4,6 +4,19 @@ #include #include #include +#include "internal.h" + +static inline void path_get_longterm(struct path *path) +{ + path_get(path); + mnt_make_longterm(path->mnt); +} + +static inline void path_put_longterm(struct path *path) +{ + mnt_make_shortterm(path->mnt); + path_put(path); +} /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. @@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; - path_get_long(path); + path_get_longterm(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) - path_put_long(&old_root); + path_put_longterm(&old_root); } /* @@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; - path_get_long(path); + path_get_longterm(path); write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_pwd.dentry) - path_put_long(&old_pwd); + path_put_longterm(&old_pwd); } void chroot_fs_refs(struct path *old_root, struct path *new_root) @@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) write_seqcount_begin(&fs->seq); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { - path_get_long(new_root); + path_get_longterm(new_root); fs->root = *new_root; count++; } if (fs->pwd.dentry == old_root->dentry && fs->pwd.mnt == old_root->mnt) { - path_get_long(new_root); + path_get_longterm(new_root); fs->pwd = *new_root; count++; } @@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) } while_each_thread(g, p); read_unlock(&tasklist_lock); while (count--) - path_put_long(old_root); + path_put_longterm(old_root); } void free_fs_struct(struct fs_struct *fs) { - path_put_long(&fs->root); - path_put_long(&fs->pwd); + path_put_longterm(&fs->root); + path_put_longterm(&fs->pwd); kmem_cache_free(fs_cachep, fs); } @@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) spin_lock(&old->lock); fs->root = old->root; - path_get_long(&fs->root); + path_get_longterm(&fs->root); fs->pwd = old->pwd; - path_get_long(&fs->pwd); + path_get_longterm(&fs->pwd); spin_unlock(&old->lock); } return fs; diff --git a/fs/internal.h b/fs/internal.h index 4931060fd089..12ccb86edef7 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -73,6 +73,9 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern int do_add_mount(struct vfsmount *, struct path *, int); extern void mnt_clear_expiry(struct vfsmount *); +extern void mnt_make_longterm(struct vfsmount *); +extern void mnt_make_shortterm(struct vfsmount *); + extern void __init mnt_init(void); DECLARE_BRLOCK(vfsmount_lock); diff --git a/fs/namei.c b/fs/namei.c index c2e37727e3ab..8f7b41a14882 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -367,18 +367,6 @@ void path_get(struct path *path) } EXPORT_SYMBOL(path_get); -/** - * path_get_long - get a long reference to a path - * @path: path to get the reference to - * - * Given a path increment the reference count to the dentry and the vfsmount. - */ -void path_get_long(struct path *path) -{ - mntget_long(path->mnt); - dget(path->dentry); -} - /** * path_put - put a reference to a path * @path: path to put the reference to @@ -392,18 +380,6 @@ void path_put(struct path *path) } EXPORT_SYMBOL(path_put); -/** - * path_put_long - put a long reference to a path - * @path: path to put the reference to - * - * Given a path decrement the reference count to the dentry and the vfsmount. - */ -void path_put_long(struct path *path) -{ - dput(path->dentry); - mntput_long(path->mnt); -} - /** * nameidata_drop_rcu - drop this nameidata out of rcu-walk * @nd: nameidata pathwalk data to drop diff --git a/fs/namespace.c b/fs/namespace.c index d7fc05fac753..48809e21f270 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt) unsigned int mnt_get_count(struct vfsmount *mnt) { #ifdef CONFIG_SMP - unsigned int count = atomic_read(&mnt->mnt_longrefs); + unsigned int count = 0; int cpu; for_each_possible_cpu(cpu) { @@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) if (!mnt->mnt_pcp) goto out_free_devname; - atomic_set(&mnt->mnt_longrefs, 1); + this_cpu_add(mnt->mnt_pcp->mnt_count, 1); #else mnt->mnt_count = 1; mnt->mnt_writers = 0; @@ -624,8 +624,11 @@ static void commit_tree(struct vfsmount *mnt) BUG_ON(parent == mnt); list_add_tail(&head, &mnt->mnt_list); - list_for_each_entry(m, &head, mnt_list) + list_for_each_entry(m, &head, mnt_list) { m->mnt_ns = n; + atomic_inc(&m->mnt_longterm); + } + list_splice(&head, n->list.prev); list_add_tail(&mnt->mnt_hash, mount_hashtable + @@ -734,51 +737,30 @@ static inline void mntfree(struct vfsmount *mnt) deactivate_super(sb); } -#ifdef CONFIG_SMP -static inline void __mntput(struct vfsmount *mnt, int longrefs) +static void mntput_no_expire(struct vfsmount *mnt) { - if (!longrefs) { put_again: - br_read_lock(vfsmount_lock); - if (likely(atomic_read(&mnt->mnt_longrefs))) { - mnt_dec_count(mnt); - br_read_unlock(vfsmount_lock); - return; - } +#ifdef CONFIG_SMP + br_read_lock(vfsmount_lock); + if (likely(atomic_read(&mnt->mnt_longterm))) { + mnt_dec_count(mnt); br_read_unlock(vfsmount_lock); - } else { - BUG_ON(!atomic_read(&mnt->mnt_longrefs)); - if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) - return; + return; } + br_read_unlock(vfsmount_lock); br_write_lock(vfsmount_lock); - if (!longrefs) - mnt_dec_count(mnt); - else - atomic_dec(&mnt->mnt_longrefs); + mnt_dec_count(mnt); if (mnt_get_count(mnt)) { br_write_unlock(vfsmount_lock); return; } - if (unlikely(mnt->mnt_pinned)) { - mnt_add_count(mnt, mnt->mnt_pinned + 1); - mnt->mnt_pinned = 0; - br_write_unlock(vfsmount_lock); - acct_auto_close_mnt(mnt); - goto put_again; - } - br_write_unlock(vfsmount_lock); - mntfree(mnt); -} #else -static inline void __mntput(struct vfsmount *mnt, int longrefs) -{ -put_again: mnt_dec_count(mnt); if (likely(mnt_get_count(mnt))) return; br_write_lock(vfsmount_lock); +#endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; @@ -789,12 +771,6 @@ put_again: br_write_unlock(vfsmount_lock); mntfree(mnt); } -#endif - -static void mntput_no_expire(struct vfsmount *mnt) -{ - __mntput(mnt, 0); -} void mntput(struct vfsmount *mnt) { @@ -802,7 +778,7 @@ void mntput(struct vfsmount *mnt) /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ if (unlikely(mnt->mnt_expiry_mark)) mnt->mnt_expiry_mark = 0; - __mntput(mnt, 0); + mntput_no_expire(mnt); } } EXPORT_SYMBOL(mntput); @@ -815,33 +791,6 @@ struct vfsmount *mntget(struct vfsmount *mnt) } EXPORT_SYMBOL(mntget); -void mntput_long(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - if (mnt) { - /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ - if (unlikely(mnt->mnt_expiry_mark)) - mnt->mnt_expiry_mark = 0; - __mntput(mnt, 1); - } -#else - mntput(mnt); -#endif -} -EXPORT_SYMBOL(mntput_long); - -struct vfsmount *mntget_long(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - if (mnt) - atomic_inc(&mnt->mnt_longrefs); - return mnt; -#else - return mntget(mnt); -#endif -} -EXPORT_SYMBOL(mntget_long); - void mnt_pin(struct vfsmount *mnt) { br_write_lock(vfsmount_lock); @@ -1216,7 +1165,7 @@ void release_mounts(struct list_head *head) dput(dentry); mntput(m); } - mntput_long(mnt); + mntput(mnt); } } @@ -1240,6 +1189,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; + atomic_dec(&p->mnt_longterm); list_del_init(&p->mnt_child); if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; @@ -1969,7 +1919,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) unlock: up_write(&namespace_sem); - mntput_long(newmnt); + mntput(newmnt); return err; } @@ -2291,6 +2241,20 @@ static struct mnt_namespace *alloc_mnt_ns(void) return new_ns; } +void mnt_make_longterm(struct vfsmount *mnt) +{ + atomic_inc(&mnt->mnt_longterm); +} + +void mnt_make_shortterm(struct vfsmount *mnt) +{ + if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) + return; + br_write_lock(vfsmount_lock); + atomic_dec(&mnt->mnt_longterm); + br_write_unlock(vfsmount_lock); +} + /* * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. @@ -2328,14 +2292,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, q = new_ns->root; while (p) { q->mnt_ns = new_ns; + atomic_inc(&q->mnt_longterm); if (fs) { if (p == fs->root.mnt) { + fs->root.mnt = mntget(q); + atomic_inc(&q->mnt_longterm); + mnt_make_shortterm(p); rootmnt = p; - fs->root.mnt = mntget_long(q); } if (p == fs->pwd.mnt) { + fs->pwd.mnt = mntget(q); + atomic_inc(&q->mnt_longterm); + mnt_make_shortterm(p); pwdmnt = p; - fs->pwd.mnt = mntget_long(q); } } p = next_mnt(p, mnt_ns->root); @@ -2344,9 +2313,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, up_write(&namespace_sem); if (rootmnt) - mntput_long(rootmnt); + mntput(rootmnt); if (pwdmnt) - mntput_long(pwdmnt); + mntput(pwdmnt); return new_ns; } @@ -2379,6 +2348,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) new_ns = alloc_mnt_ns(); if (!IS_ERR(new_ns)) { mnt->mnt_ns = new_ns; + atomic_inc(&mnt->mnt_longterm); new_ns->root = mnt; list_add(&new_ns->list, &new_ns->root->mnt_list); } diff --git a/fs/pipe.c b/fs/pipe.c index e2e95fb46a1e..89e9e19b1b2e 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void) static void __exit exit_pipe_fs(void) { unregister_filesystem(&pipe_fs_type); - mntput_long(pipe_mnt); + mntput(pipe_mnt); } fs_initcall(init_pipe_fs); diff --git a/fs/super.c b/fs/super.c index 4f6a3571a634..74e149efed81 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1141,7 +1141,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) return mnt; err: - mntput_long(mnt); + mntput(mnt); return ERR_PTR(err); } diff --git a/include/linux/mount.h b/include/linux/mount.h index af4765ea8fde..604f122a2326 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -60,7 +60,7 @@ struct vfsmount { struct super_block *mnt_sb; /* pointer to superblock */ #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; - atomic_t mnt_longrefs; + atomic_t mnt_longterm; /* how many of the refs are longterm */ #else int mnt_count; int mnt_writers; @@ -96,8 +96,6 @@ extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); -extern void mntput_long(struct vfsmount *mnt); -extern struct vfsmount *mntget_long(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); diff --git a/include/linux/path.h b/include/linux/path.h index a581e8c06533..edc98dec6266 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -10,9 +10,7 @@ struct path { }; extern void path_get(struct path *); -extern void path_get_long(struct path *); extern void path_put(struct path *); -extern void path_put_long(struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { -- cgit v1.2.3 From 28e58ee8ce1f0e69c207f747b7b9054b071e328d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 20 Jan 2011 16:21:59 -0800 Subject: Fix broken "pipe: use event aware wakeups" optimization Commit e462c448fdc8 ("pipe: use event aware wakeups") optimized the pipe event wakeup calls to avoid wakeups if the events do not match the requested set. However, the optimization was buggy, in that it didn't actually use the correct sets for the events: when we make room for more data to be written, the pipe poll() routine will return both the POLLOUT _and_ POLLWRNORM bits. Similarly for read. And most critically, when a pipe is released, that will potentially result in POLLHUP|POLLERR (depending on whether it was the last reader or writer), not just the regular POLLIN|POLLOUT. This bug showed itself as a hung gnome-screensaver-dialog process, stuck forever (or at least until it was poked by a signal or by being traced) in a poll() system call. Cc: Davide Libenzi Cc: David S. Miller Cc: Eric Dumazet Cc: Jens Axboe Cc: Andrew Morton Signed-off-by: Linus Torvalds --- fs/pipe.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index 89e9e19b1b2e..da42f7db50de 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -441,7 +441,7 @@ redo: break; } if (do_wakeup) { - wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT); + wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } pipe_wait(pipe); @@ -450,7 +450,7 @@ redo: /* Signal writers asynchronously that there is more room. */ if (do_wakeup) { - wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT); + wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } if (ret > 0) @@ -612,7 +612,7 @@ redo2: break; } if (do_wakeup) { - wake_up_interruptible_sync_poll(&pipe->wait, POLLIN); + wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); do_wakeup = 0; } @@ -623,7 +623,7 @@ redo2: out: mutex_unlock(&inode->i_mutex); if (do_wakeup) { - wake_up_interruptible_sync_poll(&pipe->wait, POLLIN); + wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } if (ret > 0) @@ -715,7 +715,7 @@ pipe_release(struct inode *inode, int decr, int decw) if (!pipe->readers && !pipe->writers) { free_pipe_info(inode); } else { - wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT); + wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } -- cgit v1.2.3