summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-04-23 23:24:19 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-23 23:24:19 +0400
commit79c1cb7a8ca8f08cd78f7c5eebc85bbe937f5ad4 (patch)
tree0dafecbb8f729aed249e5ff66daa21a6136759c6
parentb0d19a378a409373244088511e889957645f2a44 (diff)
parent97e7e0f71d6d948c25f11f0a33878d9356d9579e (diff)
downloadlinux-79c1cb7a8ca8f08cd78f7c5eebc85bbe937f5ad4.tar.xz
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: [patch 7/7] vfs: mountinfo: show dominating group id [patch 6/7] vfs: mountinfo: add /proc/<pid>/mountinfo [patch 5/7] vfs: mountinfo: allow using process root [patch 4/7] vfs: mountinfo: add mount peer group ID [patch 3/7] vfs: mountinfo: add mount ID [patch 2/7] vfs: mountinfo: add seq_file_root() [patch 1/7] vfs: mountinfo: add dentry_path() [PATCH] remove unused label in xattr.c (noise from ro-bind)
-rw-r--r--Documentation/filesystems/proc.txt38
-rw-r--r--fs/dcache.c114
-rw-r--r--fs/namespace.c265
-rw-r--r--fs/pnode.c56
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/base.c121
-rw-r--r--fs/seq_file.c95
-rw-r--r--fs/xattr.c1
-rw-r--r--include/linux/dcache.h2
-rw-r--r--include/linux/mnt_namespace.h12
-rw-r--r--include/linux/mount.h2
-rw-r--r--include/linux/seq_file.h4
12 files changed, 573 insertions, 138 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 518ebe609e2b..2a99116edc47 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -43,6 +43,7 @@ Table of Contents
2.13 /proc/<pid>/oom_score - Display current oom-killer score
2.14 /proc/<pid>/io - Display the IO accounting fields
2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
+ 2.16 /proc/<pid>/mountinfo - Information about mounts
------------------------------------------------------------------------------
Preface
@@ -2348,4 +2349,41 @@ For example:
$ echo 0x7 > /proc/self/coredump_filter
$ ./some_program
+2.16 /proc/<pid>/mountinfo - Information about mounts
+--------------------------------------------------------
+
+This file contains lines of the form:
+
+36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
+
+(1) mount ID: unique identifier of the mount (may be reused after umount)
+(2) parent ID: ID of parent (or of self for the top of the mount tree)
+(3) major:minor: value of st_dev for files on filesystem
+(4) root: root of the mount within the filesystem
+(5) mount point: mount point relative to the process's root
+(6) mount options: per mount options
+(7) optional fields: zero or more fields of the form "tag[:value]"
+(8) separator: marks the end of the optional fields
+(9) filesystem type: name of filesystem of the form "type[.subtype]"
+(10) mount source: filesystem specific information or "none"
+(11) super options: per super block options
+
+Parsers should ignore all unrecognised optional fields. Currently the
+possible optional fields are:
+
+shared:X mount is shared in peer group X
+master:X mount is slave to peer group X
+propagate_from:X mount is slave and receives propagation from peer group X (*)
+unbindable mount is unbindable
+
+(*) X is the closest dominant peer group under the process's root. If
+X is the immediate master of the mount, or if there's no dominant peer
+group under the same root, then only the "master:X" field is present
+and not the "propagate_from:X" field.
+
+For more information on mount propagation see:
+
+ Documentation/filesystems/sharedsubtree.txt
+
------------------------------------------------------------------------------
diff --git a/fs/dcache.c b/fs/dcache.c
index 43455776711e..3ee588d5f585 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1746,12 +1746,21 @@ shouldnt_be_hashed:
goto shouldnt_be_hashed;
}
+static int prepend(char **buffer, int *buflen, const char *str,
+ int namelen)
+{
+ *buflen -= namelen;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
+ *buffer -= namelen;
+ memcpy(*buffer, str, namelen);
+ return 0;
+}
+
/**
* d_path - return the path of a dentry
- * @dentry: dentry to report
- * @vfsmnt: vfsmnt to which the dentry belongs
- * @root: root dentry
- * @rootmnt: vfsmnt to which the root dentry belongs
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry (may be modified by this function)
* @buffer: buffer to return value in
* @buflen: buffer length
*
@@ -1761,23 +1770,22 @@ shouldnt_be_hashed:
* Returns the buffer or an error code if the path was too long.
*
* "buflen" should be positive. Caller holds the dcache_lock.
+ *
+ * If path is not reachable from the supplied root, then the value of
+ * root is changed (without modifying refcounts).
*/
-static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
- struct path *root, char *buffer, int buflen)
+char *__d_path(const struct path *path, struct path *root,
+ char *buffer, int buflen)
{
+ struct dentry *dentry = path->dentry;
+ struct vfsmount *vfsmnt = path->mnt;
char * end = buffer+buflen;
char * retval;
- int namelen;
-
- *--end = '\0';
- buflen--;
- if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
- buflen -= 10;
- end -= 10;
- if (buflen < 0)
+
+ prepend(&end, &buflen, "\0", 1);
+ if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+ (prepend(&end, &buflen, " (deleted)", 10) != 0))
goto Elong;
- memcpy(end, " (deleted)", 10);
- }
if (buflen < 1)
goto Elong;
@@ -1804,13 +1812,10 @@ static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
}
parent = dentry->d_parent;
prefetch(parent);
- namelen = dentry->d_name.len;
- buflen -= namelen + 1;
- if (buflen < 0)
+ if ((prepend(&end, &buflen, dentry->d_name.name,
+ dentry->d_name.len) != 0) ||
+ (prepend(&end, &buflen, "/", 1) != 0))
goto Elong;
- end -= namelen;
- memcpy(end, dentry->d_name.name, namelen);
- *--end = '/';
retval = end;
dentry = parent;
}
@@ -1818,12 +1823,12 @@ static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
return retval;
global_root:
- namelen = dentry->d_name.len;
- buflen -= namelen;
- if (buflen < 0)
+ retval += 1; /* hit the slash */
+ if (prepend(&retval, &buflen, dentry->d_name.name,
+ dentry->d_name.len) != 0)
goto Elong;
- retval -= namelen-1; /* hit the slash */
- memcpy(retval, dentry->d_name.name, namelen);
+ root->mnt = vfsmnt;
+ root->dentry = dentry;
return retval;
Elong:
return ERR_PTR(-ENAMETOOLONG);
@@ -1846,6 +1851,7 @@ char *d_path(struct path *path, char *buf, int buflen)
{
char *res;
struct path root;
+ struct path tmp;
/*
* We have various synthetic filesystems that never get mounted. On
@@ -1859,10 +1865,11 @@ char *d_path(struct path *path, char *buf, int buflen)
read_lock(&current->fs->lock);
root = current->fs->root;
- path_get(&current->fs->root);
+ path_get(&root);
read_unlock(&current->fs->lock);
spin_lock(&dcache_lock);
- res = __d_path(path->dentry, path->mnt, &root, buf, buflen);
+ tmp = root;
+ res = __d_path(path, &tmp, buf, buflen);
spin_unlock(&dcache_lock);
path_put(&root);
return res;
@@ -1890,6 +1897,48 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
}
/*
+ * Write full pathname from the root of the filesystem into the buffer.
+ */
+char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+{
+ char *end = buf + buflen;
+ char *retval;
+
+ spin_lock(&dcache_lock);
+ prepend(&end, &buflen, "\0", 1);
+ if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+ (prepend(&end, &buflen, "//deleted", 9) != 0))
+ goto Elong;
+ if (buflen < 1)
+ goto Elong;
+ /* Get '/' right */
+ retval = end-1;
+ *retval = '/';
+
+ for (;;) {
+ struct dentry *parent;
+ if (IS_ROOT(dentry))
+ break;
+
+ parent = dentry->d_parent;
+ prefetch(parent);
+
+ if ((prepend(&end, &buflen, dentry->d_name.name,
+ dentry->d_name.len) != 0) ||
+ (prepend(&end, &buflen, "/", 1) != 0))
+ goto Elong;
+
+ retval = end;
+ dentry = parent;
+ }
+ spin_unlock(&dcache_lock);
+ return retval;
+Elong:
+ spin_unlock(&dcache_lock);
+ return ERR_PTR(-ENAMETOOLONG);
+}
+
+/*
* NOTE! The user-level library version returns a
* character pointer. The kernel system call just
* returns the length of the buffer filled (which
@@ -1918,9 +1967,9 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
read_lock(&current->fs->lock);
pwd = current->fs->pwd;
- path_get(&current->fs->pwd);
+ path_get(&pwd);
root = current->fs->root;
- path_get(&current->fs->root);
+ path_get(&root);
read_unlock(&current->fs->lock);
error = -ENOENT;
@@ -1928,9 +1977,10 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
spin_lock(&dcache_lock);
if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) {
unsigned long len;
+ struct path tmp = root;
char * cwd;
- cwd = __d_path(pwd.dentry, pwd.mnt, &root, page, PAGE_SIZE);
+ cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE);
spin_unlock(&dcache_lock);
error = PTR_ERR(cwd);
diff --git a/fs/namespace.c b/fs/namespace.c
index 1bf302d0478b..0505fb61aa74 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,7 @@
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <linux/log2.h>
+#include <linux/idr.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include "pnode.h"
@@ -39,6 +40,8 @@
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
static int event;
+static DEFINE_IDA(mnt_id_ida);
+static DEFINE_IDA(mnt_group_ida);
static struct list_head *mount_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
@@ -58,10 +61,63 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
+/* allocation is serialized by namespace_sem */
+static int mnt_alloc_id(struct vfsmount *mnt)
+{
+ int res;
+
+retry:
+ ida_pre_get(&mnt_id_ida, GFP_KERNEL);
+ spin_lock(&vfsmount_lock);
+ res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
+ spin_unlock(&vfsmount_lock);
+ if (res == -EAGAIN)
+ goto retry;
+
+ return res;
+}
+
+static void mnt_free_id(struct vfsmount *mnt)
+{
+ spin_lock(&vfsmount_lock);
+ ida_remove(&mnt_id_ida, mnt->mnt_id);
+ spin_unlock(&vfsmount_lock);
+}
+
+/*
+ * Allocate a new peer group ID
+ *
+ * mnt_group_ida is protected by namespace_sem
+ */
+static int mnt_alloc_group_id(struct vfsmount *mnt)
+{
+ if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
+ return -ENOMEM;
+
+ return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id);
+}
+
+/*
+ * Release a peer group ID
+ */
+void mnt_release_group_id(struct vfsmount *mnt)
+{
+ ida_remove(&mnt_group_ida, mnt->mnt_group_id);
+ mnt->mnt_group_id = 0;
+}
+
struct vfsmount *alloc_vfsmnt(const char *name)
{
struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
if (mnt) {
+ int err;
+
+ err = mnt_alloc_id(mnt);
+ if (err) {
+ kmem_cache_free(mnt_cache, mnt);
+ return NULL;
+ }
+
atomic_set(&mnt->mnt_count, 1);
INIT_LIST_HEAD(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
@@ -353,6 +409,7 @@ EXPORT_SYMBOL(simple_set_mnt);
void free_vfsmnt(struct vfsmount *mnt)
{
kfree(mnt->mnt_devname);
+ mnt_free_id(mnt);
kmem_cache_free(mnt_cache, mnt);
}
@@ -499,6 +556,17 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
if (mnt) {
+ if (flag & (CL_SLAVE | CL_PRIVATE))
+ mnt->mnt_group_id = 0; /* not a peer of original */
+ else
+ mnt->mnt_group_id = old->mnt_group_id;
+
+ if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
+ int err = mnt_alloc_group_id(mnt);
+ if (err)
+ goto out_free;
+ }
+
mnt->mnt_flags = old->mnt_flags;
atomic_inc(&sb->s_active);
mnt->mnt_sb = sb;
@@ -528,6 +596,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
}
}
return mnt;
+
+ out_free:
+ free_vfsmnt(mnt);
+ return NULL;
}
static inline void __mntput(struct vfsmount *mnt)
@@ -652,20 +724,21 @@ void save_mount_options(struct super_block *sb, char *options)
}
EXPORT_SYMBOL(save_mount_options);
+#ifdef CONFIG_PROC_FS
/* iterator */
static void *m_start(struct seq_file *m, loff_t *pos)
{
- struct mnt_namespace *n = m->private;
+ struct proc_mounts *p = m->private;
down_read(&namespace_sem);
- return seq_list_start(&n->list, *pos);
+ return seq_list_start(&p->ns->list, *pos);
}
static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct mnt_namespace *n = m->private;
+ struct proc_mounts *p = m->private;
- return seq_list_next(v, &n->list, pos);
+ return seq_list_next(v, &p->ns->list, pos);
}
static void m_stop(struct seq_file *m, void *v)
@@ -673,20 +746,30 @@ static void m_stop(struct seq_file *m, void *v)
up_read(&namespace_sem);
}
-static int show_vfsmnt(struct seq_file *m, void *v)
+struct proc_fs_info {
+ int flag;
+ const char *str;
+};
+
+static void show_sb_opts(struct seq_file *m, struct super_block *sb)
{
- struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
- int err = 0;
- static struct proc_fs_info {
- int flag;
- char *str;
- } fs_info[] = {
+ static const struct proc_fs_info fs_info[] = {
{ MS_SYNCHRONOUS, ",sync" },
{ MS_DIRSYNC, ",dirsync" },
{ MS_MANDLOCK, ",mand" },
{ 0, NULL }
};
- static struct proc_fs_info mnt_info[] = {
+ const struct proc_fs_info *fs_infop;
+
+ for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
+ if (sb->s_flags & fs_infop->flag)
+ seq_puts(m, fs_infop->str);
+ }
+}
+
+static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
+{
+ static const struct proc_fs_info mnt_info[] = {
{ MNT_NOSUID, ",nosuid" },
{ MNT_NODEV, ",nodev" },
{ MNT_NOEXEC, ",noexec" },
@@ -695,40 +778,108 @@ static int show_vfsmnt(struct seq_file *m, void *v)
{ MNT_RELATIME, ",relatime" },
{ 0, NULL }
};
- struct proc_fs_info *fs_infop;
+ const struct proc_fs_info *fs_infop;
+
+ for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
+ if (mnt->mnt_flags & fs_infop->flag)
+ seq_puts(m, fs_infop->str);
+ }
+}
+
+static void show_type(struct seq_file *m, struct super_block *sb)
+{
+ mangle(m, sb->s_type->name);
+ if (sb->s_subtype && sb->s_subtype[0]) {
+ seq_putc(m, '.');
+ mangle(m, sb->s_subtype);
+ }
+}
+
+static int show_vfsmnt(struct seq_file *m, void *v)
+{
+ struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
+ int err = 0;
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
seq_putc(m, ' ');
seq_path(m, &mnt_path, " \t\n\\");
seq_putc(m, ' ');
- mangle(m, mnt->mnt_sb->s_type->name);
- if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) {
- seq_putc(m, '.');
- mangle(m, mnt->mnt_sb->s_subtype);
- }
+ show_type(m, mnt->mnt_sb);
seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
- for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
- if (mnt->mnt_sb->s_flags & fs_infop->flag)
- seq_puts(m, fs_infop->str);
- }
- for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
- if (mnt->mnt_flags & fs_infop->flag)
- seq_puts(m, fs_infop->str);
- }
+ show_sb_opts(m, mnt->mnt_sb);
+ show_mnt_opts(m, mnt);
if (mnt->mnt_sb->s_op->show_options)
err = mnt->mnt_sb->s_op->show_options(m, mnt);
seq_puts(m, " 0 0\n");
return err;
}
-struct seq_operations mounts_op = {
+const struct seq_operations mounts_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
.show = show_vfsmnt
};
+static int show_mountinfo(struct seq_file *m, void *v)
+{
+ struct proc_mounts *p = m->private;
+ struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
+ struct super_block *sb = mnt->mnt_sb;
+ struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
+ struct path root = p->root;
+ int err = 0;
+
+ seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
+ MAJOR(sb->s_dev), MINOR(sb->s_dev));
+ seq_dentry(m, mnt->mnt_root, " \t\n\\");
+ seq_putc(m, ' ');
+ seq_path_root(m, &mnt_path, &root, " \t\n\\");
+ if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
+ /*
+ * Mountpoint is outside root, discard that one. Ugly,
+ * but less so than trying to do that in iterator in a
+ * race-free way (due to renames).
+ */
+ return SEQ_SKIP;
+ }
+ seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
+ show_mnt_opts(m, mnt);
+
+ /* Tagged fields ("foo:X" or "bar") */
+ if (IS_MNT_SHARED(mnt))
+ seq_printf(m, " shared:%i", mnt->mnt_group_id);
+ if (IS_MNT_SLAVE(mnt)) {
+ int master = mnt->mnt_master->mnt_group_id;
+ int dom = get_dominating_id(mnt, &p->root);
+ seq_printf(m, " master:%i", master);
+ if (dom && dom != master)
+ seq_printf(m, " propagate_from:%i", dom);
+ }
+ if (IS_MNT_UNBINDABLE(mnt))
+ seq_puts(m, " unbindable");
+
+ /* Filesystem specific data */
+ seq_puts(m, " - ");
+ show_type(m, sb);
+ seq_putc(m, ' ');
+ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
+ seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
+ show_sb_opts(m, sb);
+ if (sb->s_op->show_options)
+ err = sb->s_op->show_options(m, mnt);
+ seq_putc(m, '\n');
+ return err;
+}
+
+const struct seq_operations mountinfo_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_mountinfo,
+};
+
static int show_vfsstat(struct seq_file *m, void *v)
{
struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
@@ -749,7 +900,7 @@ static int show_vfsstat(struct seq_file *m, void *v)
/* file system type */
seq_puts(m, "with fstype ");
- mangle(m, mnt->mnt_sb->s_type->name);
+ show_type(m, mnt->mnt_sb);
/* optional statistics */
if (mnt->mnt_sb->s_op->show_stats) {
@@ -761,12 +912,13 @@ static int show_vfsstat(struct seq_file *m, void *v)
return err;
}
-struct seq_operations mountstats_op = {
+const struct seq_operations mountstats_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
.show = show_vfsstat,
};
+#endif /* CONFIG_PROC_FS */
/**
* may_umount_tree - check if a mount tree is busy
@@ -1108,6 +1260,33 @@ void drop_collected_mounts(struct vfsmount *mnt)
release_mounts(&umount_list);
}
+static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
+{
+ struct vfsmount *p;
+
+ for (p = mnt; p != end; p = next_mnt(p, mnt)) {
+ if (p->mnt_group_id && !IS_MNT_SHARED(p))
+ mnt_release_group_id(p);
+ }
+}
+
+static int invent_group_ids(struct vfsmount *mnt, bool recurse)
+{
+ struct vfsmount *p;
+
+ for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
+ if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
+ int err = mnt_alloc_group_id(p);
+ if (err) {
+ cleanup_group_ids(mnt, p);
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
+
/*
* @source_mnt : mount tree to be attached
* @nd : place the mount tree @source_mnt is attached
@@ -1178,9 +1357,16 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
struct vfsmount *dest_mnt = path->mnt;
struct dentry *dest_dentry = path->dentry;
struct vfsmount *child, *p;
+ int err;
- if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
- return -EINVAL;
+ if (IS_MNT_SHARED(dest_mnt)) {
+ err = invent_group_ids(source_mnt, true);
+ if (err)
+ goto out;
+ }
+ err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
+ if (err)
+ goto out_cleanup_ids;
if (IS_MNT_SHARED(dest_mnt)) {
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1203,6 +1389,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
}
spin_unlock(&vfsmount_lock);
return 0;
+
+ out_cleanup_ids:
+ if (IS_MNT_SHARED(dest_mnt))
+ cleanup_group_ids(source_mnt, NULL);
+ out:
+ return err;
}
static int graft_tree(struct vfsmount *mnt, struct path *path)
@@ -1243,6 +1435,7 @@ static noinline int do_change_type(struct nameidata *nd, int flag)
struct vfsmount *m, *mnt = nd->path.mnt;
int recurse = flag & MS_REC;
int type = flag & ~MS_REC;
+ int err = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1251,12 +1444,20 @@ static noinline int do_change_type(struct nameidata *nd, int flag)
return -EINVAL;
down_write(&namespace_sem);
+ if (type == MS_SHARED) {
+ err = invent_group_ids(mnt, recurse);
+ if (err)
+ goto out_unlock;
+ }
+
spin_lock(&vfsmount_lock);
for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
change_mnt_propagation(m, type);
spin_unlock(&vfsmount_lock);
+
+ out_unlock:
up_write(&namespace_sem);
- return 0;
+ return err;
}
/*
diff --git a/fs/pnode.c b/fs/pnode.c
index f968e35d9785..8d5f392ec3d3 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -28,6 +28,57 @@ static inline struct vfsmount *next_slave(struct vfsmount *p)
return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave);
}
+/*
+ * Return true if path is reachable from root
+ *
+ * namespace_sem is held, and mnt is attached
+ */
+static bool is_path_reachable(struct vfsmount *mnt, struct dentry *dentry,
+ const struct path *root)
+{
+ while (mnt != root->mnt && mnt->mnt_parent != mnt) {
+ dentry = mnt->mnt_mountpoint;
+ mnt = mnt->mnt_parent;
+ }
+ return mnt == root->mnt && is_subdir(dentry, root->dentry);
+}
+
+static struct vfsmount *get_peer_under_root(struct vfsmount *mnt,
+ struct mnt_namespace *ns,
+ const struct path *root)
+{
+ struct vfsmount *m = mnt;
+
+ do {
+ /* Check the namespace first for optimization */
+ if (m->mnt_ns == ns && is_path_reachable(m, m->mnt_root, root))
+ return m;
+
+ m = next_peer(m);
+ } while (m != mnt);
+
+ return NULL;
+}
+
+/*
+ * Get ID of closest dominating peer group having a representative
+ * under the given root.
+ *
+ * Caller must hold namespace_sem
+ */
+int get_dominating_id(struct vfsmount *mnt, const struct path *root)
+{
+ struct vfsmount *m;
+
+ for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
+ struct vfsmount *d = get_peer_under_root(m, mnt->mnt_ns, root);
+ if (d)
+ return d->mnt_group_id;
+ }
+
+ return 0;
+}
+
static int do_make_slave(struct vfsmount *mnt)
{
struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master;
@@ -46,7 +97,11 @@ static int do_make_slave(struct vfsmount *mnt)
if (peer_mnt == mnt)
peer_mnt = NULL;
}
+ if (IS_MNT_SHARED(mnt) && list_empty(&mnt->mnt_share))
+ mnt_release_group_id(mnt);
+
list_del_init(&mnt->mnt_share);
+ mnt->mnt_group_id = 0;
if (peer_mnt)
master = peer_mnt;
@@ -68,7 +123,6 @@ static int do_make_slave(struct vfsmount *mnt)
}
mnt->mnt_master = master;
CLEAR_MNT_SHARED(mnt);
- INIT_LIST_HEAD(&mnt->mnt_slave_list);
return 0;
}
diff --git a/fs/pnode.h b/fs/pnode.h
index 973c3f825e7d..958665d662af 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -36,4 +36,5 @@ int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
int propagate_umount(struct list_head *);
int propagate_mount_busy(struct vfsmount *, int);
void mnt_release_group_id(struct vfsmount *);
+int get_dominating_id(struct vfsmount *mnt, const struct path *root);
#endif /* _LINUX_PNODE_H */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7313c62e3e9d..c5e412a00b17 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -502,17 +502,14 @@ static const struct inode_operations proc_def_inode_operations = {
.setattr = proc_setattr,
};
-extern const struct seq_operations mounts_op;
-struct proc_mounts {
- struct seq_file m;
- int event;
-};
-
-static int mounts_open(struct inode *inode, struct file *file)
+static int mounts_open_common(struct inode *inode, struct file *file,
+ const struct seq_operations *op)
{
struct task_struct *task = get_proc_task(inode);
struct nsproxy *nsp;
struct mnt_namespace *ns = NULL;
+ struct fs_struct *fs = NULL;
+ struct path root;
struct proc_mounts *p;
int ret = -EINVAL;
@@ -525,40 +522,61 @@ static int mounts_open(struct inode *inode, struct file *file)
get_mnt_ns(ns);
}
rcu_read_unlock();
-
+ if (ns)
+ fs = get_fs_struct(task);
put_task_struct(task);
}
- if (ns) {
- ret = -ENOMEM;
- p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
- if (p) {
- file->private_data = &p->m;
- ret = seq_open(file, &mounts_op);
- if (!ret) {
- p->m.private = ns;
- p->event = ns->event;
- return 0;
- }
- kfree(p);
- }
- put_mnt_ns(ns);
- }
+ if (!ns)
+ goto err;
+ if (!fs)
+ goto err_put_ns;
+
+ read_lock(&fs->lock);
+ root = fs->root;
+ path_get(&root);
+ read_unlock(&fs->lock);
+ put_fs_struct(fs);
+
+ ret = -ENOMEM;
+ p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
+ if (!p)
+ goto err_put_path;
+
+ file->private_data = &p->m;
+ ret = seq_open(file, op);
+ if (ret)
+ goto err_free;
+
+ p->m.private = p;
+ p->ns = ns;
+ p->root = root;
+ p->event = ns->event;
+
+ return 0;
+
+ err_free:
+ kfree(p);
+ err_put_path:
+ path_put(&root);
+ err_put_ns:
+ put_mnt_ns(ns);
+ err:
return ret;
}
static int mounts_release(struct inode *inode, struct file *file)
{
- struct seq_file *m = file->private_data;
- struct mnt_namespace *ns = m->private;
- put_mnt_ns(ns);
+ struct proc_mounts *p = file->private_data;
+ path_put(&p->root);
+ put_mnt_ns(p->ns);
return seq_release(inode, file);
}
static unsigned mounts_poll(struct file *file, poll_table *wait)
{
struct proc_mounts *p = file->private_data;
- struct mnt_namespace *ns = p->m.private;
+ struct mnt_namespace *ns = p->ns;
unsigned res = 0;
poll_wait(file, &ns->poll, wait);
@@ -573,6 +591,11 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
return res;
}
+static int mounts_open(struct inode *inode, struct file *file)
+{
+ return mounts_open_common(inode, file, &mounts_op);
+}
+
static const struct file_operations proc_mounts_operations = {
.open = mounts_open,
.read = seq_read,
@@ -581,38 +604,22 @@ static const struct file_operations proc_mounts_operations = {
.poll = mounts_poll,
};
-extern const struct seq_operations mountstats_op;
-static int mountstats_open(struct inode *inode, struct file *file)
+static int mountinfo_open(struct inode *inode, struct file *file)
{
- int ret = seq_open(file, &mountstats_op);
-
- if (!ret) {
- struct seq_file *m = file->private_data;
- struct nsproxy *nsp;
- struct mnt_namespace *mnt_ns = NULL;
- struct task_struct *task = get_proc_task(inode);
-
- if (task) {
- rcu_read_lock();
- nsp = task_nsproxy(task);
- if (nsp) {
- mnt_ns = nsp->mnt_ns;
- if (mnt_ns)
- get_mnt_ns(mnt_ns);
- }
- rcu_read_unlock();
+ return mounts_open_common(inode, file, &mountinfo_op);
+}
- put_task_struct(task);
- }
+static const struct file_operations proc_mountinfo_operations = {
+ .open = mountinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mounts_release,
+ .poll = mounts_poll,
+};
- if (mnt_ns)
- m->private = mnt_ns;
- else {
- seq_release(inode, file);
- ret = -EINVAL;
- }
- }
- return ret;
+static int mountstats_open(struct inode *inode, struct file *file)
+{
+ return mounts_open_common(inode, file, &mountstats_op);
}
static const struct file_operations proc_mountstats_operations = {
@@ -2309,6 +2316,7 @@ static const struct pid_entry tgid_base_stuff[] = {
LNK("root", root),
LNK("exe", exe),
REG("mounts", S_IRUGO, mounts),
+ REG("mountinfo", S_IRUGO, mountinfo),
REG("mountstats", S_IRUSR, mountstats),
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, clear_refs),
@@ -2641,6 +2649,7 @@ static const struct pid_entry tid_base_stuff[] = {
LNK("root", root),
LNK("exe", exe),
REG("mounts", S_IRUGO, mounts),
+ REG("mountinfo", S_IRUGO, mountinfo),
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, clear_refs),
REG("smaps", S_IRUGO, smaps),
diff --git a/fs/seq_file.c b/fs/seq_file.c
index cdfd996ca6ef..3f54dbd6c49b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -350,28 +350,40 @@ int seq_printf(struct seq_file *m, const char *f, ...)
}
EXPORT_SYMBOL(seq_printf);
+static char *mangle_path(char *s, char *p, char *esc)
+{
+ while (s <= p) {
+ char c = *p++;
+ if (!c) {
+ return s;
+ } else if (!strchr(esc, c)) {
+ *s++ = c;
+ } else if (s + 4 > p) {
+ break;
+ } else {
+ *s++ = '\\';
+ *s++ = '0' + ((c & 0300) >> 6);
+ *s++ = '0' + ((c & 070) >> 3);
+ *s++ = '0' + (c & 07);
+ }
+ }
+ return NULL;
+}
+
+/*
+ * return the absolute path of 'dentry' residing in mount 'mnt'.
+ */
int seq_path(struct seq_file *m, struct path *path, char *esc)
{
if (m->count < m->size) {
char *s = m->buf + m->count;
char *p = d_path(path, s, m->size - m->count);
if (!IS_ERR(p)) {
- while (s <= p) {
- char c = *p++;
- if (!c) {
- p = m->buf + m->count;
- m->count = s - m->buf;
- return s - p;
- } else if (!strchr(esc, c)) {
- *s++ = c;
- } else if (s + 4 > p) {
- break;
- } else {
- *s++ = '\\';
- *s++ = '0' + ((c & 0300) >> 6);
- *s++ = '0' + ((c & 070) >> 3);
- *s++ = '0' + (c & 07);
- }
+ s = mangle_path(s, p, esc);
+ if (s) {
+ p = m->buf + m->count;
+ m->count = s - m->buf;
+ return s - p;
}
}
}
@@ -380,6 +392,57 @@ int seq_path(struct seq_file *m, struct path *path, char *esc)
}
EXPORT_SYMBOL(seq_path);
+/*
+ * Same as seq_path, but relative to supplied root.
+ *
+ * root may be changed, see __d_path().
+ */
+int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
+ char *esc)
+{
+ int err = -ENAMETOOLONG;
+ if (m->count < m->size) {
+ char *s = m->buf + m->count;
+ char *p;
+
+ spin_lock(&dcache_lock);
+ p = __d_path(path, root, s, m->size - m->count);
+ spin_unlock(&dcache_lock);
+ err = PTR_ERR(p);
+ if (!IS_ERR(p)) {
+ s = mangle_path(s, p, esc);
+ if (s) {
+ p = m->buf + m->count;
+ m->count = s - m->buf;
+ return 0;
+ }
+ }
+ }
+ m->count = m->size;
+ return err;
+}
+
+/*
+ * returns the path of the 'dentry' from the root of its filesystem.
+ */
+int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
+{
+ if (m->count < m->size) {
+ char *s = m->buf + m->count;
+ char *p = dentry_path(dentry, s, m->size - m->count);
+ if (!IS_ERR(p)) {
+ s = mangle_path(s, p, esc);
+ if (s) {
+ p = m->buf + m->count;
+ m->count = s - m->buf;
+ return s - p;
+ }
+ }
+ }
+ m->count = m->size;
+ return -1;
+}
+
static void *single_start(struct seq_file *p, loff_t *pos)
{
return NULL + (*pos == 0);
diff --git a/fs/xattr.c b/fs/xattr.c
index f7062da505d4..89a942f07e1b 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -307,7 +307,6 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
error = setxattr(dentry, name, value, size, flags);
mnt_drop_write(f->f_path.mnt);
}
-out_fput:
fput(f);
return error;
}
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index fabd16d03a27..cfb1627ac51c 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -301,7 +301,9 @@ extern int d_validate(struct dentry *, struct dentry *);
*/
extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
+extern char *__d_path(const struct path *path, struct path *root, char *, int);
extern char *d_path(struct path *, char *, int);
+extern char *dentry_path(struct dentry *, char *, int);
/* Allocation counts.. */
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 8eed44f8ca73..830bbcd449d6 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -5,6 +5,7 @@
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/nsproxy.h>
+#include <linux/seq_file.h>
struct mnt_namespace {
atomic_t count;
@@ -14,6 +15,13 @@ struct mnt_namespace {
int event;
};
+struct proc_mounts {
+ struct seq_file m; /* must be the first element */
+ struct mnt_namespace *ns;
+ struct path root;
+ int event;
+};
+
extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
struct fs_struct *);
extern void __put_mnt_ns(struct mnt_namespace *ns);
@@ -37,5 +45,9 @@ static inline void get_mnt_ns(struct mnt_namespace *ns)
atomic_inc(&ns->count);
}
+extern const struct seq_operations mounts_op;
+extern const struct seq_operations mountinfo_op;
+extern const struct seq_operations mountstats_op;
+
#endif
#endif
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 87b24cea1863..b4836d58f428 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -56,6 +56,8 @@ struct vfsmount {
struct list_head mnt_slave; /* slave list entry */
struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */
struct mnt_namespace *mnt_ns; /* containing namespace */
+ int mnt_id; /* mount identifier */
+ int mnt_group_id; /* peer group identifier */
/*
* We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
* to let these frequently modified fields in a separate cache line
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index d65796dc26d9..5b5369c3c209 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -10,6 +10,7 @@ struct seq_operations;
struct file;
struct path;
struct inode;
+struct dentry;
struct seq_file {
char *buf;
@@ -44,6 +45,9 @@ int seq_printf(struct seq_file *, const char *, ...)
__attribute__ ((format (printf,2,3)));
int seq_path(struct seq_file *, struct path *, char *);
+int seq_dentry(struct seq_file *, struct dentry *, char *);
+int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
+ char *esc);
int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
int single_release(struct inode *, struct file *);