From 6cfd76a26d9fe2ba54b9d496a48c1d9285e5c5ed Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Dec 2006 20:37:22 -0800 Subject: [PATCH] lockdep: name some old style locks Name some of the remaning 'old_style_spin_init' locks Signed-off-by: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 33c5daacc743..733790d4f7db 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -73,7 +73,7 @@ extern struct nsproxy init_nsproxy; #define INIT_NSPROXY(nsproxy) { \ .count = ATOMIC_INIT(1), \ - .nslock = SPIN_LOCK_UNLOCKED, \ + .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ .uts_ns = &init_uts_ns, \ .namespace = NULL, \ INIT_IPC_NS(ipc_ns) \ -- cgit v1.2.3 From 1ec320afdc9552c92191d5f89fcd1ebe588334ca Mon Sep 17 00:00:00 2001 From: Cedric Le Goater Date: Fri, 8 Dec 2006 02:37:55 -0800 Subject: [PATCH] add process_session() helper routine: deprecate old field Add an anonymous union and ((deprecated)) to catch direct usage of the session field. [akpm@osdl.org: fix various missed conversions] [jdike@addtoit.com: fix UML bug] Signed-off-by: Jeff Dike Cc: Cedric Le Goater Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 2 +- fs/proc/array.c | 2 +- include/linux/init_task.h | 11 ++++++----- include/linux/sched.h | 19 +++++++++++++++++-- kernel/exit.c | 2 +- kernel/fork.c | 2 +- 6 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include/linux/init_task.h') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 4ebba7ca1dc9..48cee2004e97 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -3855,7 +3855,7 @@ EXPORT_SYMBOL(proc_clear_tty); void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty) { if (tty) { - tty->session = tsk->signal->session; + tty->session = process_session(tsk); tty->pgrp = process_group(tsk); } tsk->signal->tty = tty; diff --git a/fs/proc/array.c b/fs/proc/array.c index b0cd014a39bd..70e4fab117b1 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -381,7 +381,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) stime = cputime_add(stime, sig->stime); } - sid = sig->session; + sid = signal_session(sig); pgid = process_group(task); ppid = rcu_dereference(task->real_parent)->tgid; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 733790d4f7db..848a68af3d42 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -57,17 +57,18 @@ .cpu_vm_mask = CPU_MASK_ALL, \ } -#define INIT_SIGNALS(sig) { \ - .count = ATOMIC_INIT(1), \ +#define INIT_SIGNALS(sig) { \ + .count = ATOMIC_INIT(1), \ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ - .shared_pending = { \ + .shared_pending = { \ .list = LIST_HEAD_INIT(sig.shared_pending.list), \ - .signal = {{0}}}, \ + .signal = {{0}}}, \ .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ .rlim = INIT_RLIMITS, \ .pgrp = 1, \ - .session = 1, \ + .tty_old_pgrp = 0, \ + { .__session = 1}, \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index 270d864a8ff1..6fec1d419714 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -436,7 +436,12 @@ struct signal_struct { /* job control IDs */ pid_t pgrp; pid_t tty_old_pgrp; - pid_t session; + + union { + pid_t session __deprecated; + pid_t __session; + }; + /* boolean value for session group leader */ int leader; @@ -1047,9 +1052,19 @@ static inline pid_t process_group(struct task_struct *tsk) return tsk->signal->pgrp; } +static inline pid_t signal_session(struct signal_struct *sig) +{ + return sig->__session; +} + static inline pid_t process_session(struct task_struct *tsk) { - return tsk->signal->session; + return signal_session(tsk->signal); +} + +static inline void set_signal_session(struct signal_struct *sig, pid_t session) +{ + sig->__session = session; } static inline struct pid *task_pid(struct task_struct *task) diff --git a/kernel/exit.c b/kernel/exit.c index 8d289bfc13d1..6267a6cc6113 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -304,7 +304,7 @@ void __set_special_pids(pid_t session, pid_t pgrp) if (process_session(curr) != session) { detach_pid(curr, PIDTYPE_SID); - curr->signal->session = session; + set_signal_session(curr->signal, session); attach_pid(curr, PIDTYPE_SID, session); } if (process_group(curr) != pgrp) { diff --git a/kernel/fork.c b/kernel/fork.c index 298c4d6ab512..60d2644bfe85 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1259,7 +1259,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (thread_group_leader(p)) { p->signal->tty = current->signal->tty; p->signal->pgrp = process_group(current); - p->signal->session = process_session(current); + set_signal_session(p->signal, process_session(current)); attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, process_session(p)); -- cgit v1.2.3 From 6b3286ed1169d74fea401367d6d4d6c6ec758a81 Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Fri, 8 Dec 2006 02:37:56 -0800 Subject: [PATCH] rename struct namespace to struct mnt_namespace Rename 'struct namespace' to 'struct mnt_namespace' to avoid confusion with other namespaces being developped for the containers : pid, uts, ipc, etc. 'namespace' variables and attributes are also renamed to 'mnt_ns' Signed-off-by: Kirill Korotaev Signed-off-by: Cedric Le Goater Cc: Eric W. Biederman Cc: Herbert Poetzl Cc: Sukadev Bhattiprolu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/mntpt.c | 2 +- fs/namespace.c | 113 +++++++++++++++++++++--------------------- fs/nfs/getroot.c | 2 +- fs/pnode.c | 2 +- fs/pnode.h | 2 +- fs/proc/base.c | 36 +++++++------- fs/reiserfs/super.c | 2 +- include/linux/init_task.h | 2 +- include/linux/mnt_namespace.h | 42 ++++++++++++++++ include/linux/mount.h | 4 +- include/linux/namespace.h | 42 ---------------- include/linux/nsproxy.h | 4 +- kernel/exit.c | 2 +- kernel/fork.c | 21 ++++---- kernel/kmod.c | 2 +- kernel/nsproxy.c | 16 +++--- 16 files changed, 148 insertions(+), 146 deletions(-) create mode 100644 include/linux/mnt_namespace.h delete mode 100644 include/linux/namespace.h (limited to 'include/linux/init_task.h') diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index f33b1a81a761..8f74e8450826 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "super.h" #include "cell.h" #include "volume.h" diff --git a/fs/namespace.c b/fs/namespace.c index b00ac84ebbdd..fde8553faa76 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -133,10 +133,10 @@ struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) static inline int check_mnt(struct vfsmount *mnt) { - return mnt->mnt_namespace == current->nsproxy->namespace; + return mnt->mnt_ns == current->nsproxy->mnt_ns; } -static void touch_namespace(struct namespace *ns) +static void touch_mnt_namespace(struct mnt_namespace *ns) { if (ns) { ns->event = ++event; @@ -144,7 +144,7 @@ static void touch_namespace(struct namespace *ns) } } -static void __touch_namespace(struct namespace *ns) +static void __touch_mnt_namespace(struct mnt_namespace *ns) { if (ns && ns->event != event) { ns->event = event; @@ -187,19 +187,19 @@ static void commit_tree(struct vfsmount *mnt) struct vfsmount *parent = mnt->mnt_parent; struct vfsmount *m; LIST_HEAD(head); - struct namespace *n = parent->mnt_namespace; + struct mnt_namespace *n = parent->mnt_ns; BUG_ON(parent == mnt); list_add_tail(&head, &mnt->mnt_list); list_for_each_entry(m, &head, mnt_list) - m->mnt_namespace = n; + m->mnt_ns = n; list_splice(&head, n->list.prev); list_add_tail(&mnt->mnt_hash, mount_hashtable + hash(parent, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); - touch_namespace(n); + touch_mnt_namespace(n); } static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root) @@ -320,7 +320,7 @@ EXPORT_SYMBOL(mnt_unpin); /* iterator */ static void *m_start(struct seq_file *m, loff_t *pos) { - struct namespace *n = m->private; + struct mnt_namespace *n = m->private; struct list_head *p; loff_t l = *pos; @@ -333,7 +333,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) static void *m_next(struct seq_file *m, void *v, loff_t *pos) { - struct namespace *n = m->private; + struct mnt_namespace *n = m->private; struct list_head *p = ((struct vfsmount *)v)->mnt_list.next; (*pos)++; return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list); @@ -526,8 +526,8 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) list_for_each_entry(p, kill, mnt_hash) { list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); - __touch_namespace(p->mnt_namespace); - p->mnt_namespace = NULL; + __touch_mnt_namespace(p->mnt_ns); + p->mnt_ns = NULL; list_del_init(&p->mnt_child); if (p->mnt_parent != p) p->mnt_mountpoint->d_mounted--; @@ -830,7 +830,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, if (parent_nd) { detach_mnt(source_mnt, parent_nd); attach_mnt(source_mnt, nd); - touch_namespace(current->nsproxy->namespace); + touch_mnt_namespace(current->nsproxy->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); commit_tree(source_mnt); @@ -1145,9 +1145,9 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts, */ if (!propagate_mount_busy(mnt, 2)) { /* delete from the namespace */ - touch_namespace(mnt->mnt_namespace); + touch_mnt_namespace(mnt->mnt_ns); list_del_init(&mnt->mnt_list); - mnt->mnt_namespace = NULL; + mnt->mnt_ns = NULL; umount_tree(mnt, 1, umounts); spin_unlock(&vfsmount_lock); } else { @@ -1168,7 +1168,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts, */ static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts) { - struct namespace *namespace; + struct mnt_namespace *ns; struct vfsmount *mnt; while (!list_empty(graveyard)) { @@ -1178,10 +1178,10 @@ static void expire_mount_list(struct list_head *graveyard, struct list_head *mou /* don't do anything if the namespace is dead - all the * vfsmounts from it are going away anyway */ - namespace = mnt->mnt_namespace; - if (!namespace || !namespace->root) + ns = mnt->mnt_ns; + if (!ns || !ns->root) continue; - get_namespace(namespace); + get_mnt_ns(ns); spin_unlock(&vfsmount_lock); down_write(&namespace_sem); @@ -1189,7 +1189,7 @@ static void expire_mount_list(struct list_head *graveyard, struct list_head *mou up_write(&namespace_sem); release_mounts(&umounts); mntput(mnt); - put_namespace(namespace); + put_mnt_ns(ns); spin_lock(&vfsmount_lock); } } @@ -1439,14 +1439,15 @@ dput_out: * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. */ -struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs) +struct mnt_namespace *dup_mnt_ns(struct task_struct *tsk, + struct fs_struct *fs) { - struct namespace *namespace = tsk->nsproxy->namespace; - struct namespace *new_ns; + struct mnt_namespace *mnt_ns = tsk->nsproxy->mnt_ns; + struct mnt_namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; struct vfsmount *p, *q; - new_ns = kmalloc(sizeof(struct namespace), GFP_KERNEL); + new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); if (!new_ns) return NULL; @@ -1457,7 +1458,7 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs) down_write(&namespace_sem); /* First pass: copy the tree topology */ - new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root, + new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root, CL_COPY_ALL | CL_EXPIRE); if (!new_ns->root) { up_write(&namespace_sem); @@ -1473,10 +1474,10 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs) * as belonging to new namespace. We have already acquired a private * fs_struct, so tsk->fs->lock is not needed. */ - p = namespace->root; + p = mnt_ns->root; q = new_ns->root; while (p) { - q->mnt_namespace = new_ns; + q->mnt_ns = new_ns; if (fs) { if (p == fs->rootmnt) { rootmnt = p; @@ -1491,7 +1492,7 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs) fs->altrootmnt = mntget(q); } } - p = next_mnt(p, namespace->root); + p = next_mnt(p, mnt_ns->root); q = next_mnt(q, new_ns->root); } up_write(&namespace_sem); @@ -1506,16 +1507,16 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs) return new_ns; } -int copy_namespace(int flags, struct task_struct *tsk) +int copy_mnt_ns(int flags, struct task_struct *tsk) { - struct namespace *namespace = tsk->nsproxy->namespace; - struct namespace *new_ns; + struct mnt_namespace *ns = tsk->nsproxy->mnt_ns; + struct mnt_namespace *new_ns; int err = 0; - if (!namespace) + if (!ns) return 0; - get_namespace(namespace); + get_mnt_ns(ns); if (!(flags & CLONE_NEWNS)) return 0; @@ -1525,16 +1526,16 @@ int copy_namespace(int flags, struct task_struct *tsk) goto out; } - new_ns = dup_namespace(tsk, tsk->fs); + new_ns = dup_mnt_ns(tsk, tsk->fs); if (!new_ns) { err = -ENOMEM; goto out; } - tsk->nsproxy->namespace = new_ns; + tsk->nsproxy->mnt_ns = new_ns; out: - put_namespace(namespace); + put_mnt_ns(ns); return err; } @@ -1754,7 +1755,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root, detach_mnt(user_nd.mnt, &root_parent); attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */ attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */ - touch_namespace(current->nsproxy->namespace); + touch_mnt_namespace(current->nsproxy->mnt_ns); spin_unlock(&vfsmount_lock); chroot_fs_refs(&user_nd, &new_nd); security_sb_post_pivotroot(&user_nd, &new_nd); @@ -1779,27 +1780,27 @@ out3: static void __init init_mount_tree(void) { struct vfsmount *mnt; - struct namespace *namespace; + struct mnt_namespace *ns; mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) panic("Can't create rootfs"); - namespace = kmalloc(sizeof(*namespace), GFP_KERNEL); - if (!namespace) + ns = kmalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) panic("Can't allocate initial namespace"); - atomic_set(&namespace->count, 1); - INIT_LIST_HEAD(&namespace->list); - init_waitqueue_head(&namespace->poll); - namespace->event = 0; - list_add(&mnt->mnt_list, &namespace->list); - namespace->root = mnt; - mnt->mnt_namespace = namespace; - - init_task.nsproxy->namespace = namespace; - get_namespace(namespace); - - set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root); - set_fs_root(current->fs, namespace->root, namespace->root->mnt_root); + atomic_set(&ns->count, 1); + INIT_LIST_HEAD(&ns->list); + init_waitqueue_head(&ns->poll); + ns->event = 0; + list_add(&mnt->mnt_list, &ns->list); + ns->root = mnt; + mnt->mnt_ns = ns; + + init_task.nsproxy->mnt_ns = ns; + get_mnt_ns(ns); + + set_fs_pwd(current->fs, ns->root, ns->root->mnt_root); + set_fs_root(current->fs, ns->root, ns->root->mnt_root); } void __init mnt_init(unsigned long mempages) @@ -1860,11 +1861,11 @@ void __init mnt_init(unsigned long mempages) init_mount_tree(); } -void __put_namespace(struct namespace *namespace) +void __put_mnt_ns(struct mnt_namespace *ns) { - struct vfsmount *root = namespace->root; + struct vfsmount *root = ns->root; LIST_HEAD(umount_list); - namespace->root = NULL; + ns->root = NULL; spin_unlock(&vfsmount_lock); down_write(&namespace_sem); spin_lock(&vfsmount_lock); @@ -1872,5 +1873,5 @@ void __put_namespace(struct namespace *namespace) spin_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); - kfree(namespace); + kfree(ns); } diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 20c6f39ea38a..8391bd7a83ce 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/pnode.c b/fs/pnode.c index da42ee61c1df..56aacead8362 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -6,7 +6,7 @@ * Author : Ram Pai (linuxram@us.ibm.com) * */ -#include +#include #include #include #include "pnode.h" diff --git a/fs/pnode.h b/fs/pnode.h index 020e1bb60fdb..d45bd8ec36bf 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -13,7 +13,7 @@ #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED) #define IS_MNT_SLAVE(mnt) (mnt->mnt_master) -#define IS_MNT_NEW(mnt) (!mnt->mnt_namespace) +#define IS_MNT_NEW(mnt) (!mnt->mnt_ns) #define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED) #define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE) diff --git a/fs/proc/base.c b/fs/proc/base.c index a71f1755bb57..a3b5074118a7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -59,7 +59,7 @@ #include #include #include -#include +#include #include #include #include @@ -365,33 +365,33 @@ struct proc_mounts { static int mounts_open(struct inode *inode, struct file *file) { struct task_struct *task = get_proc_task(inode); - struct namespace *namespace = NULL; + struct mnt_namespace *ns = NULL; struct proc_mounts *p; int ret = -EINVAL; if (task) { task_lock(task); - namespace = task->nsproxy->namespace; - if (namespace) - get_namespace(namespace); + ns = task->nsproxy->mnt_ns; + if (ns) + get_mnt_ns(ns); task_unlock(task); put_task_struct(task); } - if (namespace) { + if (ns) { ret = -ENOMEM; p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); if (p) { file->private_data = &p->m; ret = seq_open(file, &mounts_op); if (!ret) { - p->m.private = namespace; - p->event = namespace->event; + p->m.private = ns; + p->event = ns->event; return 0; } kfree(p); } - put_namespace(namespace); + put_mnt_ns(ns); } return ret; } @@ -399,15 +399,15 @@ static int mounts_open(struct inode *inode, struct file *file) static int mounts_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; - struct namespace *namespace = m->private; - put_namespace(namespace); + struct mnt_namespace *ns = m->private; + put_mnt_ns(ns); return seq_release(inode, file); } static unsigned mounts_poll(struct file *file, poll_table *wait) { struct proc_mounts *p = file->private_data; - struct namespace *ns = p->m.private; + struct mnt_namespace *ns = p->m.private; unsigned res = 0; poll_wait(file, &ns->poll, wait); @@ -437,21 +437,21 @@ static int mountstats_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *m = file->private_data; - struct namespace *namespace = NULL; + struct mnt_namespace *mnt_ns = NULL; struct task_struct *task = get_proc_task(inode); if (task) { task_lock(task); if (task->nsproxy) - namespace = task->nsproxy->namespace; - if (namespace) - get_namespace(namespace); + mnt_ns = task->nsproxy->mnt_ns; + if (mnt_ns) + get_mnt_ns(mnt_ns); task_unlock(task); put_task_struct(task); } - if (namespace) - m->private = namespace; + if (mnt_ns) + m->private = mnt_ns; else { seq_release(inode, file); ret = -EINVAL; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7fb5fb036f90..58ad4551a7c1 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 848a68af3d42..5c4989172f7e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -76,7 +76,7 @@ extern struct nsproxy init_nsproxy; .count = ATOMIC_INIT(1), \ .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ .uts_ns = &init_uts_ns, \ - .namespace = NULL, \ + .mnt_ns = NULL, \ INIT_IPC_NS(ipc_ns) \ } diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h new file mode 100644 index 000000000000..4af0b1fc282a --- /dev/null +++ b/include/linux/mnt_namespace.h @@ -0,0 +1,42 @@ +#ifndef _NAMESPACE_H_ +#define _NAMESPACE_H_ +#ifdef __KERNEL__ + +#include +#include +#include + +struct mnt_namespace { + atomic_t count; + struct vfsmount * root; + struct list_head list; + wait_queue_head_t poll; + int event; +}; + +extern int copy_mnt_ns(int, struct task_struct *); +extern void __put_mnt_ns(struct mnt_namespace *ns); +extern struct mnt_namespace *dup_mnt_ns(struct task_struct *, + struct fs_struct *); + +static inline void put_mnt_ns(struct mnt_namespace *ns) +{ + if (atomic_dec_and_lock(&ns->count, &vfsmount_lock)) + /* releases vfsmount_lock */ + __put_mnt_ns(ns); +} + +static inline void exit_mnt_ns(struct task_struct *p) +{ + struct mnt_namespace *ns = p->nsproxy->mnt_ns; + if (ns) + put_mnt_ns(ns); +} + +static inline void get_mnt_ns(struct mnt_namespace *ns) +{ + atomic_inc(&ns->count); +} + +#endif +#endif diff --git a/include/linux/mount.h b/include/linux/mount.h index 403d1a97c512..e357dc86a4de 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -20,7 +20,7 @@ struct super_block; struct vfsmount; struct dentry; -struct namespace; +struct mnt_namespace; #define MNT_NOSUID 0x01 #define MNT_NODEV 0x02 @@ -52,7 +52,7 @@ struct vfsmount { struct list_head mnt_slave_list;/* list of slave mounts */ struct list_head mnt_slave; /* slave list entry */ struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ - struct namespace *mnt_namespace; /* containing namespace */ + struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_pinned; }; diff --git a/include/linux/namespace.h b/include/linux/namespace.h deleted file mode 100644 index d137009f0b2b..000000000000 --- a/include/linux/namespace.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _NAMESPACE_H_ -#define _NAMESPACE_H_ -#ifdef __KERNEL__ - -#include -#include -#include - -struct namespace { - atomic_t count; - struct vfsmount * root; - struct list_head list; - wait_queue_head_t poll; - int event; -}; - -extern int copy_namespace(int, struct task_struct *); -extern void __put_namespace(struct namespace *namespace); -extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *); - -static inline void put_namespace(struct namespace *namespace) -{ - if (atomic_dec_and_lock(&namespace->count, &vfsmount_lock)) - /* releases vfsmount_lock */ - __put_namespace(namespace); -} - -static inline void exit_namespace(struct task_struct *p) -{ - struct namespace *namespace = p->nsproxy->namespace; - if (namespace) { - put_namespace(namespace); - } -} - -static inline void get_namespace(struct namespace *namespace) -{ - atomic_inc(&namespace->count); -} - -#endif -#endif diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 971d1c6dfc4b..0aba1b1a39c7 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -4,7 +4,7 @@ #include #include -struct namespace; +struct mnt_namespace; struct uts_namespace; struct ipc_namespace; @@ -25,7 +25,7 @@ struct nsproxy { spinlock_t nslock; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; - struct namespace *namespace; + struct mnt_namespace *mnt_ns; }; extern struct nsproxy init_nsproxy; diff --git a/kernel/exit.c b/kernel/exit.c index 6267a6cc6113..28d9feedfd27 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 60d2644bfe85..8c859eef8e6a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -1525,17 +1525,18 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) } /* - * Unshare the namespace structure if it is being shared + * Unshare the mnt_namespace structure if it is being shared */ -static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs) +static int unshare_mnt_namespace(unsigned long unshare_flags, + struct mnt_namespace **new_nsp, struct fs_struct *new_fs) { - struct namespace *ns = current->nsproxy->namespace; + struct mnt_namespace *ns = current->nsproxy->mnt_ns; if ((unshare_flags & CLONE_NEWNS) && ns) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; - *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs); + *new_nsp = dup_mnt_ns(current, new_fs ? new_fs : current->fs); if (!*new_nsp) return -ENOMEM; } @@ -1623,7 +1624,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) { int err = 0; struct fs_struct *fs, *new_fs = NULL; - struct namespace *ns, *new_ns = NULL; + struct mnt_namespace *ns, *new_ns = NULL; struct sighand_struct *new_sigh = NULL; struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; struct files_struct *fd, *new_fd = NULL; @@ -1645,7 +1646,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) goto bad_unshare_out; if ((err = unshare_fs(unshare_flags, &new_fs))) goto bad_unshare_cleanup_thread; - if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs))) + if ((err = unshare_mnt_namespace(unshare_flags, &new_ns, new_fs))) goto bad_unshare_cleanup_fs; if ((err = unshare_sighand(unshare_flags, &new_sigh))) goto bad_unshare_cleanup_ns; @@ -1686,8 +1687,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) } if (new_ns) { - ns = current->nsproxy->namespace; - current->nsproxy->namespace = new_ns; + ns = current->nsproxy->mnt_ns; + current->nsproxy->mnt_ns = new_ns; new_ns = ns; } @@ -1748,7 +1749,7 @@ bad_unshare_cleanup_sigh: bad_unshare_cleanup_ns: if (new_ns) - put_namespace(new_ns); + put_mnt_ns(new_ns); bad_unshare_cleanup_fs: if (new_fs) diff --git a/kernel/kmod.c b/kernel/kmod.c index 8d2bea09a4ec..3a7379aa31ca 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 674aceb7335a..bd9cb435dfe0 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); @@ -60,8 +60,8 @@ struct nsproxy *dup_namespaces(struct nsproxy *orig) struct nsproxy *ns = clone_namespaces(orig); if (ns) { - if (ns->namespace) - get_namespace(ns->namespace); + if (ns->mnt_ns) + get_mnt_ns(ns->mnt_ns); if (ns->uts_ns) get_uts_ns(ns->uts_ns); if (ns->ipc_ns) @@ -97,7 +97,7 @@ int copy_namespaces(int flags, struct task_struct *tsk) tsk->nsproxy = new_ns; - err = copy_namespace(flags, tsk); + err = copy_mnt_ns(flags, tsk); if (err) goto out_ns; @@ -117,8 +117,8 @@ out_ipc: if (new_ns->uts_ns) put_uts_ns(new_ns->uts_ns); out_uts: - if (new_ns->namespace) - put_namespace(new_ns->namespace); + if (new_ns->mnt_ns) + put_mnt_ns(new_ns->mnt_ns); out_ns: tsk->nsproxy = old_ns; kfree(new_ns); @@ -127,8 +127,8 @@ out_ns: void free_nsproxy(struct nsproxy *ns) { - if (ns->namespace) - put_namespace(ns->namespace); + if (ns->mnt_ns) + put_mnt_ns(ns->mnt_ns); if (ns->uts_ns) put_uts_ns(ns->uts_ns); if (ns->ipc_ns) -- cgit v1.2.3 From 373beb35cd6b625e0ba4ad98baace12310a26aa8 Mon Sep 17 00:00:00 2001 From: Cedric Le Goater Date: Fri, 8 Dec 2006 02:37:57 -0800 Subject: [PATCH] identifier to nsproxy Add an identifier to nsproxy. The default init_ns_proxy has identifier 0 and allocated nsproxies are given -1. This identifier will be used by a new syscall sys_bind_ns. Signed-off-by: Cedric Le Goater Cc: Kirill Korotaev Cc: Eric W. Biederman Cc: Herbert Poetzl Cc: Sukadev Bhattiprolu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 1 + include/linux/nsproxy.h | 1 + kernel/nsproxy.c | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5c4989172f7e..90c5f9a07730 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -75,6 +75,7 @@ extern struct nsproxy init_nsproxy; #define INIT_NSPROXY(nsproxy) { \ .count = ATOMIC_INIT(1), \ .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ + .id = 0, \ .uts_ns = &init_uts_ns, \ .mnt_ns = NULL, \ INIT_IPC_NS(ipc_ns) \ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 0aba1b1a39c7..27f37c1ec1d9 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -23,6 +23,7 @@ struct ipc_namespace; struct nsproxy { atomic_t count; spinlock_t nslock; + unsigned long id; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index bd9cb435dfe0..f223c15c18e9 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -45,8 +45,10 @@ static inline struct nsproxy *clone_namespaces(struct nsproxy *orig) struct nsproxy *ns; ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL); - if (ns) + if (ns) { atomic_set(&ns->count, 1); + ns->id = -1; + } return ns; } -- cgit v1.2.3 From 9a575a92db3312a40cdf0b0406d88de88ad9741e Mon Sep 17 00:00:00 2001 From: Cedric Le Goater Date: Fri, 8 Dec 2006 02:37:59 -0800 Subject: [PATCH] to nsproxy Add the pid namespace framework to the nsproxy object. The copy of the pid namespace only increases the refcount on the global pid namespace, init_pid_ns, and unshare is not implemented. There is no configuration option to activate or deactivate this feature because this not relevant for the moment. Signed-off-by: Cedric Le Goater Cc: Kirill Korotaev Cc: Eric W. Biederman Cc: Herbert Poetzl Cc: Sukadev Bhattiprolu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 2 ++ include/linux/nsproxy.h | 2 ++ include/linux/pid_namespace.h | 20 ++++++++++++++++++-- kernel/nsproxy.c | 26 +++++++++++++++++++------- kernel/pid.c | 23 +++++++++++++++++++++++ 5 files changed, 64 insertions(+), 9 deletions(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 90c5f9a07730..7272ff9ee77c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -7,6 +7,7 @@ #include #include #include +#include #define INIT_FDTABLE \ { \ @@ -73,6 +74,7 @@ extern struct nsproxy init_nsproxy; #define INIT_NSPROXY(nsproxy) { \ + .pid_ns = &init_pid_ns, \ .count = ATOMIC_INIT(1), \ .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ .id = 0, \ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 27f37c1ec1d9..fdfb0e44912f 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -7,6 +7,7 @@ struct mnt_namespace; struct uts_namespace; struct ipc_namespace; +struct pid_namespace; /* * A structure to contain pointers to all per-process @@ -27,6 +28,7 @@ struct nsproxy { struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; + struct pid_namespace *pid_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 54d79095e295..76e7c6b2cf33 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -5,6 +5,8 @@ #include #include #include +#include +#include struct pidmap { atomic_t nr_free; @@ -14,10 +16,24 @@ struct pidmap { #define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) struct pid_namespace { - struct pidmap pidmap[PIDMAP_ENTRIES]; - int last_pid; + struct kref kref; + struct pidmap pidmap[PIDMAP_ENTRIES]; + int last_pid; }; extern struct pid_namespace init_pid_ns; +static inline void get_pid_ns(struct pid_namespace *ns) +{ + kref_get(&ns->kref); +} + +extern int copy_pid_ns(int flags, struct task_struct *tsk); +extern void free_pid_ns(struct kref *kref); + +static inline void put_pid_ns(struct pid_namespace *ns) +{ + kref_put(&ns->kref, free_pid_ns); +} + #endif /* _LINUX_PID_NS_H */ diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f223c15c18e9..e2ce748e96af 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -19,6 +19,7 @@ #include #include #include +#include struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); @@ -68,6 +69,8 @@ struct nsproxy *dup_namespaces(struct nsproxy *orig) get_uts_ns(ns->uts_ns); if (ns->ipc_ns) get_ipc_ns(ns->ipc_ns); + if (ns->pid_ns) + get_pid_ns(ns->pid_ns); } return ns; @@ -111,10 +114,17 @@ int copy_namespaces(int flags, struct task_struct *tsk) if (err) goto out_ipc; + err = copy_pid_ns(flags, tsk); + if (err) + goto out_pid; + out: put_nsproxy(old_ns); return err; +out_pid: + if (new_ns->ipc_ns) + put_ipc_ns(new_ns->ipc_ns); out_ipc: if (new_ns->uts_ns) put_uts_ns(new_ns->uts_ns); @@ -129,11 +139,13 @@ out_ns: void free_nsproxy(struct nsproxy *ns) { - if (ns->mnt_ns) - put_mnt_ns(ns->mnt_ns); - if (ns->uts_ns) - put_uts_ns(ns->uts_ns); - if (ns->ipc_ns) - put_ipc_ns(ns->ipc_ns); - kfree(ns); + if (ns->mnt_ns) + put_mnt_ns(ns->mnt_ns); + if (ns->uts_ns) + put_uts_ns(ns->uts_ns); + if (ns->ipc_ns) + put_ipc_ns(ns->ipc_ns); + if (ns->pid_ns) + put_pid_ns(ns->pid_ns); + kfree(ns); } diff --git a/kernel/pid.c b/kernel/pid.c index 25807e1b98dd..5319b9f2fc5e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -59,6 +59,9 @@ static inline int mk_pid(struct pid_namespace *pid_ns, * the scheme scales to up to 4 million PIDs, runtime. */ struct pid_namespace init_pid_ns = { + .kref = { + .refcount = ATOMIC_INIT(2), + }, .pidmap = { [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }, @@ -356,6 +359,26 @@ struct pid *find_ge_pid(int nr) } EXPORT_SYMBOL_GPL(find_get_pid); +int copy_pid_ns(int flags, struct task_struct *tsk) +{ + struct pid_namespace *old_ns = tsk->nsproxy->pid_ns; + int err = 0; + + if (!old_ns) + return 0; + + get_pid_ns(old_ns); + return err; +} + +void free_pid_ns(struct kref *kref) +{ + struct pid_namespace *ns; + + ns = container_of(kref, struct pid_namespace, kref); + kfree(ns); +} + /* * The pid hash table is scaled according to the amount of memory in the * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or -- cgit v1.2.3 From bbea9f69668a3d0cf9feba15a724cd02896f8675 Mon Sep 17 00:00:00 2001 From: Vadim Lobanov Date: Sun, 10 Dec 2006 02:21:12 -0800 Subject: [PATCH] fdtable: Make fdarray and fdsets equal in size Currently, each fdtable supports three dynamically-sized arrays of data: the fdarray and two fdsets. The code allows the number of fds supported by the fdarray (fdtable->max_fds) to differ from the number of fds supported by each of the fdsets (fdtable->max_fdset). In practice, it is wasteful for these two sizes to differ: whenever we hit a limit on the smaller-capacity structure, we will reallocate the entire fdtable and all the dynamic arrays within it, so any delta in the memory used by the larger-capacity structure will never be touched at all. Rather than hogging this excess, we shouldn't even allocate it in the first place, and keep the capacities of the fdarray and the fdsets equal. This patch removes fdtable->max_fdset. As an added bonus, most of the supporting code becomes simpler. Signed-off-by: Vadim Lobanov Cc: Christoph Hellwig Cc: Al Viro Cc: Dipankar Sarma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/osf_sys.c | 6 ++--- arch/mips/kernel/kspd.c | 2 +- fs/compat.c | 10 ++++---- fs/exec.c | 2 +- fs/fcntl.c | 5 ++-- fs/file.c | 56 ++++++++++++++++++--------------------------- fs/open.c | 3 +-- fs/select.c | 10 ++++---- include/linux/file.h | 6 ----- include/linux/init_task.h | 1 - kernel/exit.c | 2 +- kernel/fork.c | 24 +++++-------------- security/selinux/hooks.c | 2 +- 13 files changed, 48 insertions(+), 81 deletions(-) (limited to 'include/linux/init_task.h') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index fb804043b320..be133f1f75a4 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -979,7 +979,7 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, long timeout; int ret = -EINVAL; struct fdtable *fdt; - int max_fdset; + int max_fds; timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { @@ -1003,9 +1003,9 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, rcu_read_lock(); fdt = files_fdtable(current->files); - max_fdset = fdt->max_fdset; + max_fds = fdt->max_fds; rcu_read_unlock(); - if (n < 0 || n > max_fdset) + if (n < 0 || n > max_fds) goto out_nofds; /* diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index 2c82412b9efe..5929f883e46b 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -301,7 +301,7 @@ static void sp_cleanup(void) for (;;) { unsigned long set; i = j * __NFDBITS; - if (i >= fdt->max_fdset || i >= fdt->max_fds) + if (i >= fdt->max_fds) break; set = fdt->open_fds->fds_bits[j++]; while (set) { diff --git a/fs/compat.c b/fs/compat.c index b766964a625c..0ec70e3cee0a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1679,19 +1679,19 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, { fd_set_bits fds; char *bits; - int size, max_fdset, ret = -EINVAL; + int size, max_fds, ret = -EINVAL; struct fdtable *fdt; if (n < 0) goto out_nofds; - /* max_fdset can increase, so grab it once to avoid race */ + /* max_fds can increase, so grab it once to avoid race */ rcu_read_lock(); fdt = files_fdtable(current->files); - max_fdset = fdt->max_fdset; + max_fds = fdt->max_fds; rcu_read_unlock(); - if (n > max_fdset) - n = max_fdset; + if (n > max_fds) + n = max_fds; /* * We need 6 bitmaps (in/out/ex for both incoming and outgoing), diff --git a/fs/exec.c b/fs/exec.c index 12d8cd461b41..11fe93f7363c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -783,7 +783,7 @@ static void flush_old_files(struct files_struct * files) j++; i = j * __NFDBITS; fdt = files_fdtable(files); - if (i >= fdt->max_fds || i >= fdt->max_fdset) + if (i >= fdt->max_fds) break; set = fdt->close_on_exec->fds_bits[j]; if (!set) diff --git a/fs/fcntl.c b/fs/fcntl.c index 2bdaef35da54..8e382a5d51bd 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -77,10 +77,9 @@ repeat: start = files->next_fd; newfd = start; - if (start < fdt->max_fdset) { + if (start < fdt->max_fds) newfd = find_next_zero_bit(fdt->open_fds->fds_bits, - fdt->max_fdset, start); - } + fdt->max_fds, start); error = -EMFILE; if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) diff --git a/fs/file.c b/fs/file.c index 51aef675470f..fb3d2038dc21 100644 --- a/fs/file.c +++ b/fs/file.c @@ -68,8 +68,8 @@ void free_fd_array(struct file **array, int num) static void __free_fdtable(struct fdtable *fdt) { - free_fdset(fdt->open_fds, fdt->max_fdset); - free_fdset(fdt->close_on_exec, fdt->max_fdset); + free_fdset(fdt->open_fds, fdt->max_fds); + free_fdset(fdt->close_on_exec, fdt->max_fds); free_fd_array(fdt->fd, fdt->max_fds); kfree(fdt); } @@ -98,7 +98,7 @@ static void free_fdtable_rcu(struct rcu_head *rcu) struct fdtable_defer *fddef; BUG_ON(!fdt); - fdset_size = fdt->max_fdset / 8; + fdset_size = fdt->max_fds / 8; fdarray_size = fdt->max_fds * sizeof(struct file *); if (fdt->free_files) { @@ -110,13 +110,11 @@ static void free_fdtable_rcu(struct rcu_head *rcu) kmem_cache_free(files_cachep, fdt->free_files); return; } - if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE && - fdt->max_fds <= NR_OPEN_DEFAULT) { + if (fdt->max_fds <= NR_OPEN_DEFAULT) /* * The fdtable was embedded */ return; - } if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) { kfree(fdt->open_fds); kfree(fdt->close_on_exec); @@ -136,9 +134,7 @@ static void free_fdtable_rcu(struct rcu_head *rcu) void free_fdtable(struct fdtable *fdt) { - if (fdt->free_files || - fdt->max_fdset > EMBEDDED_FD_SET_SIZE || - fdt->max_fds > NR_OPEN_DEFAULT) + if (fdt->free_files || fdt->max_fds > NR_OPEN_DEFAULT) call_rcu(&fdt->rcu, free_fdtable_rcu); } @@ -151,12 +147,11 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) int i; int count; - BUG_ON(nfdt->max_fdset < fdt->max_fdset); BUG_ON(nfdt->max_fds < fdt->max_fds); /* Copy the existing tables and install the new pointers */ - i = fdt->max_fdset / (sizeof(unsigned long) * 8); - count = (nfdt->max_fdset - fdt->max_fdset) / 8; + i = fdt->max_fds / (sizeof(unsigned long) * 8); + count = (nfdt->max_fds - fdt->max_fds) / 8; /* * Don't copy the entire array if the current fdset is @@ -164,9 +159,9 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) */ if (i) { memcpy (nfdt->open_fds, fdt->open_fds, - fdt->max_fdset/8); + fdt->max_fds/8); memcpy (nfdt->close_on_exec, fdt->close_on_exec, - fdt->max_fdset/8); + fdt->max_fds/8); memset (&nfdt->open_fds->fds_bits[i], 0, count); memset (&nfdt->close_on_exec->fds_bits[i], 0, count); } @@ -201,7 +196,7 @@ fd_set * alloc_fdset(int num) void free_fdset(fd_set *array, int num) { - if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */ + if (num <= NR_OPEN_DEFAULT) /* Don't free an embedded fdset */ return; else if (num <= 8 * PAGE_SIZE) kfree(array); @@ -220,18 +215,6 @@ static struct fdtable *alloc_fdtable(int nr) if (!fdt) goto out; - nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1)); - if (nfds > NR_OPEN) - nfds = NR_OPEN; - - new_openset = alloc_fdset(nfds); - new_execset = alloc_fdset(nfds); - if (!new_openset || !new_execset) - goto out; - fdt->open_fds = new_openset; - fdt->close_on_exec = new_execset; - fdt->max_fdset = nfds; - nfds = NR_OPEN_DEFAULT; /* * Expand to the max in easy steps, and keep expanding it until @@ -251,15 +234,21 @@ static struct fdtable *alloc_fdtable(int nr) nfds = NR_OPEN; } } while (nfds <= nr); + + new_openset = alloc_fdset(nfds); + new_execset = alloc_fdset(nfds); + if (!new_openset || !new_execset) + goto out; + fdt->open_fds = new_openset; + fdt->close_on_exec = new_execset; + new_fds = alloc_fd_array(nfds); if (!new_fds) - goto out2; + goto out; fdt->fd = new_fds; fdt->max_fds = nfds; fdt->free_files = NULL; return fdt; -out2: - nfds = fdt->max_fdset; out: free_fdset(new_openset, nfds); free_fdset(new_execset, nfds); @@ -290,7 +279,7 @@ static int expand_fdtable(struct files_struct *files, int nr) * we dropped the lock */ cur_fdt = files_fdtable(files); - if (nr >= cur_fdt->max_fds || nr >= cur_fdt->max_fdset) { + if (nr >= cur_fdt->max_fds) { /* Continue as planned */ copy_fdtable(new_fdt, cur_fdt); rcu_assign_pointer(files->fdt, new_fdt); @@ -316,11 +305,10 @@ int expand_files(struct files_struct *files, int nr) fdt = files_fdtable(files); /* Do we need to expand? */ - if (nr < fdt->max_fdset && nr < fdt->max_fds) + if (nr < fdt->max_fds) return 0; /* Can we expand? */ - if (fdt->max_fdset >= NR_OPEN || fdt->max_fds >= NR_OPEN || - nr >= NR_OPEN) + if (nr >= NR_OPEN) return -EMFILE; /* All good, so we try */ diff --git a/fs/open.c b/fs/open.c index 0d94319e8681..c989fb4cf7b9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -864,8 +864,7 @@ int get_unused_fd(void) repeat: fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds->fds_bits, - fdt->max_fdset, + fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, files->next_fd); /* diff --git a/fs/select.c b/fs/select.c index dcbc1112b7ec..fe0893afd931 100644 --- a/fs/select.c +++ b/fs/select.c @@ -311,7 +311,7 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, { fd_set_bits fds; void *bits; - int ret, max_fdset; + int ret, max_fds; unsigned int size; struct fdtable *fdt; /* Allocate small arguments on the stack to save memory and be faster */ @@ -321,13 +321,13 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, if (n < 0) goto out_nofds; - /* max_fdset can increase, so grab it once to avoid race */ + /* max_fds can increase, so grab it once to avoid race */ rcu_read_lock(); fdt = files_fdtable(current->files); - max_fdset = fdt->max_fdset; + max_fds = fdt->max_fds; rcu_read_unlock(); - if (n > max_fdset) - n = max_fdset; + if (n > max_fds) + n = max_fds; /* * We need 6 bitmaps (in/out/ex for both incoming and outgoing), diff --git a/include/linux/file.h b/include/linux/file.h index 6e77b9177f9e..02be4012225b 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -26,14 +26,8 @@ struct embedded_fd_set { unsigned long fds_bits[1]; }; -/* - * More than this number of fds: we use a separately allocated fd_set - */ -#define EMBEDDED_FD_SET_SIZE (BITS_PER_BYTE * sizeof(struct embedded_fd_set)) - struct fdtable { unsigned int max_fds; - int max_fdset; struct file ** fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 7272ff9ee77c..58c18daab65d 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -12,7 +12,6 @@ #define INIT_FDTABLE \ { \ .max_fds = NR_OPEN_DEFAULT, \ - .max_fdset = EMBEDDED_FD_SET_SIZE, \ .fd = &init_files.fd_array[0], \ .close_on_exec = (fd_set *)&init_files.close_on_exec_init, \ .open_fds = (fd_set *)&init_files.open_fds_init, \ diff --git a/kernel/exit.c b/kernel/exit.c index 03e64fe4a14a..5f77e76b4f97 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -425,7 +425,7 @@ static void close_files(struct files_struct * files) for (;;) { unsigned long set; i = j * __NFDBITS; - if (i >= fdt->max_fdset || i >= fdt->max_fds) + if (i >= fdt->max_fds) break; set = fdt->open_fds->fds_bits[j++]; while (set) { diff --git a/kernel/fork.c b/kernel/fork.c index 30eab4f063cd..aba595424f78 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -614,7 +614,7 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) static int count_open_files(struct fdtable *fdt) { - int size = fdt->max_fdset; + int size = fdt->max_fds; int i; /* Find the last open fd */ @@ -641,7 +641,6 @@ static struct files_struct *alloc_files(void) newf->next_fd = 0; fdt = &newf->fdtab; fdt->max_fds = NR_OPEN_DEFAULT; - fdt->max_fdset = EMBEDDED_FD_SET_SIZE; fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; fdt->open_fds = (fd_set *)&newf->open_fds_init; fdt->fd = &newf->fd_array[0]; @@ -662,7 +661,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) { struct files_struct *newf; struct file **old_fds, **new_fds; - int open_files, size, i, expand; + int open_files, size, i; struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; @@ -673,25 +672,14 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); new_fdt = files_fdtable(newf); - size = old_fdt->max_fdset; open_files = count_open_files(old_fdt); - expand = 0; /* - * Check whether we need to allocate a larger fd array or fd set. - * Note: we're not a clone task, so the open count won't change. + * Check whether we need to allocate a larger fd array and fd set. + * Note: we're not a clone task, so the open count won't change. */ - if (open_files > new_fdt->max_fdset) { - new_fdt->max_fdset = 0; - expand = 1; - } if (open_files > new_fdt->max_fds) { new_fdt->max_fds = 0; - expand = 1; - } - - /* if the old fdset gets grown now, we'll only copy up to "size" fds */ - if (expand) { spin_unlock(&oldf->file_lock); spin_lock(&newf->file_lock); *errorp = expand_files(newf, open_files-1); @@ -739,8 +727,8 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) /* This is long word aligned thus could use a optimized version */ memset(new_fds, 0, size); - if (new_fdt->max_fdset > open_files) { - int left = (new_fdt->max_fdset-open_files)/8; + if (new_fdt->max_fds > open_files) { + int left = (new_fdt->max_fds-open_files)/8; int start = open_files / (8 * sizeof(unsigned long)); memset(&new_fdt->open_fds->fds_bits[start], 0, left); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3753416eb9b9..65fb5e8ea941 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1734,7 +1734,7 @@ static inline void flush_unauthorized_files(struct files_struct * files) j++; i = j * __NFDBITS; fdt = files_fdtable(files); - if (i >= fdt->max_fds || i >= fdt->max_fdset) + if (i >= fdt->max_fds) break; set = fdt->open_fds->fds_bits[j]; if (!set) -- cgit v1.2.3 From 4fd45812cbe875a620c86a096a5d46c742694b7e Mon Sep 17 00:00:00 2001 From: Vadim Lobanov Date: Sun, 10 Dec 2006 02:21:17 -0800 Subject: [PATCH] fdtable: Remove the free_files field An fdtable can either be embedded inside a files_struct or standalone (after being expanded). When an fdtable is being discarded after all RCU references to it have expired, we must either free it directly, in the standalone case, or free the files_struct it is contained within, in the embedded case. Currently the free_files field controls this behavior, but we can get rid of it entirely, as all the necessary information is already recorded. We can distinguish embedded and standalone fdtables using max_fds, and if it is embedded we can divine the relevant files_struct using container_of(). Signed-off-by: Vadim Lobanov Cc: Christoph Hellwig Cc: Al Viro Cc: Dipankar Sarma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/file.c | 27 ++++++++------------------- include/linux/file.h | 3 +-- include/linux/init_task.h | 1 - kernel/exit.c | 6 ++---- kernel/fork.c | 1 - 5 files changed, 11 insertions(+), 27 deletions(-) (limited to 'include/linux/init_task.h') diff --git a/fs/file.c b/fs/file.c index fb3d2038dc21..17e6a55521e2 100644 --- a/fs/file.c +++ b/fs/file.c @@ -91,7 +91,7 @@ static void free_fdtable_work(struct work_struct *work) } } -static void free_fdtable_rcu(struct rcu_head *rcu) +void free_fdtable_rcu(struct rcu_head *rcu) { struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); int fdset_size, fdarray_size; @@ -101,20 +101,15 @@ static void free_fdtable_rcu(struct rcu_head *rcu) fdset_size = fdt->max_fds / 8; fdarray_size = fdt->max_fds * sizeof(struct file *); - if (fdt->free_files) { + if (fdt->max_fds <= NR_OPEN_DEFAULT) { /* - * The this fdtable was embedded in the files structure - * and the files structure itself was getting destroyed. - * It is now safe to free the files structure. + * This fdtable is embedded in the files structure and that + * structure itself is getting destroyed. */ - kmem_cache_free(files_cachep, fdt->free_files); + kmem_cache_free(files_cachep, + container_of(fdt, struct files_struct, fdtab)); return; } - if (fdt->max_fds <= NR_OPEN_DEFAULT) - /* - * The fdtable was embedded - */ - return; if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) { kfree(fdt->open_fds); kfree(fdt->close_on_exec); @@ -132,12 +127,6 @@ static void free_fdtable_rcu(struct rcu_head *rcu) } } -void free_fdtable(struct fdtable *fdt) -{ - if (fdt->free_files || fdt->max_fds > NR_OPEN_DEFAULT) - call_rcu(&fdt->rcu, free_fdtable_rcu); -} - /* * Expand the fdset in the files_struct. Called with the files spinlock * held for write. @@ -247,7 +236,6 @@ static struct fdtable *alloc_fdtable(int nr) goto out; fdt->fd = new_fds; fdt->max_fds = nfds; - fdt->free_files = NULL; return fdt; out: free_fdset(new_openset, nfds); @@ -283,7 +271,8 @@ static int expand_fdtable(struct files_struct *files, int nr) /* Continue as planned */ copy_fdtable(new_fdt, cur_fdt); rcu_assign_pointer(files->fdt, new_fdt); - free_fdtable(cur_fdt); + if (cur_fdt->max_fds > NR_OPEN_DEFAULT) + call_rcu(&cur_fdt->rcu, free_fdtable_rcu); } else { /* Somebody else expanded, so undo our attempt */ __free_fdtable(new_fdt); diff --git a/include/linux/file.h b/include/linux/file.h index 02be4012225b..319118f275b0 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -32,7 +32,6 @@ struct fdtable { fd_set *close_on_exec; fd_set *open_fds; struct rcu_head rcu; - struct files_struct *free_files; struct fdtable *next; }; @@ -84,7 +83,7 @@ extern fd_set *alloc_fdset(int); extern void free_fdset(fd_set *, int); extern int expand_files(struct files_struct *, int nr); -extern void free_fdtable(struct fdtable *fdt); +extern void free_fdtable_rcu(struct rcu_head *rcu); extern void __init files_defer_init(void); static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 58c18daab65d..b5315150199e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -16,7 +16,6 @@ .close_on_exec = (fd_set *)&init_files.close_on_exec_init, \ .open_fds = (fd_set *)&init_files.open_fds_init, \ .rcu = RCU_HEAD_INIT, \ - .free_files = NULL, \ .next = NULL, \ } diff --git a/kernel/exit.c b/kernel/exit.c index 5f77e76b4f97..122fadb972fc 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -466,11 +466,9 @@ void fastcall put_files_struct(struct files_struct *files) * you can free files immediately. */ fdt = files_fdtable(files); - if (fdt == &files->fdtab) - fdt->free_files = files; - else + if (fdt != &files->fdtab) kmem_cache_free(files_cachep, files); - free_fdtable(fdt); + call_rcu(&fdt->rcu, free_fdtable_rcu); } } diff --git a/kernel/fork.c b/kernel/fork.c index aba595424f78..d16c566eb645 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -645,7 +645,6 @@ static struct files_struct *alloc_files(void) fdt->open_fds = (fd_set *)&newf->open_fds_init; fdt->fd = &newf->fd_array[0]; INIT_RCU_HEAD(&fdt->rcu); - fdt->free_files = NULL; fdt->next = NULL; rcu_assign_pointer(newf->fdt, fdt); out: -- cgit v1.2.3 From 5f8442edfb214908e9c6ca1142bf882c9bc364e5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 Dec 2006 00:34:04 -0800 Subject: [PATCH] Revert "[PATCH] identifier to nsproxy" This reverts commit 373beb35cd6b625e0ba4ad98baace12310a26aa8. No one is using this identifier yet. The purpose of this identifier is to export nsproxy to user space which is wrong. nsproxy is an internal implementation optimization, which should keep our fork times from getting slower as we increase the number of global namespaces you don't have to share. Adding a global identifier like this is inappropriate because it makes namespaces inherently non-recursive, greatly limiting what we can do with them in the future. Signed-off-by: Eric W. Biederman Cc: Cedric Le Goater Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 1 - include/linux/nsproxy.h | 1 - kernel/nsproxy.c | 4 +--- 3 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index b5315150199e..6383d2d83bb0 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -75,7 +75,6 @@ extern struct nsproxy init_nsproxy; .pid_ns = &init_pid_ns, \ .count = ATOMIC_INIT(1), \ .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ - .id = 0, \ .uts_ns = &init_uts_ns, \ .mnt_ns = NULL, \ INIT_IPC_NS(ipc_ns) \ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index fdfb0e44912f..0b9f0dc30d61 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -24,7 +24,6 @@ struct pid_namespace; struct nsproxy { atomic_t count; spinlock_t nslock; - unsigned long id; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index e2ce748e96af..f5b9ee6f6bbb 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -46,10 +46,8 @@ static inline struct nsproxy *clone_namespaces(struct nsproxy *orig) struct nsproxy *ns; ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL); - if (ns) { + if (ns) atomic_set(&ns->count, 1); - ns->id = -1; - } return ns; } -- cgit v1.2.3