diff options
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/ipc_sysctl.c | 36 | ||||
-rw-r--r-- | ipc/mqueue.c | 125 | ||||
-rw-r--r-- | ipc/shm.c | 117 |
3 files changed, 212 insertions, 66 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 56410faa4550..00fba2bab87d 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -31,12 +31,37 @@ static int proc_ipc_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; + memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); } +static int proc_ipc_dointvec_minmax(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table ipc_table; + + memcpy(&ipc_table, table, sizeof(ipc_table)); + ipc_table.data = get_ipc(table); + + return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); +} + +static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (err < 0) + return err; + if (ns->shm_rmid_forced) + shm_destroy_orphaned(ns); + return err; +} + static int proc_ipc_callback_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -125,6 +150,8 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #else #define proc_ipc_doulongvec_minmax NULL #define proc_ipc_dointvec NULL +#define proc_ipc_dointvec_minmax NULL +#define proc_ipc_dointvec_minmax_orphans NULL #define proc_ipc_callback_dointvec NULL #define proc_ipcauto_dointvec_minmax NULL #endif @@ -155,6 +182,15 @@ static struct ctl_table ipc_kern_table[] = { .proc_handler = proc_ipc_dointvec, }, { + .procname = "shm_rmid_forced", + .data = &init_ipc_ns.shm_rmid_forced, + .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), + .mode = 0644, + .proc_handler = proc_ipc_dointvec_minmax_orphans, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof (init_ipc_ns.msg_ctlmax), diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 14fb6d67e6a3..ed049ea568f4 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -113,72 +113,75 @@ static struct inode *mqueue_get_inode(struct super_block *sb, { struct user_struct *u = current_user(); struct inode *inode; + int ret = -ENOMEM; inode = new_inode(sb); - if (inode) { - inode->i_ino = get_next_ino(); - inode->i_mode = mode; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); - inode->i_mtime = inode->i_ctime = inode->i_atime = - CURRENT_TIME; + if (!inode) + goto err; - if (S_ISREG(mode)) { - struct mqueue_inode_info *info; - struct task_struct *p = current; - unsigned long mq_bytes, mq_msg_tblsz; - - inode->i_fop = &mqueue_file_operations; - inode->i_size = FILENT_SIZE; - /* mqueue specific info */ - info = MQUEUE_I(inode); - spin_lock_init(&info->lock); - init_waitqueue_head(&info->wait_q); - INIT_LIST_HEAD(&info->e_wait_q[0].list); - INIT_LIST_HEAD(&info->e_wait_q[1].list); - info->notify_owner = NULL; - info->qsize = 0; - info->user = NULL; /* set when all is ok */ - memset(&info->attr, 0, sizeof(info->attr)); - info->attr.mq_maxmsg = ipc_ns->mq_msg_max; - info->attr.mq_msgsize = ipc_ns->mq_msgsize_max; - if (attr) { - info->attr.mq_maxmsg = attr->mq_maxmsg; - info->attr.mq_msgsize = attr->mq_msgsize; - } - mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *); - info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); - if (!info->messages) - goto out_inode; - - mq_bytes = (mq_msg_tblsz + - (info->attr.mq_maxmsg * info->attr.mq_msgsize)); - - spin_lock(&mq_lock); - if (u->mq_bytes + mq_bytes < u->mq_bytes || - u->mq_bytes + mq_bytes > - task_rlimit(p, RLIMIT_MSGQUEUE)) { - spin_unlock(&mq_lock); - /* mqueue_evict_inode() releases info->messages */ - goto out_inode; - } - u->mq_bytes += mq_bytes; - spin_unlock(&mq_lock); + inode->i_ino = get_next_ino(); + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME; + + if (S_ISREG(mode)) { + struct mqueue_inode_info *info; + struct task_struct *p = current; + unsigned long mq_bytes, mq_msg_tblsz; + + inode->i_fop = &mqueue_file_operations; + inode->i_size = FILENT_SIZE; + /* mqueue specific info */ + info = MQUEUE_I(inode); + spin_lock_init(&info->lock); + init_waitqueue_head(&info->wait_q); + INIT_LIST_HEAD(&info->e_wait_q[0].list); + INIT_LIST_HEAD(&info->e_wait_q[1].list); + info->notify_owner = NULL; + info->qsize = 0; + info->user = NULL; /* set when all is ok */ + memset(&info->attr, 0, sizeof(info->attr)); + info->attr.mq_maxmsg = ipc_ns->mq_msg_max; + info->attr.mq_msgsize = ipc_ns->mq_msgsize_max; + if (attr) { + info->attr.mq_maxmsg = attr->mq_maxmsg; + info->attr.mq_msgsize = attr->mq_msgsize; + } + mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *); + info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); + if (!info->messages) + goto out_inode; - /* all is ok */ - info->user = get_uid(u); - } else if (S_ISDIR(mode)) { - inc_nlink(inode); - /* Some things misbehave if size == 0 on a directory */ - inode->i_size = 2 * DIRENT_SIZE; - inode->i_op = &mqueue_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + mq_bytes = (mq_msg_tblsz + + (info->attr.mq_maxmsg * info->attr.mq_msgsize)); + + spin_lock(&mq_lock); + if (u->mq_bytes + mq_bytes < u->mq_bytes || + u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE)) { + spin_unlock(&mq_lock); + /* mqueue_evict_inode() releases info->messages */ + ret = -EMFILE; + goto out_inode; } + u->mq_bytes += mq_bytes; + spin_unlock(&mq_lock); + + /* all is ok */ + info->user = get_uid(u); + } else if (S_ISDIR(mode)) { + inc_nlink(inode); + /* Some things misbehave if size == 0 on a directory */ + inode->i_size = 2 * DIRENT_SIZE; + inode->i_op = &mqueue_dir_inode_operations; + inode->i_fop = &simple_dir_operations; } + return inode; out_inode: iput(inode); - return NULL; +err: + return ERR_PTR(ret); } static int mqueue_fill_super(struct super_block *sb, void *data, int silent) @@ -194,8 +197,8 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL); - if (!inode) { - error = -ENOMEM; + if (IS_ERR(inode)) { + error = PTR_ERR(inode); goto out; } @@ -315,8 +318,8 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry, spin_unlock(&mq_lock); inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr); - if (!inode) { - error = -ENOMEM; + if (IS_ERR(inode)) { + error = PTR_ERR(inode); spin_lock(&mq_lock); ipc_ns->mq_queues_count--; goto out_unlock; diff --git a/ipc/shm.c b/ipc/shm.c index 27884adb1a90..02ecf2c078fc 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns) ns->shm_ctlmax = SHMMAX; ns->shm_ctlall = SHMALL; ns->shm_ctlmni = SHMMNI; + ns->shm_rmid_forced = 0; ns->shm_tot = 0; ipc_init_ids(&shm_ids(ns)); } @@ -104,9 +105,16 @@ void shm_exit_ns(struct ipc_namespace *ns) } #endif -void __init shm_init (void) +static int __init ipc_ns_init(void) { shm_init_ns(&init_ipc_ns); + return 0; +} + +pure_initcall(ipc_ns_init); + +void __init shm_init (void) +{ ipc_init_proc_interface("sysvipc/shm", #if BITS_PER_LONG <= 32 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", @@ -130,6 +138,12 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) return container_of(ipcp, struct shmid_kernel, shm_perm); } +static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) +{ + rcu_read_lock(); + spin_lock(&ipcp->shm_perm.lock); +} + static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, int id) { @@ -187,6 +201,23 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) } /* + * shm_may_destroy - identifies whether shm segment should be destroyed now + * + * Returns true if and only if there are no active users of the segment and + * one of the following is true: + * + * 1) shmctl(id, IPC_RMID, NULL) was called for this shp + * + * 2) sysctl kernel.shm_rmid_forced is set to 1. + */ +static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) +{ + return (shp->shm_nattch == 0) && + (ns->shm_rmid_forced || + (shp->shm_perm.mode & SHM_DEST)); +} + +/* * remove the attach descriptor vma. * free memory for segment if it is marked destroyed. * The descriptor has already been removed from the current->mm->mmap list @@ -206,14 +237,90 @@ static void shm_close(struct vm_area_struct *vma) shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_perm.mode & SHM_DEST) + if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); else shm_unlock(shp); up_write(&shm_ids(ns).rw_mutex); } +/* Called with ns->shm_ids(ns).rw_mutex locked */ +static int shm_try_destroy_current(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct kern_ipc_perm *ipcp = p; + struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); + + if (shp->shm_creator != current) + return 0; + + /* + * Mark it as orphaned to destroy the segment when + * kernel.shm_rmid_forced is changed. + * It is noop if the following shm_may_destroy() returns true. + */ + shp->shm_creator = NULL; + + /* + * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID + * is not set, it shouldn't be deleted here. + */ + if (!ns->shm_rmid_forced) + return 0; + + if (shm_may_destroy(ns, shp)) { + shm_lock_by_ptr(shp); + shm_destroy(ns, shp); + } + return 0; +} + +/* Called with ns->shm_ids(ns).rw_mutex locked */ +static int shm_try_destroy_orphaned(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct kern_ipc_perm *ipcp = p; + struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); + + /* + * We want to destroy segments without users and with already + * exit'ed originating process. + * + * As shp->* are changed under rw_mutex, it's safe to skip shp locking. + */ + if (shp->shm_creator != NULL) + return 0; + + if (shm_may_destroy(ns, shp)) { + shm_lock_by_ptr(shp); + shm_destroy(ns, shp); + } + return 0; +} + +void shm_destroy_orphaned(struct ipc_namespace *ns) +{ + down_write(&shm_ids(ns).rw_mutex); + if (shm_ids(ns).in_use) + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); + up_write(&shm_ids(ns).rw_mutex); +} + + +void exit_shm(struct task_struct *task) +{ + struct ipc_namespace *ns = task->nsproxy->ipc_ns; + + if (shm_ids(ns).in_use == 0) + return; + + /* Destroy all already created segments, but not mapped yet */ + down_write(&shm_ids(ns).rw_mutex); + if (shm_ids(ns).in_use) + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); + up_write(&shm_ids(ns).rw_mutex); +} + static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct file *file = vma->vm_file; @@ -404,6 +511,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_segsz = size; shp->shm_nattch = 0; shp->shm_file = file; + shp->shm_creator = current; /* * shmid gets reported as "inode#" in /proc/pid/maps. * proc-ps tools use this. Changing this will break them. @@ -950,8 +1058,7 @@ out_nattch: shp = shm_lock(ns, shmid); BUG_ON(IS_ERR(shp)); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_perm.mode & SHM_DEST) + if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); else shm_unlock(shp); |