diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/array.c | 20 | ||||
-rw-r--r-- | fs/proc/base.c | 296 | ||||
-rw-r--r-- | fs/proc/fd.c | 14 | ||||
-rw-r--r-- | fs/proc/generic.c | 17 | ||||
-rw-r--r-- | fs/proc/inode.c | 5 | ||||
-rw-r--r-- | fs/proc/internal.h | 27 | ||||
-rw-r--r-- | fs/proc/kcore.c | 6 | ||||
-rw-r--r-- | fs/proc/loadavg.c | 2 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 7 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 72 | ||||
-rw-r--r-- | fs/proc/root.c | 11 | ||||
-rw-r--r-- | fs/proc/stat.c | 67 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 5 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 4 | ||||
-rw-r--r-- | fs/proc/uptime.c | 7 | ||||
-rw-r--r-- | fs/proc/vmcore.c | 8 |
16 files changed, 298 insertions, 270 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index 51a4213afa2e..88c355574aa0 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -60,6 +60,10 @@ #include <linux/tty.h> #include <linux/string.h> #include <linux/mman.h> +#include <linux/sched/mm.h> +#include <linux/sched/numa_balancing.h> +#include <linux/sched/task.h> +#include <linux/sched/cputime.h> #include <linux/proc_fs.h> #include <linux/ioport.h> #include <linux/uaccess.h> @@ -401,8 +405,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, unsigned long long start_time; unsigned long cmin_flt = 0, cmaj_flt = 0; unsigned long min_flt = 0, maj_flt = 0; - cputime_t cutime, cstime, utime, stime; - cputime_t cgtime, gtime; + u64 cutime, cstime, utime, stime; + u64 cgtime, gtime; unsigned long rsslim = 0; char tcomm[sizeof(task->comm)]; unsigned long flags; @@ -497,10 +501,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, " ", cmin_flt); seq_put_decimal_ull(m, " ", maj_flt); seq_put_decimal_ull(m, " ", cmaj_flt); - seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime)); - seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime)); - seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime)); - seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime)); + seq_put_decimal_ull(m, " ", nsec_to_clock_t(utime)); + seq_put_decimal_ull(m, " ", nsec_to_clock_t(stime)); + seq_put_decimal_ll(m, " ", nsec_to_clock_t(cutime)); + seq_put_decimal_ll(m, " ", nsec_to_clock_t(cstime)); seq_put_decimal_ll(m, " ", priority); seq_put_decimal_ll(m, " ", nice); seq_put_decimal_ll(m, " ", num_threads); @@ -542,8 +546,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, " ", task->rt_priority); seq_put_decimal_ull(m, " ", task->policy); seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task)); - seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime)); - seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime)); + seq_put_decimal_ull(m, " ", nsec_to_clock_t(gtime)); + seq_put_decimal_ll(m, " ", nsec_to_clock_t(cgtime)); if (mm && permitted) { seq_put_decimal_ull(m, " ", mm->start_data); diff --git a/fs/proc/base.c b/fs/proc/base.c index 87c9a9aacda3..c87b6b9a8a76 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -85,6 +85,11 @@ #include <linux/user_namespace.h> #include <linux/fs_struct.h> #include <linux/slab.h> +#include <linux/sched/autogroup.h> +#include <linux/sched/mm.h> +#include <linux/sched/coredump.h> +#include <linux/sched/debug.h> +#include <linux/sched/stat.h> #include <linux/flex_array.h> #include <linux/posix-timers.h> #ifdef CONFIG_HARDWALL @@ -292,101 +297,69 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, } } else { /* - * Command line (1 string) occupies ARGV and maybe - * extends into ENVP. - */ - if (len1 + len2 <= *pos) - goto skip_argv_envp; - if (len1 <= *pos) - goto skip_argv; - - p = arg_start + *pos; - len = len1 - *pos; - while (count > 0 && len > 0) { - unsigned int _count, l; - int nr_read; - bool final; - - _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); - if (nr_read < 0) - rv = nr_read; - if (nr_read <= 0) - goto out_free_page; - - /* - * Command line can be shorter than whole ARGV - * even if last "marker" byte says it is not. - */ - final = false; - l = strnlen(page, nr_read); - if (l < nr_read) { - nr_read = l; - final = true; - } - - if (copy_to_user(buf, page, nr_read)) { - rv = -EFAULT; - goto out_free_page; - } - - p += nr_read; - len -= nr_read; - buf += nr_read; - count -= nr_read; - rv += nr_read; - - if (final) - goto out_free_page; - } -skip_argv: - /* * Command line (1 string) occupies ARGV and * extends into ENVP. */ - if (len1 <= *pos) { - p = env_start + *pos - len1; - len = len1 + len2 - *pos; - } else { - p = env_start; - len = len2; + struct { + unsigned long p; + unsigned long len; + } cmdline[2] = { + { .p = arg_start, .len = len1 }, + { .p = env_start, .len = len2 }, + }; + loff_t pos1 = *pos; + unsigned int i; + + i = 0; + while (i < 2 && pos1 >= cmdline[i].len) { + pos1 -= cmdline[i].len; + i++; } - while (count > 0 && len > 0) { - unsigned int _count, l; - int nr_read; - bool final; - - _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); - if (nr_read < 0) - rv = nr_read; - if (nr_read <= 0) - goto out_free_page; - - /* Find EOS. */ - final = false; - l = strnlen(page, nr_read); - if (l < nr_read) { - nr_read = l; - final = true; + while (i < 2) { + p = cmdline[i].p + pos1; + len = cmdline[i].len - pos1; + while (count > 0 && len > 0) { + unsigned int _count, l; + int nr_read; + bool final; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + /* + * Command line can be shorter than whole ARGV + * even if last "marker" byte says it is not. + */ + final = false; + l = strnlen(page, nr_read); + if (l < nr_read) { + nr_read = l; + final = true; + } + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + + if (final) + goto out_free_page; } - if (copy_to_user(buf, page, nr_read)) { - rv = -EFAULT; - goto out_free_page; - } - - p += nr_read; - len -= nr_read; - buf += nr_read; - count -= nr_read; - rv += nr_read; - - if (final) - goto out_free_page; + /* Only first chunk can be read partially. */ + pos1 = 0; + i++; } -skip_argv_envp: - ; } out_free_page: @@ -729,11 +702,11 @@ static int proc_pid_permission(struct inode *inode, int mask) task = get_proc_task(inode); if (!task) return -ESRCH; - has_perms = has_pid_permissions(pid, task, 1); + has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS); put_task_struct(task); if (!has_perms) { - if (pid->hide_pid == 2) { + if (pid->hide_pid == HIDEPID_INVISIBLE) { /* * Let's make getdents(), stat(), and open() * consistent with each other. If a process @@ -798,7 +771,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) if (!IS_ERR_OR_NULL(mm)) { /* ensure this mm_struct can't be freed */ - atomic_inc(&mm->mm_count); + mmgrab(mm); /* but do not pin its memory */ mmput(mm); } @@ -845,7 +818,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, return -ENOMEM; copied = 0; - if (!atomic_inc_not_zero(&mm->mm_users)) + if (!mmget_not_zero(mm)) goto free; /* Maybe we should limit FOLL_FORCE to actual ptrace users? */ @@ -953,7 +926,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, return -ENOMEM; ret = 0; - if (!atomic_inc_not_zero(&mm->mm_users)) + if (!mmget_not_zero(mm)) goto free; down_read(&mm->mmap_sem); @@ -1096,7 +1069,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) if (p) { if (atomic_read(&p->mm->mm_users) > 1) { mm = p->mm; - atomic_inc(&mm->mm_count); + mmgrab(mm); } task_unlock(p); } @@ -1667,12 +1640,63 @@ const struct inode_operations proc_pid_link_inode_operations = { /* building an inode */ +void task_dump_owner(struct task_struct *task, mode_t mode, + kuid_t *ruid, kgid_t *rgid) +{ + /* Depending on the state of dumpable compute who should own a + * proc file for a task. + */ + const struct cred *cred; + kuid_t uid; + kgid_t gid; + + /* Default to the tasks effective ownership */ + rcu_read_lock(); + cred = __task_cred(task); + uid = cred->euid; + gid = cred->egid; + rcu_read_unlock(); + + /* + * Before the /proc/pid/status file was created the only way to read + * the effective uid of a /process was to stat /proc/pid. Reading + * /proc/pid/status is slow enough that procps and other packages + * kept stating /proc/pid. To keep the rules in /proc simple I have + * made this apply to all per process world readable and executable + * directories. + */ + if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) { + struct mm_struct *mm; + task_lock(task); + mm = task->mm; + /* Make non-dumpable tasks owned by some root */ + if (mm) { + if (get_dumpable(mm) != SUID_DUMP_USER) { + struct user_namespace *user_ns = mm->user_ns; + + uid = make_kuid(user_ns, 0); + if (!uid_valid(uid)) + uid = GLOBAL_ROOT_UID; + + gid = make_kgid(user_ns, 0); + if (!gid_valid(gid)) + gid = GLOBAL_ROOT_GID; + } + } else { + uid = GLOBAL_ROOT_UID; + gid = GLOBAL_ROOT_GID; + } + task_unlock(task); + } + *ruid = uid; + *rgid = gid; +} + struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, umode_t mode) { struct inode * inode; struct proc_inode *ei; - const struct cred *cred; /* We need a new inode */ @@ -1694,13 +1718,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb, if (!ei->pid) goto out_unlock; - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); out: @@ -1711,12 +1729,12 @@ out_unlock: return NULL; } -int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int pid_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct task_struct *task; - const struct cred *cred; - struct pid_namespace *pid = dentry->d_sb->s_fs_info; + struct pid_namespace *pid = path->dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -1725,7 +1743,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) stat->gid = GLOBAL_ROOT_GID; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { - if (!has_pid_permissions(pid, task, 2)) { + if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { rcu_read_unlock(); /* * This doesn't prevent learning whether PID exists, @@ -1733,12 +1751,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) */ return -ENOENT; } - if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || - task_dumpable(task)) { - cred = __task_cred(task); - stat->uid = cred->euid; - stat->gid = cred->egid; - } + task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); } rcu_read_unlock(); return 0; @@ -1754,18 +1767,11 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) * Rewrite the inode's ownerships here because the owning task may have * performed a setuid(), etc. * - * Before the /proc/pid/status file was created the only way to read - * the effective uid of a /process was to stat /proc/pid. Reading - * /proc/pid/status is slow enough that procps and other packages - * kept stating /proc/pid. To keep the rules in /proc simple I have - * made this apply to all per process world readable and executable - * directories. */ int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; - const struct cred *cred; if (flags & LOOKUP_RCU) return -ECHILD; @@ -1774,17 +1780,8 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) task = get_proc_task(inode); if (task) { - if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || - task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } + task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); + inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); put_task_struct(task); @@ -1881,7 +1878,6 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) bool exact_vma_exists = false; struct mm_struct *mm = NULL; struct task_struct *task; - const struct cred *cred; struct inode *inode; int status = 0; @@ -1906,16 +1902,8 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) mmput(mm); if (exact_vma_exists) { - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); + security_task_to_inode(task, inode); status = 1; } @@ -2179,7 +2167,7 @@ static const struct file_operations proc_map_files_operations = { .llseek = generic_file_llseek, }; -#ifdef CONFIG_CHECKPOINT_RESTORE +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) struct timers_private { struct pid *pid; struct task_struct *task; @@ -2488,6 +2476,12 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, length = -ESRCH; if (!task) goto out_no_task; + + /* A task may only write its own attributes. */ + length = -EACCES; + if (current != task) + goto out; + if (count > PAGE_SIZE) count = PAGE_SIZE; @@ -2503,14 +2497,13 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, } /* Guard against adverse ptrace interaction */ - length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); + length = mutex_lock_interruptible(¤t->signal->cred_guard_mutex); if (length < 0) goto out_free; - length = security_setprocattr(task, - (char*)file->f_path.dentry->d_name.name, + length = security_setprocattr(file->f_path.dentry->d_name.name, page, count); - mutex_unlock(&task->signal->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); out_free: kfree(page); out: @@ -2936,7 +2929,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif -#ifdef CONFIG_CHECKPOINT_RESTORE +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) REG("timers", S_IRUGO, proc_timers_operations), #endif REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), @@ -3181,7 +3174,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) int len; cond_resched(); - if (!has_pid_permissions(ns, iter.task, 2)) + if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) continue; len = snprintf(name, sizeof(name), "%d", iter.tgid); @@ -3524,9 +3517,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) return 0; } -static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +static int proc_task_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct task_struct *p = get_proc_task(inode); generic_fillattr(inode, stat); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 4274f83bf100..c330495c3115 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -1,4 +1,4 @@ -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/dcache.h> #include <linux/path.h> @@ -84,7 +84,6 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) { struct files_struct *files; struct task_struct *task; - const struct cred *cred; struct inode *inode; unsigned int fd; @@ -108,16 +107,7 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) rcu_read_unlock(); put_files_struct(files); - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); if (S_ISLNK(inode->i_mode)) { unsigned i_mode = S_IFLNK; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index f6a01f09f79d..ee27feb34cf4 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -57,9 +57,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, struct rb_node *node = dir->subdir.rb_node; while (node) { - struct proc_dir_entry *de = container_of(node, - struct proc_dir_entry, - subdir_node); + struct proc_dir_entry *de = rb_entry(node, + struct proc_dir_entry, + subdir_node); int result = proc_match(len, name, de); if (result < 0) @@ -80,8 +80,9 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, /* Figure out where to put new node */ while (*new) { - struct proc_dir_entry *this = - container_of(*new, struct proc_dir_entry, subdir_node); + struct proc_dir_entry *this = rb_entry(*new, + struct proc_dir_entry, + subdir_node); int result = proc_match(de->namelen, de->name, this); parent = *new; @@ -117,10 +118,10 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) return 0; } -static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int proc_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct proc_dir_entry *de = PDE(inode); if (de && de->nlink) set_nlink(inode, de->nlink); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 842a5ff5b85c..2cc7a8030275 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -43,10 +43,11 @@ static void proc_evict_inode(struct inode *inode) de = PDE(inode); if (de) pde_put(de); + head = PROC_I(inode)->sysctl; if (head) { RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); - sysctl_head_put(head); + proc_sys_evict_inode(inode, head); } } @@ -106,7 +107,7 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); - if (pid->hide_pid != 0) + if (pid->hide_pid != HIDEPID_OFF) seq_printf(seq, ",hidepid=%u", pid->hide_pid); return 0; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 2de5194ba378..c5ae09b6c726 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -14,6 +14,8 @@ #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/binfmts.h> +#include <linux/sched/coredump.h> +#include <linux/sched/task.h> struct ctl_table_header; struct mempolicy; @@ -65,6 +67,7 @@ struct proc_inode { struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; + struct list_head sysctl_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; }; @@ -97,20 +100,8 @@ static inline struct task_struct *get_proc_task(struct inode *inode) return get_pid_task(proc_pid(inode), PIDTYPE_PID); } -static inline int task_dumpable(struct task_struct *task) -{ - int dumpable = 0; - struct mm_struct *mm; - - task_lock(task); - mm = task->mm; - if (mm) - dumpable = get_dumpable(mm); - task_unlock(task); - if (dumpable == SUID_DUMP_USER) - return 1; - return 0; -} +void task_dump_owner(struct task_struct *task, mode_t mode, + kuid_t *ruid, kgid_t *rgid); static inline unsigned name_to_int(const struct qstr *qstr) { @@ -160,7 +151,7 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, * base.c */ extern const struct dentry_operations pid_dentry_operations; -extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct dentry *, struct iattr *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern int pid_revalidate(struct dentry *, unsigned int); @@ -249,10 +240,12 @@ extern void proc_thread_self_init(void); */ #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); -extern void sysctl_head_put(struct ctl_table_header *); +extern void proc_sys_evict_inode(struct inode *inode, + struct ctl_table_header *head); #else static inline void proc_sys_init(void) { } -static inline void sysctl_head_put(struct ctl_table_header *head) { } +static inline void proc_sys_evict_inode(struct inode *inode, + struct ctl_table_header *head) { } #endif /* diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 0b80ad87b4d6..4ee55274f155 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -28,6 +28,7 @@ #include <linux/list.h> #include <linux/ioport.h> #include <linux/memory.h> +#include <linux/sched/task.h> #include <asm/sections.h> #include "internal.h" @@ -373,7 +374,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) phdr->p_flags = PF_R|PF_W|PF_X; phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff; phdr->p_vaddr = (size_t)m->addr; - phdr->p_paddr = 0; + if (m->type == KCORE_RAM || m->type == KCORE_TEXT) + phdr->p_paddr = __pa(m->addr); + else + phdr->p_paddr = (elf_addr_t)-1; phdr->p_filesz = phdr->p_memsz = m->size; phdr->p_align = PAGE_SIZE; } diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index aec66e6c2060..983fce5c2418 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -3,6 +3,8 @@ #include <linux/pid_namespace.h> #include <linux/proc_fs.h> #include <linux/sched.h> +#include <linux/sched/loadavg.h> +#include <linux/sched/stat.h> #include <linux/seq_file.h> #include <linux/seqlock.h> #include <linux/time.h> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index ffd72a6c6e04..d72fc40241d9 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -17,6 +17,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/sched.h> +#include <linux/sched/task.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/mount.h> @@ -140,10 +141,10 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir, return de; } -static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int proc_tgid_net_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct net *net; net = get_proc_task_net(inode); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d4e37acd4821..8f91ec66baa3 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -8,6 +8,7 @@ #include <linux/printk.h> #include <linux/security.h> #include <linux/sched.h> +#include <linux/cred.h> #include <linux/namei.h> #include <linux/mm.h> #include <linux/module.h> @@ -190,6 +191,7 @@ static void init_header(struct ctl_table_header *head, head->set = set; head->parent = NULL; head->node = node; + INIT_LIST_HEAD(&head->inodes); if (node) { struct ctl_table *entry; for (entry = table; entry->procname; entry++, node++) @@ -259,6 +261,27 @@ static void unuse_table(struct ctl_table_header *p) complete(p->unregistering); } +/* called under sysctl_lock */ +static void proc_sys_prune_dcache(struct ctl_table_header *head) +{ + struct inode *inode, *prev = NULL; + struct proc_inode *ei; + + rcu_read_lock(); + list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) { + inode = igrab(&ei->vfs_inode); + if (inode) { + rcu_read_unlock(); + iput(prev); + prev = inode; + d_prune_aliases(inode); + rcu_read_lock(); + } + } + rcu_read_unlock(); + iput(prev); +} + /* called under sysctl_lock, will reacquire if has to wait */ static void start_unregistering(struct ctl_table_header *p) { @@ -272,31 +295,22 @@ static void start_unregistering(struct ctl_table_header *p) p->unregistering = &wait; spin_unlock(&sysctl_lock); wait_for_completion(&wait); - spin_lock(&sysctl_lock); } else { /* anything non-NULL; we'll never dereference it */ p->unregistering = ERR_PTR(-EINVAL); + spin_unlock(&sysctl_lock); } /* + * Prune dentries for unregistered sysctls: namespaced sysctls + * can have duplicate names and contaminate dcache very badly. + */ + proc_sys_prune_dcache(p); + /* * do not remove from the list until nobody holds it; walking the * list in do_sysctl() relies on that. */ - erase_header(p); -} - -static void sysctl_head_get(struct ctl_table_header *head) -{ - spin_lock(&sysctl_lock); - head->count++; - spin_unlock(&sysctl_lock); -} - -void sysctl_head_put(struct ctl_table_header *head) -{ spin_lock(&sysctl_lock); - if (!--head->count) - kfree_rcu(head, rcu); - spin_unlock(&sysctl_lock); + erase_header(p); } static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) @@ -440,10 +454,20 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_ino = get_next_ino(); - sysctl_head_get(head); ei = PROC_I(inode); + + spin_lock(&sysctl_lock); + if (unlikely(head->unregistering)) { + spin_unlock(&sysctl_lock); + iput(inode); + inode = NULL; + goto out; + } ei->sysctl = head; ei->sysctl_entry = table; + list_add_rcu(&ei->sysctl_inodes, &head->inodes); + head->count++; + spin_unlock(&sysctl_lock); inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_mode = table->mode; @@ -466,6 +490,15 @@ out: return inode; } +void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + list_del_rcu(&PROC_I(inode)->sysctl_inodes); + if (!--head->count) + kfree_rcu(head, rcu); + spin_unlock(&sysctl_lock); +} + static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; @@ -769,9 +802,10 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) return 0; } -static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +static int proc_sys_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; diff --git a/fs/proc/root.c b/fs/proc/root.c index 1988440b2049..deecb397daa3 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -14,12 +14,14 @@ #include <linux/stat.h> #include <linux/init.h> #include <linux/sched.h> +#include <linux/sched/stat.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/user_namespace.h> #include <linux/mount.h> #include <linux/pid_namespace.h> #include <linux/parser.h> +#include <linux/cred.h> #include "internal.h" @@ -58,7 +60,8 @@ int proc_parse_options(char *options, struct pid_namespace *pid) case Opt_hidepid: if (match_int(&args[0], &option)) return 0; - if (option < 0 || option > 2) { + if (option < HIDEPID_OFF || + option > HIDEPID_INVISIBLE) { pr_err("proc: hidepid value must be between 0 and 2.\n"); return 0; } @@ -148,10 +151,10 @@ void __init proc_root_init(void) proc_sys_init(); } -static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat -) +static int proc_root_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - generic_fillattr(d_inode(dentry), stat); + generic_fillattr(d_inode(path->dentry), stat); stat->nlink = proc_root.nlink + nr_processes(); return 0; } diff --git a/fs/proc/stat.c b/fs/proc/stat.c index d700c42b3572..bd4e55f4aa20 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -5,11 +5,12 @@ #include <linux/kernel_stat.h> #include <linux/proc_fs.h> #include <linux/sched.h> +#include <linux/sched/stat.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/irqnr.h> -#include <linux/cputime.h> +#include <linux/sched/cputime.h> #include <linux/tick.h> #ifndef arch_irq_stat_cpu @@ -21,9 +22,9 @@ #ifdef arch_idle_time -static cputime64_t get_idle_time(int cpu) +static u64 get_idle_time(int cpu) { - cputime64_t idle; + u64 idle; idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) @@ -31,9 +32,9 @@ static cputime64_t get_idle_time(int cpu) return idle; } -static cputime64_t get_iowait_time(int cpu) +static u64 get_iowait_time(int cpu) { - cputime64_t iowait; + u64 iowait; iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; if (cpu_online(cpu) && nr_iowait_cpu(cpu)) @@ -45,32 +46,32 @@ static cputime64_t get_iowait_time(int cpu) static u64 get_idle_time(int cpu) { - u64 idle, idle_time = -1ULL; + u64 idle, idle_usecs = -1ULL; if (cpu_online(cpu)) - idle_time = get_cpu_idle_time_us(cpu, NULL); + idle_usecs = get_cpu_idle_time_us(cpu, NULL); - if (idle_time == -1ULL) + if (idle_usecs == -1ULL) /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; else - idle = usecs_to_cputime64(idle_time); + idle = idle_usecs * NSEC_PER_USEC; return idle; } static u64 get_iowait_time(int cpu) { - u64 iowait, iowait_time = -1ULL; + u64 iowait, iowait_usecs = -1ULL; if (cpu_online(cpu)) - iowait_time = get_cpu_iowait_time_us(cpu, NULL); + iowait_usecs = get_cpu_iowait_time_us(cpu, NULL); - if (iowait_time == -1ULL) + if (iowait_usecs == -1ULL) /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; else - iowait = usecs_to_cputime64(iowait_time); + iowait = iowait_usecs * NSEC_PER_USEC; return iowait; } @@ -115,16 +116,16 @@ static int show_stat(struct seq_file *p, void *v) } sum += arch_irq_stat(); - seq_put_decimal_ull(p, "cpu ", cputime64_to_clock_t(user)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice)); + seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(user)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(system)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice)); seq_putc(p, '\n'); for_each_online_cpu(i) { @@ -140,16 +141,16 @@ static int show_stat(struct seq_file *p, void *v) guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; seq_printf(p, "cpu%d", i); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(user)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(system)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice)); seq_putc(p, '\n'); } seq_put_decimal_ull(p, "intr ", (unsigned long long)sum); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8f96a49178d0..f08bd31c1081 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -11,6 +11,7 @@ #include <linux/mempolicy.h> #include <linux/rmap.h> #include <linux/swap.h> +#include <linux/sched/mm.h> #include <linux/swapops.h> #include <linux/mmu_notifier.h> #include <linux/page_idle.h> @@ -167,7 +168,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos) return ERR_PTR(-ESRCH); mm = priv->mm; - if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + if (!mm || !mmget_not_zero(mm)) return NULL; down_read(&mm->mmap_sem); @@ -1352,7 +1353,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, unsigned long end_vaddr; int ret = 0, copied = 0; - if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + if (!mm || !mmget_not_zero(mm)) goto out; ret = -EINVAL; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 37175621e890..23266694db11 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -7,6 +7,8 @@ #include <linux/ptrace.h> #include <linux/slab.h> #include <linux/seq_file.h> +#include <linux/sched/mm.h> + #include "internal.h" /* @@ -219,7 +221,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) return ERR_PTR(-ESRCH); mm = priv->mm; - if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + if (!mm || !mmget_not_zero(mm)) return NULL; down_read(&mm->mmap_sem); diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 33de567c25af..7981c4ffe787 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -5,23 +5,20 @@ #include <linux/seq_file.h> #include <linux/time.h> #include <linux/kernel_stat.h> -#include <linux/cputime.h> static int uptime_proc_show(struct seq_file *m, void *v) { struct timespec uptime; struct timespec idle; - u64 idletime; u64 nsec; u32 rem; int i; - idletime = 0; + nsec = 0; for_each_possible_cpu(i) - idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; + nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; get_monotonic_boottime(&uptime); - nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); idle.tv_nsec = rem; seq_printf(m, "%lu.%02lu %lu.%02lu\n", diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 5105b1599981..885d445afa0d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -265,10 +265,10 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, * On s390 the fault handler is used for memory regions that can't be mapped * directly with remap_pfn_range(). */ -static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int mmap_vmcore_fault(struct vm_fault *vmf) { #ifdef CONFIG_S390 - struct address_space *mapping = vma->vm_file->f_mapping; + struct address_space *mapping = vmf->vma->vm_file->f_mapping; pgoff_t index = vmf->pgoff; struct page *page; loff_t offset; @@ -388,7 +388,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma, } return 0; fail: - do_munmap(vma->vm_mm, from, len); + do_munmap(vma->vm_mm, from, len, NULL); return -EAGAIN; } @@ -481,7 +481,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) return 0; fail: - do_munmap(vma->vm_mm, vma->vm_start, len); + do_munmap(vma->vm_mm, vma->vm_start, len, NULL); return -EAGAIN; } #else |